27 "datasource": "-- Grafana --",
30 "iconColor": "rgba(0, 211, 255, 1)",
31 "name": "Annotations & Alerts",
40 "iteration": 1615564911000,
51 "title": "$ceph_hosts System Overview",
56 "colorBackground": false,
59 "rgba(245, 54, 54, 0.9)",
60 "rgba(237, 129, 40, 0.89)",
61 "rgba(50, 172, 45, 0.97)"
63 "datasource": "$datasource",
69 "thresholdLabels": false,
70 "thresholdMarkers": true
85 "name": "value to text",
89 "name": "range to text",
95 "nullPointMode": "connected",
98 "postfixFontSize": "50%",
100 "prefixFontSize": "50%",
109 "fillColor": "rgba(31, 118, 189, 0.18)",
111 "lineColor": "rgb(31, 120, 193)",
117 "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{hostname='$ceph_hosts'}))",
118 "format": "time_series",
127 "type": "singlestat",
128 "valueFontSize": "80%",
136 "valueName": "current"
140 "interrupt": "#447EBC",
149 "datasource": "$datasource",
150 "description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's CPU data is shown",
160 "alignAsTable": false,
173 "nullPointMode": "null",
179 "seriesOverrides": [],
182 "steppedLine": false,
185 "expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]))\n) * 100",
186 "format": "time_series",
188 "legendFormat": "{{mode}}",
197 "title": "CPU Utilization",
201 "value_type": "individual"
214 "label": "% Utilization",
236 "Available": "#508642",
246 "datasource": "$datasource",
267 "nullPointMode": "null",
283 "steppedLine": false,
286 "expr": "(node_memory_MemTotal{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"})- (\n (node_memory_MemFree{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) + \n (node_memory_Cached{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) + \n (node_memory_Buffers{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) +\n (node_memory_Slab{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"})\n )\n \n",
287 "format": "time_series",
289 "legendFormat": "used",
293 "expr": "node_memory_MemFree{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} ",
294 "format": "time_series",
297 "legendFormat": "Free",
301 "expr": "(node_memory_Cached{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) + \n(node_memory_Buffers{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) +\n(node_memory_Slab{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) \n",
302 "format": "time_series",
305 "legendFormat": "buffers/cache",
309 "expr": "node_memory_MemTotal{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} ",
310 "format": "time_series",
313 "legendFormat": "total",
320 "title": "RAM Usage",
324 "value_type": "individual"
362 "datasource": "$datasource",
363 "description": "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')",
373 "alignAsTable": false,
388 "nullPointMode": "null",
396 "transform": "negative-Y"
401 "steppedLine": false,
404 "expr": "sum by (device) (\n irate(node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)",
405 "format": "time_series",
407 "legendFormat": "{{device}}.rx",
413 "expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)",
414 "format": "time_series",
416 "legendFormat": "{{device}}.tx",
424 "title": "Network Load",
428 "value_type": "individual"
440 "format": "decbytes",
441 "label": "Send (-) / Receive (+)",
466 "datasource": "$datasource",
474 "hideTimeOverride": true,
488 "nullPointMode": "null",
496 "transform": "negative-Y"
501 "steppedLine": false,
504 "expr": "irate(node_network_receive_drop{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m])",
505 "format": "time_series",
508 "legendFormat": "{{device}}.rx",
512 "expr": "irate(node_network_transmit_drop{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m])",
513 "format": "time_series",
515 "legendFormat": "{{device}}.tx",
522 "title": "Network drop rate",
526 "value_type": "individual"
539 "label": "Send (-) / Receive (+)",
560 "cacheTimeout": null,
561 "colorBackground": false,
564 "rgba(245, 54, 54, 0.9)",
565 "rgba(237, 129, 40, 0.89)",
566 "rgba(50, 172, 45, 0.97)"
568 "datasource": "$datasource",
570 "description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.",
576 "thresholdLabels": false,
577 "thresholdMarkers": true
592 "name": "value to text",
596 "name": "range to text",
602 "nullPointMode": "connected",
605 "postfixFontSize": "50%",
607 "prefixFontSize": "50%",
616 "fillColor": "rgba(31, 118, 189, 0.18)",
618 "lineColor": "rgb(31, 120, 193)",
624 "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"})",
625 "format": "time_series",
633 "title": "Raw Capacity",
634 "type": "singlestat",
635 "valueFontSize": "80%",
643 "valueName": "current"
650 "datasource": "$datasource",
658 "hideTimeOverride": true,
672 "nullPointMode": "null",
680 "transform": "negative-Y"
685 "steppedLine": false,
688 "expr": "irate(node_network_receive_errs{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m])",
689 "format": "time_series",
692 "legendFormat": "{{device}}.rx",
696 "expr": "irate(node_network_transmit_errs{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m])",
697 "format": "time_series",
699 "legendFormat": "{{device}}.tx",
706 "title": "Network error rate",
710 "value_type": "individual"
723 "label": "Send (-) / Receive (+)",
754 "title": "OSD Disk Performance Statistics",
762 "datasource": "$datasource",
763 "description": "For any OSD devices on the host, this chart shows the iops per physical device. Each device is shown by its name and corresponding OSD id value",
785 "nullPointMode": "connected",
792 "alias": "/.*reads/",
793 "transform": "negative-Y"
798 "steppedLine": false,
801 "expr": "label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
802 "format": "time_series",
804 "legendFormat": "{{device}}({{ceph_daemon}}) writes",
810 "expr": "label_replace(\n (irate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
811 "format": "time_series",
814 "legendFormat": "{{device}}({{ceph_daemon}}) reads",
821 "title": "$ceph_hosts Disk IOPS",
825 "value_type": "individual"
838 "label": "Read (-) / Write (+)",
863 "datasource": "$datasource",
864 "description": "For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id",
886 "nullPointMode": "connected",
894 "transform": "negative-Y"
899 "steppedLine": false,
902 "expr": "label_replace((irate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
903 "format": "time_series",
905 "legendFormat": "{{device}}({{ceph_daemon}}) write",
909 "expr": "label_replace((irate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
910 "format": "time_series",
912 "legendFormat": "{{device}}({{ceph_daemon}}) read",
919 "title": "$ceph_hosts Throughput by Disk",
923 "value_type": "individual"
936 "label": "Read (-) / Write (+)",
961 "datasource": "$datasource",
962 "description": "For OSD hosts, this chart shows the latency at the physical drive. Each drive is shown by device name, with its corresponding OSD id",
984 "nullPointMode": "null as zero",
989 "seriesOverrides": [],
992 "steppedLine": false,
995 "expr": "max by(instance,device) (label_replace((irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")) * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
996 "format": "time_series",
999 "legendFormat": "{{device}}({{ceph_daemon}})",
1006 "title": "$ceph_hosts Disk Latency",
1010 "value_type": "individual"
1048 "datasource": "$datasource",
1049 "description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.",
1072 "nullPointMode": "connected",
1076 "percentage": false,
1080 "seriesOverrides": [],
1083 "steppedLine": false,
1086 "expr": "label_replace(((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
1087 "format": "time_series",
1089 "intervalFactor": 1,
1090 "legendFormat": "{{device}}({{ceph_daemon}})",
1098 "title": "$ceph_hosts Disk utilization",
1102 "value_type": "individual"
1114 "format": "percent",
1137 "schemaVersion": 16,
1150 "label": "Data Source",
1151 "name": "datasource",
1153 "query": "prometheus",
1156 "skipUrlSync": false,
1157 "type": "datasource"
1162 "datasource": "$datasource",
1164 "includeAll": false,
1165 "label": "Hostname",
1167 "name": "ceph_hosts",
1169 "query": "label_values(node_scrape_collector_success, instance) ",
1171 "regex": "([^.:]*).*",
1172 "skipUrlSync": false,
1174 "tagValuesQuery": "",
1187 "refresh_intervals": [
1212 "title": "Host Details",