X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=ceph%2Fmonitoring%2Fgrafana%2Fdashboards%2Fhosts-overview.json;h=804aa51cc9df3defb9a3fe967f38e832ab9f32de;hb=f6b5b4d738b87d88d2de35127b6b0e41eae2a272;hp=dbb7bdda74a79e02d6f0e1b6ab48f6bd727441c0;hpb=81eedcae9c6d684efdae63dc481d8be7c714897d;p=ceph.git diff --git a/ceph/monitoring/grafana/dashboards/hosts-overview.json b/ceph/monitoring/grafana/dashboards/hosts-overview.json index dbb7bdda7..804aa51cc 100644 --- a/ceph/monitoring/grafana/dashboards/hosts-overview.json +++ b/ceph/monitoring/grafana/dashboards/hosts-overview.json @@ -133,6 +133,7 @@ "datasource": "$datasource", "decimals": 0, "description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster", + "decimals": 2, "format": "percentunit", "gauge": { "maxValue": 100, @@ -216,6 +217,7 @@ "datasource": "$datasource", "decimals": 0, "description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)", + "decimals": 2, "format": "percentunit", "gauge": { "maxValue": 100, @@ -267,7 +269,7 @@ "tableColumn": "", "targets": [ { - "expr": "avg (((node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})- (\n (node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})\n )) /\n (node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"[[osd_hosts]]|[[rgw_hosts]]|[[mon_hosts]]|[[mds_hosts]].*\"} ))", + "expr": "avg (((node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})- (\n (node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})\n )) /\n (node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"} ))", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -431,7 +433,7 @@ "tableColumn": "", "targets": [ { - "expr": "avg (\n ((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100)\n ) *\n on(instance, device) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")\n)", + "expr" : "avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) *\n on(instance, device) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\")\n)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -572,7 +574,7 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10,( 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )\n)", + "expr": "topk(10,100 * ( 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", @@ -581,11 +583,12 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "CPU Busy - Top 10 Hosts", "tooltip": { "shared": true, - "sort": 1, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -667,11 +670,12 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Network Load - Top 10", + "title": "Network Load - Top 10 Hosts", "tooltip": { "shared": true, - "sort": 1, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -738,7 +742,7 @@ "multi": false, "name": "osd_hosts", "options": [], - "query": "label_values(ceph_disk_occupation, instance)", + "query": "label_values(ceph_disk_occupation, exported_instance)", "refresh": 1, "regex": "([^.]*).*", "skipUrlSync": false,