21 "datasource": "-- Grafana --",
24 "iconColor": "rgba(0, 211, 255, 1)",
25 "name": "Annotations & Alerts",
36 "hideControls": false,
52 "repeatIteration": null,
55 "title": "RGW Overview - All Gateways",
64 "datasource": "$datasource",
85 "alignAsTable": false,
99 "nullPointMode": "null as zero",
105 "seriesOverrides": [ ],
108 "steppedLine": false,
111 "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
112 "format": "time_series",
114 "legendFormat": "GET {{rgw_host}}",
118 "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
119 "format": "time_series",
121 "legendFormat": "PUT {{rgw_host}}",
128 "title": "Average GET/PUT Latencies by RGW Instance",
132 "value_type": "individual"
134 "type": "timeseries",
166 "datasource": "$datasource",
172 "showPoints": "never"
187 "alignAsTable": false,
201 "nullPointMode": "null as zero",
207 "seriesOverrides": [ ],
210 "steppedLine": false,
213 "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
214 "format": "time_series",
216 "legendFormat": "{{rgw_host}}",
223 "title": "Total Requests/sec by RGW Instance",
227 "value_type": "individual"
229 "type": "timeseries",
261 "datasource": "$datasource",
262 "description": "Latencies are shown stacked, without a y-axis, to provide a visual indication of GET latency imbalance across RGW hosts",
267 "showPoints": "never"
282 "alignAsTable": false,
296 "nullPointMode": "null as zero",
302 "seriesOverrides": [ ],
305 "steppedLine": false,
308 "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
309 "format": "time_series",
311 "legendFormat": "{{rgw_host}}",
318 "title": "GET Latencies by RGW Instance",
322 "value_type": "individual"
324 "type": "timeseries",
356 "datasource": "$datasource",
357 "description": "Total bytes transferred in/out of all radosgw instances within the cluster",
362 "showPoints": "never"
377 "alignAsTable": false,
391 "nullPointMode": "null as zero",
397 "seriesOverrides": [ ],
400 "steppedLine": false,
403 "expr": "sum(rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]))",
404 "format": "time_series",
406 "legendFormat": "GETs",
410 "expr": "sum(rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]))",
411 "format": "time_series",
413 "legendFormat": "PUTs",
420 "title": "Bandwidth Consumed by Type",
424 "value_type": "individual"
426 "type": "timeseries",
458 "datasource": "$datasource",
459 "description": "Total bytes transferred in/out through get/put operations, by radosgw instance",
464 "showPoints": "never"
479 "alignAsTable": false,
493 "nullPointMode": "null as zero",
499 "seriesOverrides": [ ],
502 "steppedLine": false,
505 "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
506 "format": "time_series",
508 "legendFormat": "{{rgw_host}}",
515 "title": "Bandwidth by RGW Instance",
519 "value_type": "individual"
521 "type": "timeseries",
553 "datasource": "$datasource",
554 "description": "Latencies are shown stacked, without a y-axis, to provide a visual indication of PUT latency imbalance across RGW hosts",
559 "showPoints": "never"
574 "alignAsTable": false,
588 "nullPointMode": "null as zero",
594 "seriesOverrides": [ ],
597 "steppedLine": false,
600 "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
601 "format": "time_series",
603 "legendFormat": "{{rgw_host}}",
610 "title": "PUT Latencies by RGW Instance",
614 "value_type": "individual"
616 "type": "timeseries",
655 "repeatIteration": null,
658 "title": "RGW Overview - HAProxy Metrics",
667 "datasource": "$datasource",
673 "showPoints": "never"
688 "alignAsTable": true,
702 "nullPointMode": "null as zero",
711 "alias": "/.*Back.*/",
712 "transform": "negative-Y"
730 "alias": "/.*other.*/"
736 "steppedLine": false,
739 "expr": "sum(\n rate(\n haproxy_frontend_http_responses_total{code=~\"$code\", job=~\"$job_haproxy\", instance=~\"$ingress_service\", proxy=~\"frontend\"}[$__rate_interval]\n )\n) by (code)\n",
740 "format": "time_series",
742 "legendFormat": "Frontend {{ code }}",
746 "expr": "sum(\n rate(\n haproxy_backend_http_responses_total{code=~\"$code\", job=~\"$job_haproxy\", instance=~\"$ingress_service\", proxy=~\"backend\"}[$__rate_interval]\n )\n) by (code)\n",
747 "format": "time_series",
749 "legendFormat": "Backend {{ code }}",
756 "title": "Total responses by HTTP code",
760 "value_type": "individual"
762 "type": "timeseries",
794 "datasource": "$datasource",
800 "showPoints": "never"
815 "alignAsTable": true,
829 "nullPointMode": "null as zero",
838 "alias": "/.*Response.*/",
839 "transform": "negative-Y"
842 "alias": "/.*Backend.*/",
843 "transform": "negative-Y"
849 "steppedLine": false,
852 "expr": "sum(\n rate(\n haproxy_frontend_http_requests_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
853 "format": "time_series",
855 "legendFormat": "Requests",
859 "expr": "sum(\n rate(\n haproxy_backend_response_errors_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
860 "format": "time_series",
862 "legendFormat": "Response errors",
866 "expr": "sum(\n rate(\n haproxy_frontend_request_errors_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
867 "format": "time_series",
869 "legendFormat": "Request errors",
873 "expr": "sum(\n rate(\n haproxy_backend_redispatch_warnings_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
874 "format": "time_series",
876 "legendFormat": "Backend redispatch",
880 "expr": "sum(\n rate(\n haproxy_backend_retry_warnings_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
881 "format": "time_series",
883 "legendFormat": "Backend retry",
887 "expr": "sum(\n rate(\n haproxy_frontend_requests_denied_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
888 "format": "time_series",
890 "legendFormat": "Request denied",
894 "expr": "sum(\n haproxy_backend_current_queue{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}\n) by (instance)\n",
895 "format": "time_series",
897 "legendFormat": "Backend Queued",
904 "title": "Total requests / responses",
908 "value_type": "individual"
910 "type": "timeseries",
942 "datasource": "$datasource",
948 "showPoints": "never"
963 "alignAsTable": true,
977 "nullPointMode": "null as zero",
986 "alias": "/.*Back.*/",
987 "transform": "negative-Y"
993 "steppedLine": false,
996 "expr": "sum(\n rate(\n haproxy_frontend_connections_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
997 "format": "time_series",
999 "legendFormat": "Front",
1003 "expr": "sum(\n rate(\n haproxy_backend_connection_attempts_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
1004 "format": "time_series",
1005 "intervalFactor": 1,
1006 "legendFormat": "Back",
1010 "expr": "sum(\n rate(\n haproxy_backend_connection_errors_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
1011 "format": "time_series",
1012 "intervalFactor": 1,
1013 "legendFormat": "Back errors",
1020 "title": "Total number of connections",
1024 "value_type": "individual"
1026 "type": "timeseries",
1058 "datasource": "$datasource",
1064 "showPoints": "never"
1079 "alignAsTable": true,
1093 "nullPointMode": "null as zero",
1094 "percentage": false,
1099 "seriesOverrides": [
1102 "alias": "/.*OUT.*/",
1103 "transform": "negative-Y"
1109 "steppedLine": false,
1112 "expr": "sum(\n rate(\n haproxy_frontend_bytes_in_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
1113 "format": "time_series",
1114 "intervalFactor": 1,
1115 "legendFormat": "IN Front",
1119 "expr": "sum(\n rate(\n haproxy_frontend_bytes_out_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
1120 "format": "time_series",
1121 "intervalFactor": 2,
1122 "legendFormat": "OUT Front",
1126 "expr": "sum(\n rate(\n haproxy_backend_bytes_in_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
1127 "format": "time_series",
1128 "intervalFactor": 2,
1129 "legendFormat": "IN Back",
1133 "expr": "sum(\n rate(\n haproxy_backend_bytes_out_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
1134 "format": "time_series",
1135 "intervalFactor": 2,
1136 "legendFormat": "OUT Back",
1143 "title": "Current total of incoming / outgoing bytes",
1147 "value_type": "individual"
1149 "type": "timeseries",
1179 "schemaVersion": 16,
1193 "label": "Data Source",
1194 "name": "datasource",
1196 "query": "prometheus",
1199 "type": "datasource"
1204 "datasource": "$datasource",
1211 "query": "label_values(ceph_osd_metadata, cluster)",
1215 "tagValuesQuery": "",
1224 "datasource": "$datasource",
1231 "query": "label_values(ceph_osd_metadata{}, job)",
1235 "tagValuesQuery": "",
1244 "datasource": "$datasource",
1249 "name": "rgw_servers",
1251 "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
1253 "regex": "",
1255 "tagValuesQuery": "",
1264 "datasource": "$datasource",
1267 "label": "HTTP Code",
1271 "query": "label_values(haproxy_server_http_responses_total{job=~\"$job_haproxy\", instance=~\"$ingress_service\"}, code)",
1275 "tagValuesQuery": "",
1284 "datasource": "$datasource",
1287 "label": "job haproxy",
1289 "name": "job_haproxy",
1291 "query": "label_values(haproxy_server_status, job)",
1295 "tagValuesQuery": "",
1304 "datasource": "$datasource",
1307 "label": "Ingress Service",
1309 "name": "ingress_service",
1311 "query": "label_values(haproxy_server_status{job=~\"$job_haproxy\"}, instance)",
1315 "tagValuesQuery": "",
1328 "refresh_intervals": [
1353 "title": "RGW Overview",