21 "datasource": "-- Grafana --",
24 "iconColor": "rgba(0, 211, 255, 1)",
25 "name": "Annotations & Alerts",
36 "hideControls": false,
52 "repeatIteration": null,
55 "title": "RGW Overview - All Gateways",
64 "datasource": "$datasource",
76 "alignAsTable": false,
90 "nullPointMode": "null",
96 "seriesOverrides": [ ],
102 "expr": "rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"}\n",
103 "format": "time_series",
105 "legendFormat": "GET AVG",
109 "expr": "rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"}\n",
110 "format": "time_series",
112 "legendFormat": "PUT AVG",
119 "title": "Average GET/PUT Latencies",
123 "value_type": "individual"
157 "datasource": "$datasource",
169 "alignAsTable": false,
183 "nullPointMode": "null",
189 "seriesOverrides": [ ],
192 "steppedLine": false,
195 "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
196 "format": "time_series",
198 "legendFormat": "{{rgw_host}}",
205 "title": "Total Requests/sec by RGW Instance",
209 "value_type": "individual"
243 "datasource": "$datasource",
244 "description": "Latencies are shown stacked, without a y-axis, to provide a visual indication of GET latency imbalance across RGW hosts",
255 "alignAsTable": false,
269 "nullPointMode": "null",
275 "seriesOverrides": [ ],
278 "steppedLine": false,
281 "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
282 "format": "time_series",
284 "legendFormat": "{{rgw_host}}",
291 "title": "GET Latencies by RGW Instance",
295 "value_type": "individual"
329 "datasource": "$datasource",
330 "description": "Total bytes transferred in/out of all radosgw instances within the cluster",
341 "alignAsTable": false,
355 "nullPointMode": "null",
361 "seriesOverrides": [ ],
364 "steppedLine": false,
367 "expr": "sum(rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]))",
368 "format": "time_series",
370 "legendFormat": "GETs",
374 "expr": "sum(rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]))",
375 "format": "time_series",
377 "legendFormat": "PUTs",
384 "title": "Bandwidth Consumed by Type",
388 "value_type": "individual"
422 "datasource": "$datasource",
423 "description": "Total bytes transferred in/out through get/put operations, by radosgw instance",
434 "alignAsTable": false,
448 "nullPointMode": "null",
454 "seriesOverrides": [ ],
457 "steppedLine": false,
460 "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
461 "format": "time_series",
463 "legendFormat": "{{rgw_host}}",
470 "title": "Bandwidth by RGW Instance",
474 "value_type": "individual"
508 "datasource": "$datasource",
509 "description": "Latencies are shown stacked, without a y-axis, to provide a visual indication of PUT latency imbalance across RGW hosts",
520 "alignAsTable": false,
534 "nullPointMode": "null",
540 "seriesOverrides": [ ],
543 "steppedLine": false,
546 "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
547 "format": "time_series",
549 "legendFormat": "{{rgw_host}}",
556 "title": "PUT Latencies by RGW Instance",
560 "value_type": "individual"
601 "repeatIteration": null,
604 "title": "RGW Overview - HAProxy Metrics",
613 "datasource": "$datasource",
625 "alignAsTable": true,
639 "nullPointMode": "null",
648 "alias": "/.*Back.*/",
649 "transform": "negative-Y"
667 "alias": "/.*other.*/"
673 "steppedLine": false,
676 "expr": "sum(\n rate(\n haproxy_frontend_http_responses_total{code=~\"$code\", job=~\"$job_haproxy\", instance=~\"$ingress_service\", proxy=~\"frontend\"}[$__rate_interval]\n )\n) by (code)\n",
677 "format": "time_series",
679 "legendFormat": "Frontend {{ code }}",
683 "expr": "sum(\n rate(\n haproxy_backend_http_responses_total{code=~\"$code\", job=~\"$job_haproxy\", instance=~\"$ingress_service\", proxy=~\"backend\"}[$__rate_interval]\n )\n) by (code)\n",
684 "format": "time_series",
686 "legendFormat": "Backend {{ code }}",
693 "title": "Total responses by HTTP code",
697 "value_type": "individual"
731 "datasource": "$datasource",
743 "alignAsTable": true,
757 "nullPointMode": "null",
766 "alias": "/.*Response.*/",
767 "transform": "negative-Y"
770 "alias": "/.*Backend.*/",
771 "transform": "negative-Y"
777 "steppedLine": false,
780 "expr": "sum(\n rate(\n haproxy_frontend_http_requests_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
781 "format": "time_series",
783 "legendFormat": "Requests",
787 "expr": "sum(\n rate(\n haproxy_backend_response_errors_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
788 "format": "time_series",
790 "legendFormat": "Response errors",
794 "expr": "sum(\n rate(\n haproxy_frontend_request_errors_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
795 "format": "time_series",
797 "legendFormat": "Request errors",
801 "expr": "sum(\n rate(\n haproxy_backend_redispatch_warnings_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
802 "format": "time_series",
804 "legendFormat": "Backend redispatch",
808 "expr": "sum(\n rate(\n haproxy_backend_retry_warnings_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
809 "format": "time_series",
811 "legendFormat": "Backend retry",
815 "expr": "sum(\n rate(\n haproxy_frontend_requests_denied_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
816 "format": "time_series",
818 "legendFormat": "Request denied",
822 "expr": "sum(\n haproxy_backend_current_queue{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}\n) by (instance)\n",
823 "format": "time_series",
825 "legendFormat": "Backend Queued",
832 "title": "Total requests / responses",
836 "value_type": "individual"
870 "datasource": "$datasource",
882 "alignAsTable": true,
896 "nullPointMode": "null",
905 "alias": "/.*Back.*/",
906 "transform": "negative-Y"
912 "steppedLine": false,
915 "expr": "sum(\n rate(\n haproxy_frontend_connections_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
916 "format": "time_series",
918 "legendFormat": "Front",
922 "expr": "sum(\n rate(\n haproxy_backend_connection_attempts_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
923 "format": "time_series",
925 "legendFormat": "Back",
929 "expr": "sum(\n rate(\n haproxy_backend_connection_errors_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
930 "format": "time_series",
932 "legendFormat": "Back errors",
939 "title": "Total number of connections",
943 "value_type": "individual"
977 "datasource": "$datasource",
989 "alignAsTable": true,
1003 "nullPointMode": "null",
1004 "percentage": false,
1009 "seriesOverrides": [
1012 "alias": "/.*OUT.*/",
1013 "transform": "negative-Y"
1019 "steppedLine": false,
1022 "expr": "sum(\n rate(\n haproxy_frontend_bytes_in_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
1023 "format": "time_series",
1024 "intervalFactor": 1,
1025 "legendFormat": "IN Front",
1029 "expr": "sum(\n rate(\n haproxy_frontend_bytes_out_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
1030 "format": "time_series",
1031 "intervalFactor": 2,
1032 "legendFormat": "OUT Front",
1036 "expr": "sum(\n rate(\n haproxy_backend_bytes_in_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
1037 "format": "time_series",
1038 "intervalFactor": 2,
1039 "legendFormat": "IN Back",
1043 "expr": "sum(\n rate(\n haproxy_backend_bytes_out_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
1044 "format": "time_series",
1045 "intervalFactor": 2,
1046 "legendFormat": "OUT Back",
1053 "title": "Current rate of incoming / outgoing traffic (bits/s)",
1057 "value_type": "individual"
1089 "schemaVersion": 16,
1103 "label": "Data Source",
1104 "name": "datasource",
1106 "query": "prometheus",
1109 "type": "datasource"
1114 "datasource": "$datasource",
1121 "query": "label_values(ceph_osd_metadata, cluster)",
1125 "tagValuesQuery": "",
1134 "datasource": "$datasource",
1141 "query": "label_values(ceph_osd_metadata{}, job)",
1145 "tagValuesQuery": "",
1154 "datasource": "$datasource",
1159 "name": "rgw_servers",
1161 "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
1163 "regex": "RGW Server",
1165 "tagValuesQuery": "",
1174 "datasource": "$datasource",
1177 "label": "HTTP Code",
1181 "query": "label_values(haproxy_server_http_responses_total{job=~\"$job_haproxy\", instance=~\"$ingress_service\"}, code)",
1185 "tagValuesQuery": "",
1194 "datasource": "$datasource",
1197 "label": "job haproxy",
1199 "name": "job_haproxy",
1201 "query": "label_values(haproxy_server_status, job)",
1205 "tagValuesQuery": "",
1214 "datasource": "$datasource",
1217 "label": "Ingress Service",
1219 "name": "ingress_service",
1221 "query": "label_values(haproxy_server_status{job=~\"$job_haproxy\"}, instance)",
1225 "tagValuesQuery": "",
1238 "refresh_intervals": [
1263 "title": "RGW Overview",