]> git.proxmox.com Git - ceph.git/blobdiff - ceph/monitoring/ceph-mixin/prometheus_alerts.yml
import ceph quincy 17.2.6
[ceph.git] / ceph / monitoring / ceph-mixin / prometheus_alerts.yml
index a544d41eb0ee0ac15806a52c93ea5c36856c20cf..33a5c5059b89cebd56ef6cf0558816cb70f722b6 100644 (file)
@@ -518,7 +518,7 @@ groups:
         annotations:
           description: "Pool '{{ $labels.name }}' will be full in less than 5 days assuming the average fill-up rate of the past 48 hours."
           summary: "Pool growth rate may soon exceed capacity"
-        expr: "(predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(pool_id)    group_right ceph_pool_metadata) >= 95"
+        expr: "(predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(pool_id, instance) group_right() ceph_pool_metadata) >= 95"
         labels:
           oid: "1.3.6.1.4.1.50495.1.2.1.9.2"
           severity: "warning"
@@ -563,6 +563,16 @@ groups:
         labels:
           severity: "warning"
           type: "ceph_default"
+      - alert: "CephDaemonSlowOps"
+        for: "30s"
+        expr: "ceph_daemon_health_metrics{type=\"SLOW_OPS\"} > 0"
+        labels: 
+          severity: 'warning'
+          type: 'ceph_default'
+        annotations:
+          summary: "{{ $labels.ceph_daemon }} operations are slow to complete"
+          description: "{{ $labels.ceph_daemon }} operations are taking too long to process (complaint time exceeded)"
+          documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#slow-ops"
   - name: "cephadm"
     rules:
       - alert: "CephadmUpgradeFailed"