]> git.proxmox.com Git - ceph.git/blob - ceph/monitoring/snmp/CEPH-MIB.txt
import quincy beta 17.1.0
[ceph.git] / ceph / monitoring / snmp / CEPH-MIB.txt
1 CEPH-MIB DEFINITIONS ::= BEGIN
2
3 IMPORTS
4 MODULE-IDENTITY, NOTIFICATION-TYPE, enterprises
5 FROM SNMPv2-SMI
6 MODULE-COMPLIANCE, NOTIFICATION-GROUP
7 FROM SNMPv2-CONF
8 ;
9
10 -- Linting information:
11 --
12 -- # smilint -l 6 -i notification-not-reversible ./CEPH-MIB.txt
13 --
14 -- ignore: notification-not-reversible since our SNMP gateway doesn't use SNMPv1
15 --
16
17 ceph MODULE-IDENTITY
18 LAST-UPDATED
19 "202111010000Z" -- Nov 01, 2021
20 ORGANIZATION
21 "The Ceph Project
22 https://ceph.io"
23 CONTACT-INFO
24 "Email: <dev@ceph.io>
25
26 Send comments to: <dev@ceph.io>"
27 DESCRIPTION
28 "The MIB module for Ceph. In its current form it only
29 supports Notifications, since Ceph itself doesn't provide
30 any SNMP agent functionality.
31
32 Notifications are provided through a Prometheus/Alertmanager
33 webhook passing alerts to an external gateway service that is
34 responsible for formatting, forwarding and authenticating to
35 the SNMP receiver.
36 "
37 REVISION
38 "202111010000Z" -- Nov 01, 2021
39 DESCRIPTION
40 "Latest version including the following updates:
41
42 - MIB restructure to align with linting
43 - names shortened and simplified (less verbose)
44 - Simplified structure due to switch to https://github.com/maxwo/snmp_notifier
45 - objects removed
46 - notifications updated
47 - Added module compliance
48 - Updated to latest prometheus alert rule definitions
49 "
50 ::= { enterprises 50495 } -- module root OID 1.3.6.1.4.1.50495
51
52 cephCluster OBJECT IDENTIFIER ::= { ceph 1 } -- 1.3.6.1.4.1.50495.1
53 cephConformance OBJECT IDENTIFIER ::= { ceph 2 } -- 1.3.6.1.4.1.50495.2
54
55 -- cephMetadata is a placeholder for possible future expansion via an agent
56 -- where we could provide an overview of the cluster's configuration
57 cephMetadata OBJECT IDENTIFIER ::= { cephCluster 1 } -- 1.3.6.1.4.1.50495.1.1
58 cephNotifications OBJECT IDENTIFIER ::= { cephCluster 2 } -- 1.3.6.1.4.1.50495.1.2
59
60 prometheus OBJECT IDENTIFIER ::= { cephNotifications 1 } -- 1.3.6.1.4.1.50495.1.2.1
61
62 --
63 -- Notifications: first we define the notification 'branches' for the
64 -- different categories of notifications / alerts
65 promGeneric OBJECT IDENTIFIER ::= { prometheus 1 } -- 1.3.6.1.4.1.50495.1.2.1.1
66 promHealthStatus OBJECT IDENTIFIER ::= { prometheus 2 } -- 1.3.6.1.4.1.50495.1.2.1.2
67 promMon OBJECT IDENTIFIER ::= { prometheus 3 } -- 1.3.6.1.4.1.50495.1.2.1.3
68 promOsd OBJECT IDENTIFIER ::= { prometheus 4 } -- 1.3.6.1.4.1.50495.1.2.1.4
69 promMds OBJECT IDENTIFIER ::= { prometheus 5 } -- 1.3.6.1.4.1.50495.1.2.1.5
70 promMgr OBJECT IDENTIFIER ::= { prometheus 6 } -- 1.3.6.1.4.1.50495.1.2.1.6
71 promPGs OBJECT IDENTIFIER ::= { prometheus 7 } -- 1.3.6.1.4.1.50495.1.2.1.7
72 promNode OBJECT IDENTIFIER ::= { prometheus 8 } -- 1.3.6.1.4.1.50495.1.2.1.8
73 promPool OBJECT IDENTIFIER ::= { prometheus 9 } -- 1.3.6.1.4.1.50495.1.2.1.9
74 promRados OBJECT IDENTIFIER ::= { prometheus 10 } -- 1.3.6.1.4.1.50495.1.2.1.10
75 promCephadm OBJECT IDENTIFIER ::= { prometheus 11 } -- 1.3.6.1.4.1.50495.1.2.1.11
76 promPrometheus OBJECT IDENTIFIER ::= { prometheus 12 } -- 1.3.6.1.4.1.50495.1.2.1.12
77
78 promGenericNotification NOTIFICATION-TYPE
79 STATUS current
80 DESCRIPTION "Generic alert issued when the Prometheus rule doesn't provide an OID."
81 ::= { promGeneric 1 } -- OID 1.3.6.1.4.1.50495.1.2.1.1.1
82
83 promGenericDaemonCrash NOTIFICATION-TYPE
84 STATUS current
85 DESCRIPTION "One or more daemons have crashed recently, and are yet to be archived"
86 ::= { promGeneric 2 } -- OID 1.3.6.1.4.1.50495.1.2.1.1.2
87
88 promHealthStatusError NOTIFICATION-TYPE
89 STATUS current
90 DESCRIPTION "Ceph in health_error state for too long."
91 ::= { promHealthStatus 1 } -- OID 1.3.6.1.4.1.50495.1.2.1.2.1
92
93 promHealthStatusWarning NOTIFICATION-TYPE
94 STATUS current
95 DESCRIPTION "Ceph in health_warn for too long."
96 ::= { promHealthStatus 2 } -- OID 1.3.6.1.4.1.50495.1.2.1.2.2
97
98 promMonLowQuorum NOTIFICATION-TYPE
99 STATUS current
100 DESCRIPTION "Monitor count in quorum is low."
101 ::= { promMon 1 } -- OID 1.3.6.1.4.1.50495.1.2.1.3.1
102
103 promMonDiskSpaceCritical NOTIFICATION-TYPE
104 STATUS current
105 DESCRIPTION "Monitor diskspace is critically low."
106 ::= { promMon 2 } -- OID 1.3.6.1.4.1.50495.1.2.1.3.2
107
108 promOsdDownHigh NOTIFICATION-TYPE
109 STATUS current
110 DESCRIPTION "A high number of OSDs are down."
111 ::= { promOsd 1 } -- OID 1.3.6.1.4.1.50495.1.2.1.4.1
112
113 promOsdDown NOTIFICATION-TYPE
114 STATUS current
115 DESCRIPTION "One or more Osds down."
116 ::= { promOsd 2 } -- OID 1.3.6.1.4.1.50495.1.2.1.4.2
117
118 promOsdNearFull NOTIFICATION-TYPE
119 STATUS current
120 DESCRIPTION "An OSD is dangerously full."
121 ::= { promOsd 3 } -- OID 1.3.6.1.4.1.50495.1.2.1.4.3
122
123 promOsdFlapping NOTIFICATION-TYPE
124 STATUS current
125 DESCRIPTION "An OSD was marked down and back up at least once a minute for 5 minutes."
126 ::= { promOsd 4 } -- OID 1.3.6.1.4.1.50495.1.2.1.4.4
127
128 promOsdHighPgDeviation NOTIFICATION-TYPE
129 STATUS current
130 DESCRIPTION "An OSD deviates by more than 30% from average PG count."
131 ::= { promOsd 5 } -- OID 1.3.6.1.4.1.50495.1.2.1.4.5
132
133 promOsdFull NOTIFICATION-TYPE
134 STATUS current
135 DESCRIPTION "An OSD has reached its full threshold."
136 ::= { promOsd 6 } -- OID 1.3.6.1.4.1.50495.1.2.1.4.6
137
138 promOsdHighPredictedFailures NOTIFICATION-TYPE
139 STATUS current
140 DESCRIPTION "Normal self healing unable to cope with the number of devices predicted to fail."
141 ::= { promOsd 7 } -- OID 1.3.6.1.4.1.50495.1.2.1.4.7
142
143 promOsdHostDown NOTIFICATION-TYPE
144 STATUS current
145 DESCRIPTION "Ceph OSD host is down."
146 ::= { promOsd 8 } -- OID 1.3.6.1.4.1.50495.1.2.1.4.8
147
148 promMdsDamaged NOTIFICATION-TYPE
149 STATUS current
150 DESCRIPTION "Cephfs filesystem is damaged."
151 ::= { promMds 1 } -- OID 1.3.6.1.4.1.50495.1.2.1.5.1
152
153 promMdsReadOnly NOTIFICATION-TYPE
154 STATUS current
155 DESCRIPTION "Cephfs filesystem marked as READ-ONLY"
156 ::= { promMds 2 } -- OID 1.3.6.1.4.1.50495.1.2.1.5.2
157
158 promMdsOffline NOTIFICATION-TYPE
159 STATUS current
160 DESCRIPTION "Cephfs filesystem is unavailable/offline."
161 ::= { promMds 3 } -- OID 1.3.6.1.4.1.50495.1.2.1.5.3
162
163 promMdsDegraded NOTIFICATION-TYPE
164 STATUS current
165 DESCRIPTION "Cephfs filesystem is in a degraded state."
166 ::= { promMds 4 } -- OID 1.3.6.1.4.1.50495.1.2.1.5.4
167
168 promMdsNoStandby NOTIFICATION-TYPE
169 STATUS current
170 DESCRIPTION "Cephfs MDS daemon failure, no standby available"
171 ::= { promMds 5 } -- OID 1.3.6.1.4.1.50495.1.2.1.5.5
172
173 promMgrModuleCrash NOTIFICATION-TYPE
174 STATUS current
175 DESCRIPTION "Ceph mgr module has crashed recently"
176 ::= { promMgr 1 } -- OID 1.3.6.1.4.1.50495.1.2.1.6.1
177
178 promMgrPrometheusInactive NOTIFICATION-TYPE
179 STATUS current
180 DESCRIPTION "Ceph mgr prometheus module not responding"
181 ::= { promMgr 2 } -- OID 1.3.6.1.4.1.50495.1.2.1.6.2
182
183 promPGsInactive NOTIFICATION-TYPE
184 STATUS current
185 DESCRIPTION "One or more PGs are inactive for more than 5 minutes."
186 ::= { promPGs 1 } -- OID 1.3.6.1.4.1.50495.1.2.1.7.1
187
188 promPGsUnclean NOTIFICATION-TYPE
189 STATUS current
190 DESCRIPTION "One or more PGs are not clean for more than 15 minutes."
191 ::= { promPGs 2 } -- OID 1.3.6.1.4.1.50495.1.2.1.7.2
192
193 promPGsUnavailable NOTIFICATION-TYPE
194 STATUS current
195 DESCRIPTION "One or more PGs are unavailable, blocking I/O to those objects."
196 ::= { promPGs 3 } -- OID 1.3.6.1.4.1.50495.1.2.1.7.3
197
198 promPGsDamaged NOTIFICATION-TYPE
199 STATUS current
200 DESCRIPTION "One or more PGs are damaged."
201 ::= { promPGs 4 } -- OID 1.3.6.1.4.1.50495.1.2.1.7.4
202
203 promPGsRecoveryFull NOTIFICATION-TYPE
204 STATUS current
205 DESCRIPTION "PG recovery is impaired due to full OSDs."
206 ::= { promPGs 5 } -- OID 1.3.6.1.4.1.50495.1.2.1.7.5
207
208 promPGsBackfillFull NOTIFICATION-TYPE
209 STATUS current
210 DESCRIPTION "PG backfill is impaired due to full OSDs."
211 ::= { promPGs 6 } -- OID 1.3.6.1.4.1.50495.1.2.1.7.6
212
213 promNodeRootVolumeFull NOTIFICATION-TYPE
214 STATUS current
215 DESCRIPTION "Root volume (OSD and MON store) is dangerously full (< 5% free)."
216 ::= { promNode 1 } -- OID 1.3.6.1.4.1.50495.1.2.1.8.1
217
218 promNodeNetworkPacketDrops NOTIFICATION-TYPE
219 STATUS current
220 DESCRIPTION "A node experiences packet drop > 1 packet/s on an interface."
221 ::= { promNode 2 } -- OID 1.3.6.1.4.1.50495.1.2.1.8.2
222
223 promNodeNetworkPacketErrors NOTIFICATION-TYPE
224 STATUS current
225 DESCRIPTION "A node experiences packet errors > 1 packet/s on an interface."
226 ::= { promNode 3 } -- OID 1.3.6.1.4.1.50495.1.2.1.8.3
227
228 promNodeStorageFilling NOTIFICATION-TYPE
229 STATUS current
230 DESCRIPTION "A mountpoint will be full in less than 5 days assuming the average fillup rate of the past 48 hours."
231 ::= { promNode 4 } -- OID 1.3.6.1.4.1.50495.1.2.1.8.4
232
233 promPoolFull NOTIFICATION-TYPE
234 STATUS current
235 DESCRIPTION "A pool is at 90% capacity or over."
236 ::= { promPool 1 } -- OID 1.3.6.1.4.1.50495.1.2.1.9.1
237
238 promPoolFilling NOTIFICATION-TYPE
239 STATUS current
240 DESCRIPTION "A pool will be full in less than 5 days assuming the average fillup rate of the past 48 hours."
241 ::= { promPool 2 } -- OID 1.3.6.1.4.1.50495.1.2.1.9.2
242
243 promRadosUnfound NOTIFICATION-TYPE
244 STATUS current
245 DESCRIPTION "A RADOS object can not be found, even though all OSDs are online."
246 ::= { promRados 1 } -- OID 1.3.6.1.4.1.50495.1.2.1.10.1
247
248 promCephadmDaemonDown NOTIFICATION-TYPE
249 STATUS current
250 DESCRIPTION "Cephadm has determined that a daemon is down."
251 ::= { promCephadm 1 } -- OID 1.3.6.1.4.1.50495.1.2.1.11.1
252
253 promCephadmUpgradeFailure NOTIFICATION-TYPE
254 STATUS current
255 DESCRIPTION "Cephadm attempted to upgrade the cluster and encountered a problem."
256 ::= { promCephadm 2 } -- OID 1.3.6.1.4.1.50495.1.2.1.11.2
257
258 promPrometheusJobMissing NOTIFICATION-TYPE
259 STATUS current
260 DESCRIPTION "The prometheus scrape job is not defined."
261 ::= { promPrometheus 1 } -- OID 1.3.6.1.4.1.50495.1.2.1.12.1
262 -- ---------------------------------------------------------- --
263 -- CEPH-MIB - Conformance Information
264 -- ---------------------------------------------------------- --
265
266 cephAlertGroups OBJECT IDENTIFIER ::= { cephConformance 1 } -- 1.3.6.1.4.1.50495.2.1
267 cephCompliances OBJECT IDENTIFIER ::= { cephConformance 2 } -- 1.3.6.1.4.1.50495.2.2
268
269 -- ---------------------------------------------------------- --
270 -- units of conformance
271 -- ---------------------------------------------------------- --
272
273 -- ---------------------------------------------------------- --
274 -- The Trap Notification Group
275 -- ---------------------------------------------------------- --
276
277 cephNotificationGroup NOTIFICATION-GROUP
278 NOTIFICATIONS { -- lists all 37 NOTIFICATION-TYPE definitions in this module
279 promGenericNotification,
280 promGenericDaemonCrash,
281 promHealthStatusError,
282 promHealthStatusWarning,
283 promMonLowQuorum,
284 promMonDiskSpaceCritical,
285 promOsdDownHigh,
286 promOsdDown,
287 promOsdNearFull,
288 promOsdFlapping,
289 promOsdHighPgDeviation,
290 promOsdFull,
291 promOsdHighPredictedFailures,
292 promOsdHostDown,
293 promMdsDamaged,
294 promMdsReadOnly,
295 promMdsOffline,
296 promMdsDegraded,
297 promMdsNoStandby,
298 promMgrModuleCrash,
299 promMgrPrometheusInactive,
300 promPGsInactive,
301 promPGsUnclean,
302 promPGsUnavailable,
303 promPGsDamaged,
304 promPGsRecoveryFull,
305 promPGsBackfillFull,
306 promNodeRootVolumeFull,
307 promNodeNetworkPacketDrops,
308 promNodeNetworkPacketErrors,
309 promNodeStorageFilling,
310 promPoolFull,
311 promPoolFilling,
312 promRadosUnfound,
313 promCephadmDaemonDown,
314 promCephadmUpgradeFailure,
315 promPrometheusJobMissing
316 }
317 STATUS current
318 DESCRIPTION
319 "A collection of notifications triggered by the Prometheus
320 rules to convey Ceph cluster state"
321 ::= { cephAlertGroups 2 } -- OID 1.3.6.1.4.1.50495.2.1.2
322
323 -- ---------------------------------------------------------- --
324 -- compliance statements
325 -- ---------------------------------------------------------- --
326
327 cephCompliance MODULE-COMPLIANCE
328 STATUS current
329 DESCRIPTION
330 "The Compliance statement for the Ceph MIB"
331 MODULE -- no module name given, so this refers to the enclosing module (CEPH-MIB)
332 MANDATORY-GROUPS {
333 cephNotificationGroup
334 }
335 ::= { cephCompliances 1 } -- OID 1.3.6.1.4.1.50495.2.2.1
336
337 END