4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 * Copyright (c) 2013 Steven Hartland. All rights reserved.
29 * This file contains the functions which analyze the status of a pool. This
30 * includes both the status of an active pool, as well as the status of
31 * exported pools. Returns one of the ZPOOL_STATUS_* defines describing the status of
32 * the pool. This status is independent (to a certain degree) from the state of
33 * the pool. A pool's state describes only whether or not it is capable of
34 * providing the necessary fault tolerance for data. The status describes the
35 * overall status of devices. A pool that is online can still have a device
36 * that is experiencing errors.
38 * Only a subset of the possible faults can be detected using 'zpool status',
39 * and not all possible errors correspond to a FMA message ID. The explanation
40 * is left up to the caller, depending on whether it is a live pool or an
48 #include <sys/systeminfo.h>
49 #include "libzfs_impl.h"
50 #include "zfeature_common.h"
53 * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines
54 * in libzfs.h. Note that there are some status results which go past the end
55 * of this table, and hence have no associated message ID.
57 static char *zfs_msgid_table
[] = {
/* Number of entries in the message ID table above. */
#define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
81 vdev_missing(uint64_t state
, uint64_t aux
, uint64_t errs
)
83 return (state
== VDEV_STATE_CANT_OPEN
&&
84 aux
== VDEV_AUX_OPEN_FAILED
);
89 vdev_faulted(uint64_t state
, uint64_t aux
, uint64_t errs
)
91 return (state
== VDEV_STATE_FAULTED
);
96 vdev_errors(uint64_t state
, uint64_t aux
, uint64_t errs
)
98 return (state
== VDEV_STATE_DEGRADED
|| errs
!= 0);
103 vdev_broken(uint64_t state
, uint64_t aux
, uint64_t errs
)
105 return (state
== VDEV_STATE_CANT_OPEN
);
110 vdev_offlined(uint64_t state
, uint64_t aux
, uint64_t errs
)
112 return (state
== VDEV_STATE_OFFLINE
);
117 vdev_removed(uint64_t state
, uint64_t aux
, uint64_t errs
)
119 return (state
== VDEV_STATE_REMOVED
);
123 * Detect if any leaf devices that have seen errors or could not be opened.
126 find_vdev_problem(nvlist_t
*vdev
, int (*func
)(uint64_t, uint64_t, uint64_t))
134 * Ignore problems within a 'replacing' vdev, since we're presumably in
135 * the process of repairing any such errors, and don't want to call them
136 * out again. We'll pick up the fact that a resilver is happening
139 verify(nvlist_lookup_string(vdev
, ZPOOL_CONFIG_TYPE
, &type
) == 0);
140 if (strcmp(type
, VDEV_TYPE_REPLACING
) == 0)
143 if (nvlist_lookup_nvlist_array(vdev
, ZPOOL_CONFIG_CHILDREN
, &child
,
145 for (c
= 0; c
< children
; c
++)
146 if (find_vdev_problem(child
[c
], func
))
149 verify(nvlist_lookup_uint64_array(vdev
, ZPOOL_CONFIG_VDEV_STATS
,
150 (uint64_t **)&vs
, &c
) == 0);
152 if (func(vs
->vs_state
, vs
->vs_aux
,
154 vs
->vs_write_errors
+
155 vs
->vs_checksum_errors
))
160 * Check any L2 cache devs
162 if (nvlist_lookup_nvlist_array(vdev
, ZPOOL_CONFIG_L2CACHE
, &child
,
164 for (c
= 0; c
< children
; c
++)
165 if (find_vdev_problem(child
[c
], func
))
173 * Active pool health status.
175 * To determine the status for a pool, we make several passes over the config,
176 * picking the most egregious error we find. In order of importance, we do the
179 * - Check for a complete and valid configuration
180 * - Look for any faulted or missing devices in a non-replicated config
181 * - Check for any data errors
182 * - Check for any faulted or missing devices in a replicated config
183 * - Look for any devices showing errors
184 * - Check for any resilvering devices
186 * There can obviously be multiple errors within a single pool, so this routine
187 * only picks the most damaging of all the current errors to report.
189 static zpool_status_t
190 check_status(nvlist_t
*config
, boolean_t isimport
, zpool_errata_t
*erratap
)
194 pool_scan_stat_t
*ps
= NULL
;
202 unsigned long system_hostid
= get_system_hostid();
204 verify(nvlist_lookup_uint64(config
, ZPOOL_CONFIG_VERSION
,
206 verify(nvlist_lookup_nvlist(config
, ZPOOL_CONFIG_VDEV_TREE
,
208 verify(nvlist_lookup_uint64_array(nvroot
, ZPOOL_CONFIG_VDEV_STATS
,
209 (uint64_t **)&vs
, &vsc
) == 0);
210 verify(nvlist_lookup_uint64(config
, ZPOOL_CONFIG_POOL_STATE
,
214 * Currently resilvering a vdev
216 (void) nvlist_lookup_uint64_array(nvroot
, ZPOOL_CONFIG_SCAN_STATS
,
217 (uint64_t **)&ps
, &psc
);
218 if (ps
!= NULL
&& ps
->pss_func
== POOL_SCAN_RESILVER
&&
219 ps
->pss_state
== DSS_SCANNING
)
220 return (ZPOOL_STATUS_RESILVERING
);
223 * The multihost property is set and the pool may be active.
225 if (vs
->vs_state
== VDEV_STATE_CANT_OPEN
&&
226 vs
->vs_aux
== VDEV_AUX_ACTIVE
) {
227 mmp_state_t mmp_state
;
230 nvinfo
= fnvlist_lookup_nvlist(config
, ZPOOL_CONFIG_LOAD_INFO
);
231 mmp_state
= fnvlist_lookup_uint64(nvinfo
,
232 ZPOOL_CONFIG_MMP_STATE
);
234 if (mmp_state
== MMP_STATE_ACTIVE
)
235 return (ZPOOL_STATUS_HOSTID_ACTIVE
);
236 else if (mmp_state
== MMP_STATE_NO_HOSTID
)
237 return (ZPOOL_STATUS_HOSTID_REQUIRED
);
239 return (ZPOOL_STATUS_HOSTID_MISMATCH
);
243 * Pool last accessed by another system.
245 (void) nvlist_lookup_uint64(config
, ZPOOL_CONFIG_HOSTID
, &hostid
);
246 if (hostid
!= 0 && (unsigned long)hostid
!= system_hostid
&&
247 stateval
== POOL_STATE_ACTIVE
)
248 return (ZPOOL_STATUS_HOSTID_MISMATCH
);
251 * Newer on-disk version.
253 if (vs
->vs_state
== VDEV_STATE_CANT_OPEN
&&
254 vs
->vs_aux
== VDEV_AUX_VERSION_NEWER
)
255 return (ZPOOL_STATUS_VERSION_NEWER
);
258 * Unsupported feature(s).
260 if (vs
->vs_state
== VDEV_STATE_CANT_OPEN
&&
261 vs
->vs_aux
== VDEV_AUX_UNSUP_FEAT
) {
264 verify(nvlist_lookup_nvlist(config
, ZPOOL_CONFIG_LOAD_INFO
,
266 if (nvlist_exists(nvinfo
, ZPOOL_CONFIG_CAN_RDONLY
))
267 return (ZPOOL_STATUS_UNSUP_FEAT_WRITE
);
268 return (ZPOOL_STATUS_UNSUP_FEAT_READ
);
272 * Check that the config is complete.
274 if (vs
->vs_state
== VDEV_STATE_CANT_OPEN
&&
275 vs
->vs_aux
== VDEV_AUX_BAD_GUID_SUM
)
276 return (ZPOOL_STATUS_BAD_GUID_SUM
);
279 * Check whether the pool has suspended.
281 if (nvlist_lookup_uint64(config
, ZPOOL_CONFIG_SUSPENDED
,
285 if (nvlist_lookup_uint64(config
, ZPOOL_CONFIG_SUSPENDED_REASON
,
286 &reason
) == 0 && reason
== ZIO_SUSPEND_MMP
)
287 return (ZPOOL_STATUS_IO_FAILURE_MMP
);
289 if (suspended
== ZIO_FAILURE_MODE_CONTINUE
)
290 return (ZPOOL_STATUS_IO_FAILURE_CONTINUE
);
291 return (ZPOOL_STATUS_IO_FAILURE_WAIT
);
295 * Could not read a log.
297 if (vs
->vs_state
== VDEV_STATE_CANT_OPEN
&&
298 vs
->vs_aux
== VDEV_AUX_BAD_LOG
) {
299 return (ZPOOL_STATUS_BAD_LOG
);
303 * Bad devices in non-replicated config.
305 if (vs
->vs_state
== VDEV_STATE_CANT_OPEN
&&
306 find_vdev_problem(nvroot
, vdev_faulted
))
307 return (ZPOOL_STATUS_FAULTED_DEV_NR
);
309 if (vs
->vs_state
== VDEV_STATE_CANT_OPEN
&&
310 find_vdev_problem(nvroot
, vdev_missing
))
311 return (ZPOOL_STATUS_MISSING_DEV_NR
);
313 if (vs
->vs_state
== VDEV_STATE_CANT_OPEN
&&
314 find_vdev_problem(nvroot
, vdev_broken
))
315 return (ZPOOL_STATUS_CORRUPT_LABEL_NR
);
318 * Corrupted pool metadata
320 if (vs
->vs_state
== VDEV_STATE_CANT_OPEN
&&
321 vs
->vs_aux
== VDEV_AUX_CORRUPT_DATA
)
322 return (ZPOOL_STATUS_CORRUPT_POOL
);
325 * Persistent data errors.
328 if (nvlist_lookup_uint64(config
, ZPOOL_CONFIG_ERRCOUNT
,
329 &nerr
) == 0 && nerr
!= 0)
330 return (ZPOOL_STATUS_CORRUPT_DATA
);
334 * Missing devices in a replicated config.
336 if (find_vdev_problem(nvroot
, vdev_faulted
))
337 return (ZPOOL_STATUS_FAULTED_DEV_R
);
338 if (find_vdev_problem(nvroot
, vdev_missing
))
339 return (ZPOOL_STATUS_MISSING_DEV_R
);
340 if (find_vdev_problem(nvroot
, vdev_broken
))
341 return (ZPOOL_STATUS_CORRUPT_LABEL_R
);
344 * Devices with errors
346 if (!isimport
&& find_vdev_problem(nvroot
, vdev_errors
))
347 return (ZPOOL_STATUS_FAILING_DEV
);
352 if (find_vdev_problem(nvroot
, vdev_offlined
))
353 return (ZPOOL_STATUS_OFFLINE_DEV
);
358 if (find_vdev_problem(nvroot
, vdev_removed
))
359 return (ZPOOL_STATUS_REMOVED_DEV
);
362 * Informational errata available.
364 (void) nvlist_lookup_uint64(config
, ZPOOL_CONFIG_ERRATA
, &errata
);
367 return (ZPOOL_STATUS_ERRATA
);
371 * Outdated, but usable, version
373 if (SPA_VERSION_IS_SUPPORTED(version
) && version
!= SPA_VERSION
)
374 return (ZPOOL_STATUS_VERSION_OLDER
);
377 * Usable pool with disabled features
379 if (version
>= SPA_VERSION_FEATURES
) {
384 feat
= fnvlist_lookup_nvlist(config
,
385 ZPOOL_CONFIG_LOAD_INFO
);
386 if (nvlist_exists(feat
, ZPOOL_CONFIG_ENABLED_FEAT
))
387 feat
= fnvlist_lookup_nvlist(feat
,
388 ZPOOL_CONFIG_ENABLED_FEAT
);
390 feat
= fnvlist_lookup_nvlist(config
,
391 ZPOOL_CONFIG_FEATURE_STATS
);
394 for (i
= 0; i
< SPA_FEATURES
; i
++) {
395 zfeature_info_t
*fi
= &spa_feature_table
[i
];
396 if (!nvlist_exists(feat
, fi
->fi_guid
))
397 return (ZPOOL_STATUS_FEAT_DISABLED
);
401 return (ZPOOL_STATUS_OK
);
405 zpool_get_status(zpool_handle_t
*zhp
, char **msgid
, zpool_errata_t
*errata
)
407 zpool_status_t ret
= check_status(zhp
->zpool_config
, B_FALSE
, errata
);
412 *msgid
= zfs_msgid_table
[ret
];
418 zpool_import_status(nvlist_t
*config
, char **msgid
, zpool_errata_t
*errata
)
420 zpool_status_t ret
= check_status(config
, B_TRUE
, errata
);
425 *msgid
= zfs_msgid_table
[ret
];