]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
b128c09f | 22 | * Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
34dc7c2f BB |
23 | * Use is subject to license terms. |
24 | */ | |
25 | ||
34dc7c2f BB |
26 | /* |
27 | * This file contains the functions which analyze the status of a pool. This | |
28 | * includes both the status of an active pool, as well as the status of exported | |
29 | * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of | |
30 | * the pool. This status is independent (to a certain degree) from the state of | |
31 | * the pool. A pool's state describes only whether or not it is capable of | |
32 | * providing the necessary fault tolerance for data. The status describes the | |
33 | * overall status of devices. A pool that is online can still have a device | |
34 | * that is experiencing errors. | |
35 | * | |
36 | * Only a subset of the possible faults can be detected using 'zpool status', | |
37 | * and not all possible errors correspond to a FMA message ID. The explanation | |
38 | * is left up to the caller, depending on whether it is a live pool or an | |
39 | * import. | |
40 | */ | |
41 | ||
42 | #include <libzfs.h> | |
43 | #include <string.h> | |
44 | #include <unistd.h> | |
45 | #include "libzfs_impl.h" | |
46 | ||
/*
 * Table of message IDs, indexed by zpool_status_t value.  The order of the
 * entries must match the order of the ZPOOL_STATUS_* defines in libzfs.h.
 * Some status values are numerically larger than the last index of this
 * table; those have no message ID, and callers must bounds-check against
 * NMSGID before indexing.
 */
static char *zfs_msgid_table[] = {
	"ZFS-8000-14",		/* [0] */
	"ZFS-8000-2Q",		/* [1] */
	"ZFS-8000-3C",		/* [2] */
	"ZFS-8000-4J",		/* [3] */
	"ZFS-8000-5E",		/* [4] */
	"ZFS-8000-6X",		/* [5] */
	"ZFS-8000-72",		/* [6] */
	"ZFS-8000-8A",		/* [7] */
	"ZFS-8000-9P",		/* [8] */
	"ZFS-8000-A5",		/* [9] */
	"ZFS-8000-EY",		/* [10] */
	"ZFS-8000-HC",		/* [11] */
	"ZFS-8000-JQ",		/* [12] */
	"ZFS-8000-K4",		/* [13] */
};

/* Number of entries in zfs_msgid_table. */
#define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
70 | ||
71 | /* ARGSUSED */ | |
72 | static int | |
73 | vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) | |
74 | { | |
75 | return (state == VDEV_STATE_CANT_OPEN && | |
76 | aux == VDEV_AUX_OPEN_FAILED); | |
77 | } | |
78 | ||
79 | /* ARGSUSED */ | |
80 | static int | |
81 | vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs) | |
82 | { | |
83 | return (state == VDEV_STATE_FAULTED); | |
84 | } | |
85 | ||
86 | /* ARGSUSED */ | |
87 | static int | |
88 | vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) | |
89 | { | |
90 | return (state == VDEV_STATE_DEGRADED || errs != 0); | |
91 | } | |
92 | ||
93 | /* ARGSUSED */ | |
94 | static int | |
95 | vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) | |
96 | { | |
97 | return (state == VDEV_STATE_CANT_OPEN); | |
98 | } | |
99 | ||
100 | /* ARGSUSED */ | |
101 | static int | |
102 | vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) | |
103 | { | |
104 | return (state == VDEV_STATE_OFFLINE); | |
105 | } | |
106 | ||
107 | /* | |
108 | * Detect if any leaf devices that have seen errors or could not be opened. | |
109 | */ | |
110 | static boolean_t | |
111 | find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) | |
112 | { | |
113 | nvlist_t **child; | |
114 | vdev_stat_t *vs; | |
115 | uint_t c, children; | |
116 | char *type; | |
117 | ||
118 | /* | |
119 | * Ignore problems within a 'replacing' vdev, since we're presumably in | |
120 | * the process of repairing any such errors, and don't want to call them | |
121 | * out again. We'll pick up the fact that a resilver is happening | |
122 | * later. | |
123 | */ | |
124 | verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); | |
125 | if (strcmp(type, VDEV_TYPE_REPLACING) == 0) | |
126 | return (B_FALSE); | |
127 | ||
128 | if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, | |
129 | &children) == 0) { | |
130 | for (c = 0; c < children; c++) | |
131 | if (find_vdev_problem(child[c], func)) | |
132 | return (B_TRUE); | |
133 | } else { | |
134 | verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, | |
135 | (uint64_t **)&vs, &c) == 0); | |
136 | ||
137 | if (func(vs->vs_state, vs->vs_aux, | |
138 | vs->vs_read_errors + | |
139 | vs->vs_write_errors + | |
140 | vs->vs_checksum_errors)) | |
141 | return (B_TRUE); | |
142 | } | |
143 | ||
144 | return (B_FALSE); | |
145 | } | |
146 | ||
147 | /* | |
148 | * Active pool health status. | |
149 | * | |
150 | * To determine the status for a pool, we make several passes over the config, | |
151 | * picking the most egregious error we find. In order of importance, we do the | |
152 | * following: | |
153 | * | |
154 | * - Check for a complete and valid configuration | |
155 | * - Look for any faulted or missing devices in a non-replicated config | |
156 | * - Check for any data errors | |
157 | * - Check for any faulted or missing devices in a replicated config | |
158 | * - Look for any devices showing errors | |
159 | * - Check for any resilvering devices | |
160 | * | |
161 | * There can obviously be multiple errors within a single pool, so this routine | |
162 | * only picks the most damaging of all the current errors to report. | |
163 | */ | |
164 | static zpool_status_t | |
165 | check_status(nvlist_t *config, boolean_t isimport) | |
166 | { | |
167 | nvlist_t *nvroot; | |
168 | vdev_stat_t *vs; | |
169 | uint_t vsc; | |
170 | uint64_t nerr; | |
171 | uint64_t version; | |
172 | uint64_t stateval; | |
b128c09f | 173 | uint64_t suspended; |
34dc7c2f BB |
174 | uint64_t hostid = 0; |
175 | ||
176 | verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, | |
177 | &version) == 0); | |
178 | verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, | |
179 | &nvroot) == 0); | |
180 | verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, | |
181 | (uint64_t **)&vs, &vsc) == 0); | |
182 | verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, | |
183 | &stateval) == 0); | |
184 | (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); | |
185 | ||
186 | /* | |
187 | * Pool last accessed by another system. | |
188 | */ | |
189 | if (hostid != 0 && (unsigned long)hostid != gethostid() && | |
190 | stateval == POOL_STATE_ACTIVE) | |
191 | return (ZPOOL_STATUS_HOSTID_MISMATCH); | |
192 | ||
193 | /* | |
194 | * Newer on-disk version. | |
195 | */ | |
196 | if (vs->vs_state == VDEV_STATE_CANT_OPEN && | |
197 | vs->vs_aux == VDEV_AUX_VERSION_NEWER) | |
198 | return (ZPOOL_STATUS_VERSION_NEWER); | |
199 | ||
200 | /* | |
201 | * Check that the config is complete. | |
202 | */ | |
203 | if (vs->vs_state == VDEV_STATE_CANT_OPEN && | |
204 | vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) | |
205 | return (ZPOOL_STATUS_BAD_GUID_SUM); | |
206 | ||
b128c09f BB |
207 | /* |
208 | * Check whether the pool has suspended due to failed I/O. | |
209 | */ | |
210 | if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED, | |
211 | &suspended) == 0) { | |
212 | if (suspended == ZIO_FAILURE_MODE_CONTINUE) | |
213 | return (ZPOOL_STATUS_IO_FAILURE_CONTINUE); | |
214 | return (ZPOOL_STATUS_IO_FAILURE_WAIT); | |
215 | } | |
216 | ||
217 | /* | |
218 | * Could not read a log. | |
219 | */ | |
220 | if (vs->vs_state == VDEV_STATE_CANT_OPEN && | |
221 | vs->vs_aux == VDEV_AUX_BAD_LOG) { | |
222 | return (ZPOOL_STATUS_BAD_LOG); | |
223 | } | |
224 | ||
34dc7c2f BB |
225 | /* |
226 | * Bad devices in non-replicated config. | |
227 | */ | |
228 | if (vs->vs_state == VDEV_STATE_CANT_OPEN && | |
229 | find_vdev_problem(nvroot, vdev_faulted)) | |
230 | return (ZPOOL_STATUS_FAULTED_DEV_NR); | |
231 | ||
232 | if (vs->vs_state == VDEV_STATE_CANT_OPEN && | |
233 | find_vdev_problem(nvroot, vdev_missing)) | |
234 | return (ZPOOL_STATUS_MISSING_DEV_NR); | |
235 | ||
236 | if (vs->vs_state == VDEV_STATE_CANT_OPEN && | |
237 | find_vdev_problem(nvroot, vdev_broken)) | |
238 | return (ZPOOL_STATUS_CORRUPT_LABEL_NR); | |
239 | ||
240 | /* | |
241 | * Corrupted pool metadata | |
242 | */ | |
243 | if (vs->vs_state == VDEV_STATE_CANT_OPEN && | |
244 | vs->vs_aux == VDEV_AUX_CORRUPT_DATA) | |
245 | return (ZPOOL_STATUS_CORRUPT_POOL); | |
246 | ||
247 | /* | |
248 | * Persistent data errors. | |
249 | */ | |
250 | if (!isimport) { | |
251 | if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, | |
252 | &nerr) == 0 && nerr != 0) | |
253 | return (ZPOOL_STATUS_CORRUPT_DATA); | |
254 | } | |
255 | ||
256 | /* | |
257 | * Missing devices in a replicated config. | |
258 | */ | |
259 | if (find_vdev_problem(nvroot, vdev_faulted)) | |
260 | return (ZPOOL_STATUS_FAULTED_DEV_R); | |
261 | if (find_vdev_problem(nvroot, vdev_missing)) | |
262 | return (ZPOOL_STATUS_MISSING_DEV_R); | |
263 | if (find_vdev_problem(nvroot, vdev_broken)) | |
264 | return (ZPOOL_STATUS_CORRUPT_LABEL_R); | |
265 | ||
266 | /* | |
267 | * Devices with errors | |
268 | */ | |
269 | if (!isimport && find_vdev_problem(nvroot, vdev_errors)) | |
270 | return (ZPOOL_STATUS_FAILING_DEV); | |
271 | ||
272 | /* | |
273 | * Offlined devices | |
274 | */ | |
275 | if (find_vdev_problem(nvroot, vdev_offlined)) | |
276 | return (ZPOOL_STATUS_OFFLINE_DEV); | |
277 | ||
278 | /* | |
279 | * Currently resilvering | |
280 | */ | |
281 | if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) | |
282 | return (ZPOOL_STATUS_RESILVERING); | |
283 | ||
284 | /* | |
285 | * Outdated, but usable, version | |
286 | */ | |
287 | if (version < SPA_VERSION) | |
288 | return (ZPOOL_STATUS_VERSION_OLDER); | |
289 | ||
290 | return (ZPOOL_STATUS_OK); | |
291 | } | |
292 | ||
293 | zpool_status_t | |
294 | zpool_get_status(zpool_handle_t *zhp, char **msgid) | |
295 | { | |
296 | zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); | |
297 | ||
298 | if (ret >= NMSGID) | |
299 | *msgid = NULL; | |
300 | else | |
301 | *msgid = zfs_msgid_table[ret]; | |
302 | ||
303 | return (ret); | |
304 | } | |
305 | ||
306 | zpool_status_t | |
307 | zpool_import_status(nvlist_t *config, char **msgid) | |
308 | { | |
309 | zpool_status_t ret = check_status(config, B_TRUE); | |
310 | ||
311 | if (ret >= NMSGID) | |
312 | *msgid = NULL; | |
313 | else | |
314 | *msgid = zfs_msgid_table[ret]; | |
315 | ||
316 | return (ret); | |
317 | } |