]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright 2008 Sun Microsystems, Inc. All rights reserved. | |
23 | * Use is subject to license terms. | |
24 | */ | |
25 | ||
26 | #pragma ident "@(#)vdev_disk.c 1.15 08/04/09 SMI" | |
27 | ||
28 | #include <sys/zfs_context.h> | |
29 | #include <sys/spa.h> | |
30 | #include <sys/refcount.h> | |
31 | #include <sys/vdev_disk.h> | |
32 | #include <sys/vdev_impl.h> | |
33 | #include <sys/fs/zfs.h> | |
34 | #include <sys/zio.h> | |
35 | #include <sys/sunldi.h> | |
36 | ||
37 | /* | |
38 | * Virtual device vector for disks. | |
39 | */ | |
40 | ||
41 | extern ldi_ident_t zfs_li; | |
42 | ||
43 | typedef struct vdev_disk_buf { | |
44 | buf_t vdb_buf; | |
45 | zio_t *vdb_io; | |
46 | } vdev_disk_buf_t; | |
47 | ||
48 | static int | |
49 | vdev_disk_open_common(vdev_t *vd) | |
50 | { | |
51 | vdev_disk_t *dvd; | |
52 | dev_t dev; | |
53 | int error; | |
54 | ||
55 | /* | |
56 | * We must have a pathname, and it must be absolute. | |
57 | */ | |
58 | if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { | |
59 | vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; | |
60 | return (EINVAL); | |
61 | } | |
62 | ||
63 | dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); | |
64 | ||
65 | /* | |
66 | * When opening a disk device, we want to preserve the user's original | |
67 | * intent. We always want to open the device by the path the user gave | |
68 | * us, even if it is one of multiple paths to the save device. But we | |
69 | * also want to be able to survive disks being removed/recabled. | |
70 | * Therefore the sequence of opening devices is: | |
71 | * | |
72 | * 1. Try opening the device by path. For legacy pools without the | |
73 | * 'whole_disk' property, attempt to fix the path by appending 's0'. | |
74 | * | |
75 | * 2. If the devid of the device matches the stored value, return | |
76 | * success. | |
77 | * | |
78 | * 3. Otherwise, the device may have moved. Try opening the device | |
79 | * by the devid instead. | |
80 | * | |
81 | */ | |
82 | if (vd->vdev_devid != NULL) { | |
83 | if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid, | |
84 | &dvd->vd_minor) != 0) { | |
85 | vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; | |
86 | return (EINVAL); | |
87 | } | |
88 | } | |
89 | ||
90 | error = EINVAL; /* presume failure */ | |
91 | ||
92 | if (vd->vdev_path != NULL) { | |
93 | ddi_devid_t devid; | |
94 | ||
95 | if (vd->vdev_wholedisk == -1ULL) { | |
96 | size_t len = strlen(vd->vdev_path) + 3; | |
97 | char *buf = kmem_alloc(len, KM_SLEEP); | |
98 | ldi_handle_t lh; | |
99 | ||
100 | (void) snprintf(buf, len, "%ss0", vd->vdev_path); | |
101 | ||
102 | if (ldi_open_by_name(buf, spa_mode, kcred, | |
103 | &lh, zfs_li) == 0) { | |
104 | spa_strfree(vd->vdev_path); | |
105 | vd->vdev_path = buf; | |
106 | vd->vdev_wholedisk = 1ULL; | |
107 | (void) ldi_close(lh, spa_mode, kcred); | |
108 | } else { | |
109 | kmem_free(buf, len); | |
110 | } | |
111 | } | |
112 | ||
113 | error = ldi_open_by_name(vd->vdev_path, spa_mode, kcred, | |
114 | &dvd->vd_lh, zfs_li); | |
115 | ||
116 | /* | |
117 | * Compare the devid to the stored value. | |
118 | */ | |
119 | if (error == 0 && vd->vdev_devid != NULL && | |
120 | ldi_get_devid(dvd->vd_lh, &devid) == 0) { | |
121 | if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { | |
122 | error = EINVAL; | |
123 | (void) ldi_close(dvd->vd_lh, spa_mode, kcred); | |
124 | dvd->vd_lh = NULL; | |
125 | } | |
126 | ddi_devid_free(devid); | |
127 | } | |
128 | ||
129 | /* | |
130 | * If we succeeded in opening the device, but 'vdev_wholedisk' | |
131 | * is not yet set, then this must be a slice. | |
132 | */ | |
133 | if (error == 0 && vd->vdev_wholedisk == -1ULL) | |
134 | vd->vdev_wholedisk = 0; | |
135 | } | |
136 | ||
137 | /* | |
138 | * If we were unable to open by path, or the devid check fails, open by | |
139 | * devid instead. | |
140 | */ | |
141 | if (error != 0 && vd->vdev_devid != NULL) | |
142 | error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor, | |
143 | spa_mode, kcred, &dvd->vd_lh, zfs_li); | |
144 | ||
145 | /* | |
146 | * If all else fails, then try opening by physical path (if available) | |
147 | * or the logical path (if we failed due to the devid check). While not | |
148 | * as reliable as the devid, this will give us something, and the higher | |
149 | * level vdev validation will prevent us from opening the wrong device. | |
150 | */ | |
151 | if (error) { | |
152 | if (vd->vdev_physpath != NULL && | |
153 | (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != ENODEV) | |
154 | error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode, | |
155 | kcred, &dvd->vd_lh, zfs_li); | |
156 | ||
157 | /* | |
158 | * Note that we don't support the legacy auto-wholedisk support | |
159 | * as above. This hasn't been used in a very long time and we | |
160 | * don't need to propagate its oddities to this edge condition. | |
161 | */ | |
162 | if (error && vd->vdev_path != NULL) | |
163 | error = ldi_open_by_name(vd->vdev_path, spa_mode, kcred, | |
164 | &dvd->vd_lh, zfs_li); | |
165 | } | |
166 | ||
167 | if (error) | |
168 | vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; | |
169 | ||
170 | return (error); | |
171 | } | |
172 | ||
173 | static int | |
174 | vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) | |
175 | { | |
176 | vdev_disk_t *dvd; | |
177 | struct dk_minfo dkm; | |
178 | int error; | |
179 | dev_t dev; | |
180 | int otyp; | |
181 | ||
182 | error = vdev_disk_open_common(vd); | |
183 | if (error) | |
184 | return (error); | |
185 | ||
186 | dvd = vd->vdev_tsd; | |
187 | /* | |
188 | * Once a device is opened, verify that the physical device path (if | |
189 | * available) is up to date. | |
190 | */ | |
191 | if (ldi_get_dev(dvd->vd_lh, &dev) == 0 && | |
192 | ldi_get_otyp(dvd->vd_lh, &otyp) == 0) { | |
193 | char *physpath, *minorname; | |
194 | ||
195 | physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); | |
196 | minorname = NULL; | |
197 | if (ddi_dev_pathname(dev, otyp, physpath) == 0 && | |
198 | ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 && | |
199 | (vd->vdev_physpath == NULL || | |
200 | strcmp(vd->vdev_physpath, physpath) != 0)) { | |
201 | if (vd->vdev_physpath) | |
202 | spa_strfree(vd->vdev_physpath); | |
203 | (void) strlcat(physpath, ":", MAXPATHLEN); | |
204 | (void) strlcat(physpath, minorname, MAXPATHLEN); | |
205 | vd->vdev_physpath = spa_strdup(physpath); | |
206 | } | |
207 | if (minorname) | |
208 | kmem_free(minorname, strlen(minorname) + 1); | |
209 | kmem_free(physpath, MAXPATHLEN); | |
210 | } | |
211 | ||
212 | /* | |
213 | * Determine the actual size of the device. | |
214 | */ | |
215 | if (ldi_get_size(dvd->vd_lh, psize) != 0) { | |
216 | vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; | |
217 | return (EINVAL); | |
218 | } | |
219 | ||
220 | /* | |
221 | * If we own the whole disk, try to enable disk write caching. | |
222 | * We ignore errors because it's OK if we can't do it. | |
223 | */ | |
224 | if (vd->vdev_wholedisk == 1) { | |
225 | int wce = 1; | |
226 | (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce, | |
227 | FKIOCTL, kcred, NULL); | |
228 | } | |
229 | ||
230 | /* | |
231 | * Determine the device's minimum transfer size. | |
232 | * If the ioctl isn't supported, assume DEV_BSIZE. | |
233 | */ | |
234 | if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO, (intptr_t)&dkm, | |
235 | FKIOCTL, kcred, NULL) != 0) | |
236 | dkm.dki_lbsize = DEV_BSIZE; | |
237 | ||
238 | *ashift = highbit(MAX(dkm.dki_lbsize, SPA_MINBLOCKSIZE)) - 1; | |
239 | ||
240 | /* | |
241 | * Clear the nowritecache bit, so that on a vdev_reopen() we will | |
242 | * try again. | |
243 | */ | |
244 | vd->vdev_nowritecache = B_FALSE; | |
245 | ||
246 | return (0); | |
247 | } | |
248 | ||
249 | static void | |
250 | vdev_disk_close(vdev_t *vd) | |
251 | { | |
252 | vdev_disk_t *dvd = vd->vdev_tsd; | |
253 | ||
254 | if (dvd == NULL) | |
255 | return; | |
256 | ||
257 | if (dvd->vd_minor != NULL) | |
258 | ddi_devid_str_free(dvd->vd_minor); | |
259 | ||
260 | if (dvd->vd_devid != NULL) | |
261 | ddi_devid_free(dvd->vd_devid); | |
262 | ||
263 | if (dvd->vd_lh != NULL) | |
264 | (void) ldi_close(dvd->vd_lh, spa_mode, kcred); | |
265 | ||
266 | kmem_free(dvd, sizeof (vdev_disk_t)); | |
267 | vd->vdev_tsd = NULL; | |
268 | } | |
269 | ||
270 | int | |
271 | vdev_disk_physio(ldi_handle_t vd_lh, caddr_t data, size_t size, | |
272 | uint64_t offset, int flags) | |
273 | { | |
274 | buf_t *bp; | |
275 | int error = 0; | |
276 | ||
277 | if (vd_lh == NULL) | |
278 | return (EINVAL); | |
279 | ||
280 | ASSERT(flags & B_READ || flags & B_WRITE); | |
281 | ||
282 | bp = getrbuf(KM_SLEEP); | |
283 | bp->b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST; | |
284 | bp->b_bcount = size; | |
285 | bp->b_un.b_addr = (void *)data; | |
286 | bp->b_lblkno = lbtodb(offset); | |
287 | bp->b_bufsize = size; | |
288 | ||
289 | error = ldi_strategy(vd_lh, bp); | |
290 | ASSERT(error == 0); | |
291 | if ((error = biowait(bp)) == 0 && bp->b_resid != 0) | |
292 | error = EIO; | |
293 | freerbuf(bp); | |
294 | ||
295 | return (error); | |
296 | } | |
297 | ||
298 | static int | |
299 | vdev_disk_probe_io(vdev_t *vd, caddr_t data, size_t size, uint64_t offset, | |
300 | int flags) | |
301 | { | |
302 | int error = 0; | |
303 | vdev_disk_t *dvd = vd->vdev_tsd; | |
304 | ||
305 | if (vd == NULL || dvd == NULL || dvd->vd_lh == NULL) | |
306 | return (EINVAL); | |
307 | ||
308 | error = vdev_disk_physio(dvd->vd_lh, data, size, offset, flags); | |
309 | ||
310 | if (zio_injection_enabled && error == 0) | |
311 | error = zio_handle_device_injection(vd, EIO); | |
312 | ||
313 | return (error); | |
314 | } | |
315 | ||
316 | /* | |
317 | * Determine if the underlying device is accessible by reading and writing | |
318 | * to a known location. We must be able to do this during syncing context | |
319 | * and thus we cannot set the vdev state directly. | |
320 | */ | |
321 | static int | |
322 | vdev_disk_probe(vdev_t *vd) | |
323 | { | |
324 | uint64_t offset; | |
325 | vdev_t *nvd; | |
326 | int l, error = 0, retries = 0; | |
327 | char *vl_pad; | |
328 | ||
329 | if (vd == NULL) | |
330 | return (EINVAL); | |
331 | ||
332 | /* Hijack the current vdev */ | |
333 | nvd = vd; | |
334 | ||
335 | /* | |
336 | * Pick a random label to rewrite. | |
337 | */ | |
338 | l = spa_get_random(VDEV_LABELS); | |
339 | ASSERT(l < VDEV_LABELS); | |
340 | ||
341 | offset = vdev_label_offset(vd->vdev_psize, l, | |
342 | offsetof(vdev_label_t, vl_pad)); | |
343 | ||
344 | vl_pad = kmem_alloc(VDEV_SKIP_SIZE, KM_SLEEP); | |
345 | ||
346 | /* | |
347 | * Try to read and write to a special location on the | |
348 | * label. We use the existing vdev initially and only | |
349 | * try to create and reopen it if we encounter a failure. | |
350 | */ | |
351 | while ((error = vdev_disk_probe_io(nvd, vl_pad, VDEV_SKIP_SIZE, | |
352 | offset, B_READ)) != 0 && retries == 0) { | |
353 | ||
354 | nvd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP); | |
355 | if (vd->vdev_path) | |
356 | nvd->vdev_path = spa_strdup(vd->vdev_path); | |
357 | if (vd->vdev_physpath) | |
358 | nvd->vdev_physpath = spa_strdup(vd->vdev_physpath); | |
359 | if (vd->vdev_devid) | |
360 | nvd->vdev_devid = spa_strdup(vd->vdev_devid); | |
361 | nvd->vdev_wholedisk = vd->vdev_wholedisk; | |
362 | nvd->vdev_guid = vd->vdev_guid; | |
363 | retries++; | |
364 | ||
365 | error = vdev_disk_open_common(nvd); | |
366 | if (error) | |
367 | break; | |
368 | } | |
369 | ||
370 | if (!error) { | |
371 | error = vdev_disk_probe_io(nvd, vl_pad, VDEV_SKIP_SIZE, | |
372 | offset, B_WRITE); | |
373 | } | |
374 | ||
375 | /* Clean up if we allocated a new vdev */ | |
376 | if (retries) { | |
377 | vdev_disk_close(nvd); | |
378 | if (nvd->vdev_path) | |
379 | spa_strfree(nvd->vdev_path); | |
380 | if (nvd->vdev_physpath) | |
381 | spa_strfree(nvd->vdev_physpath); | |
382 | if (nvd->vdev_devid) | |
383 | spa_strfree(nvd->vdev_devid); | |
384 | kmem_free(nvd, sizeof (vdev_t)); | |
385 | } | |
386 | kmem_free(vl_pad, VDEV_SKIP_SIZE); | |
387 | ||
388 | /* Reset the failing flag */ | |
389 | if (!error) | |
390 | vd->vdev_is_failing = B_FALSE; | |
391 | ||
392 | return (error); | |
393 | } | |
394 | ||
395 | static void | |
396 | vdev_disk_io_intr(buf_t *bp) | |
397 | { | |
398 | vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp; | |
399 | zio_t *zio = vdb->vdb_io; | |
400 | ||
401 | if ((zio->io_error = geterror(bp)) == 0 && bp->b_resid != 0) | |
402 | zio->io_error = EIO; | |
403 | ||
404 | kmem_free(vdb, sizeof (vdev_disk_buf_t)); | |
405 | ||
406 | zio_interrupt(zio); | |
407 | } | |
408 | ||
409 | static void | |
410 | vdev_disk_ioctl_done(void *zio_arg, int error) | |
411 | { | |
412 | zio_t *zio = zio_arg; | |
413 | ||
414 | zio->io_error = error; | |
415 | ||
416 | zio_interrupt(zio); | |
417 | } | |
418 | ||
419 | static int | |
420 | vdev_disk_io_start(zio_t *zio) | |
421 | { | |
422 | vdev_t *vd = zio->io_vd; | |
423 | vdev_disk_t *dvd = vd->vdev_tsd; | |
424 | vdev_disk_buf_t *vdb; | |
425 | buf_t *bp; | |
426 | int flags, error; | |
427 | ||
428 | if (zio->io_type == ZIO_TYPE_IOCTL) { | |
429 | zio_vdev_io_bypass(zio); | |
430 | ||
431 | /* XXPOLICY */ | |
432 | if (!vdev_readable(vd)) { | |
433 | zio->io_error = ENXIO; | |
434 | return (ZIO_PIPELINE_CONTINUE); | |
435 | } | |
436 | ||
437 | switch (zio->io_cmd) { | |
438 | ||
439 | case DKIOCFLUSHWRITECACHE: | |
440 | ||
441 | if (zfs_nocacheflush) | |
442 | break; | |
443 | ||
444 | if (vd->vdev_nowritecache) { | |
445 | zio->io_error = ENOTSUP; | |
446 | break; | |
447 | } | |
448 | ||
449 | zio->io_dk_callback.dkc_callback = vdev_disk_ioctl_done; | |
450 | zio->io_dk_callback.dkc_flag = FLUSH_VOLATILE; | |
451 | zio->io_dk_callback.dkc_cookie = zio; | |
452 | ||
453 | error = ldi_ioctl(dvd->vd_lh, zio->io_cmd, | |
454 | (uintptr_t)&zio->io_dk_callback, | |
455 | FKIOCTL, kcred, NULL); | |
456 | ||
457 | if (error == 0) { | |
458 | /* | |
459 | * The ioctl will be done asychronously, | |
460 | * and will call vdev_disk_ioctl_done() | |
461 | * upon completion. | |
462 | */ | |
463 | return (ZIO_PIPELINE_STOP); | |
464 | } | |
465 | ||
466 | if (error == ENOTSUP || error == ENOTTY) { | |
467 | /* | |
468 | * If we get ENOTSUP or ENOTTY, we know that | |
469 | * no future attempts will ever succeed. | |
470 | * In this case we set a persistent bit so | |
471 | * that we don't bother with the ioctl in the | |
472 | * future. | |
473 | */ | |
474 | vd->vdev_nowritecache = B_TRUE; | |
475 | } | |
476 | zio->io_error = error; | |
477 | ||
478 | break; | |
479 | ||
480 | default: | |
481 | zio->io_error = ENOTSUP; | |
482 | } | |
483 | ||
484 | return (ZIO_PIPELINE_CONTINUE); | |
485 | } | |
486 | ||
487 | if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0) | |
488 | return (ZIO_PIPELINE_STOP); | |
489 | ||
490 | if ((zio = vdev_queue_io(zio)) == NULL) | |
491 | return (ZIO_PIPELINE_STOP); | |
492 | ||
493 | if (zio->io_type == ZIO_TYPE_WRITE) | |
494 | error = vdev_writeable(vd) ? vdev_error_inject(vd, zio) : ENXIO; | |
495 | else | |
496 | error = vdev_readable(vd) ? vdev_error_inject(vd, zio) : ENXIO; | |
497 | error = (vd->vdev_remove_wanted || vd->vdev_is_failing) ? ENXIO : error; | |
498 | ||
499 | if (error) { | |
500 | zio->io_error = error; | |
501 | zio_interrupt(zio); | |
502 | return (ZIO_PIPELINE_STOP); | |
503 | } | |
504 | ||
505 | flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE); | |
506 | flags |= B_BUSY | B_NOCACHE; | |
507 | if (zio->io_flags & ZIO_FLAG_FAILFAST) | |
508 | flags |= B_FAILFAST; | |
509 | ||
510 | vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP); | |
511 | ||
512 | vdb->vdb_io = zio; | |
513 | bp = &vdb->vdb_buf; | |
514 | ||
515 | bioinit(bp); | |
516 | bp->b_flags = flags; | |
517 | bp->b_bcount = zio->io_size; | |
518 | bp->b_un.b_addr = zio->io_data; | |
519 | bp->b_lblkno = lbtodb(zio->io_offset); | |
520 | bp->b_bufsize = zio->io_size; | |
521 | bp->b_iodone = (int (*)())vdev_disk_io_intr; | |
522 | ||
523 | error = ldi_strategy(dvd->vd_lh, bp); | |
524 | /* ldi_strategy() will return non-zero only on programming errors */ | |
525 | ASSERT(error == 0); | |
526 | ||
527 | return (ZIO_PIPELINE_STOP); | |
528 | } | |
529 | ||
530 | static int | |
531 | vdev_disk_io_done(zio_t *zio) | |
532 | { | |
533 | vdev_queue_io_done(zio); | |
534 | ||
535 | if (zio->io_type == ZIO_TYPE_WRITE) | |
536 | vdev_cache_write(zio); | |
537 | ||
538 | if (zio_injection_enabled && zio->io_error == 0) | |
539 | zio->io_error = zio_handle_device_injection(zio->io_vd, EIO); | |
540 | ||
541 | /* | |
542 | * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if | |
543 | * the device has been removed. If this is the case, then we trigger an | |
544 | * asynchronous removal of the device. Otherwise, probe the device and | |
545 | * make sure it's still accessible. | |
546 | */ | |
547 | if (zio->io_error == EIO) { | |
548 | vdev_t *vd = zio->io_vd; | |
549 | vdev_disk_t *dvd = vd->vdev_tsd; | |
550 | int state; | |
551 | ||
552 | state = DKIO_NONE; | |
553 | if (dvd && ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state, | |
554 | FKIOCTL, kcred, NULL) == 0 && | |
555 | state != DKIO_INSERTED) { | |
556 | vd->vdev_remove_wanted = B_TRUE; | |
557 | spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); | |
558 | } else if (vdev_probe(vd) != 0) { | |
559 | ASSERT(vd->vdev_ops->vdev_op_leaf); | |
560 | vd->vdev_is_failing = B_TRUE; | |
561 | } | |
562 | } | |
563 | ||
564 | return (ZIO_PIPELINE_CONTINUE); | |
565 | } | |
566 | ||
/*
 * Operations vector for disk vdevs.  The initializer is positional, so the
 * order of the entries must match the member order of vdev_ops_t.
 */
vdev_ops_t vdev_disk_ops = {
	vdev_disk_open,
	vdev_disk_close,
	vdev_disk_probe,
	vdev_default_asize,
	vdev_disk_io_start,
	vdev_disk_io_done,
	/*
	 * No handler supplied for this slot.
	 * NOTE(review): the slot's meaning is defined by vdev_ops_t, which is
	 * not visible in this file -- confirm against its declaration.
	 */
	NULL,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};
578 | ||
579 | /* | |
580 | * Given the root disk device pathname, read the label from the device, | |
581 | * and construct a configuration nvlist. | |
582 | */ | |
583 | nvlist_t * | |
584 | vdev_disk_read_rootlabel(char *devpath) | |
585 | { | |
586 | nvlist_t *config = NULL; | |
587 | ldi_handle_t vd_lh; | |
588 | vdev_label_t *label; | |
589 | uint64_t s, size; | |
590 | int l; | |
591 | ||
592 | /* | |
593 | * Read the device label and build the nvlist. | |
594 | */ | |
595 | if (ldi_open_by_name(devpath, FREAD, kcred, &vd_lh, zfs_li)) | |
596 | return (NULL); | |
597 | ||
598 | if (ldi_get_size(vd_lh, &s)) | |
599 | return (NULL); | |
600 | ||
601 | size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t); | |
602 | label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP); | |
603 | ||
604 | for (l = 0; l < VDEV_LABELS; l++) { | |
605 | uint64_t offset, state, txg = 0; | |
606 | ||
607 | /* read vdev label */ | |
608 | offset = vdev_label_offset(size, l, 0); | |
609 | if (vdev_disk_physio(vd_lh, (caddr_t)label, | |
610 | VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE + | |
611 | VDEV_PHYS_SIZE, offset, B_READ) != 0) | |
612 | continue; | |
613 | ||
614 | if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, | |
615 | sizeof (label->vl_vdev_phys.vp_nvlist), &config, 0) != 0) { | |
616 | config = NULL; | |
617 | continue; | |
618 | } | |
619 | ||
620 | if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, | |
621 | &state) != 0 || state >= POOL_STATE_DESTROYED) { | |
622 | nvlist_free(config); | |
623 | config = NULL; | |
624 | continue; | |
625 | } | |
626 | ||
627 | if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, | |
628 | &txg) != 0 || txg == 0) { | |
629 | nvlist_free(config); | |
630 | config = NULL; | |
631 | continue; | |
632 | } | |
633 | ||
634 | break; | |
635 | } | |
636 | ||
637 | kmem_free(label, sizeof (vdev_label_t)); | |
638 | return (config); | |
639 | } |