/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
#include <sys/vdev_disk.h>
#include <sys/vdev_impl.h>
#include <sys/abd.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <linux/mod_compat.h>
#include <linux/msdos_fs.h>
#include <linux/vfs_compat.h>

char *zfs_vdev_scheduler = VDEV_SCHEDULER;
static void *zfs_vdev_holder = VDEV_HOLDER;

/* size of the "reserved" partition, in blocks */
#define	EFI_MIN_RESV_SIZE	(16 * 1024)
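
/*
 * Illustrative arithmetic, assuming conventional 512-byte blocks:
 * 16 * 1024 blocks * 512 bytes = 8 MiB, i.e. the familiar 8 MiB
 * reserved partition created on wholedisk vdevs.
 */
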
/*
 * Virtual device vector for disks.
 */
typedef struct dio_request {
	zio_t			*dr_zio;	/* Parent ZIO */
	atomic_t		dr_ref;		/* References */
	int			dr_error;	/* Bio error */
	int			dr_bio_count;	/* Count of bio's */
	struct bio		*dr_bio[0];	/* Attached bio's */
} dio_request_t;
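
/*
 * dr_bio[0] is a zero-length (flexible) array member, so a dio_request_t
 * is sized for 'n' attached bios at allocation time; for example:
 *
 *	dr = kmem_zalloc(sizeof (dio_request_t) +
 *	    sizeof (struct bio *) * n, KM_SLEEP);
 *
 * This is the allocation pattern used by vdev_disk_dio_alloc() below.
 */
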
#ifdef HAVE_OPEN_BDEV_EXCLUSIVE
static fmode_t
vdev_bdev_mode(int smode)
{
	fmode_t mode = 0;

	ASSERT3S(smode & (FREAD | FWRITE), !=, 0);

	if (smode & FREAD)
		mode |= FMODE_READ;

	if (smode & FWRITE)
		mode |= FMODE_WRITE;

	return (mode);
}
#else
static int
vdev_bdev_mode(int smode)
{
	int mode = 0;

	ASSERT3S(smode & (FREAD | FWRITE), !=, 0);

	if ((smode & FREAD) && !(smode & FWRITE))
		mode = SB_RDONLY;

	return (mode);
}
#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */

/*
 * Returns the usable capacity (in bytes) for the partition or disk.
 */
static uint64_t
bdev_capacity(struct block_device *bdev)
{
	return (i_size_read(bdev->bd_inode));
}

/*
 * Returns the maximum expansion capacity of the block device (in bytes).
 *
 * It is possible to expand a vdev when it has been created as a wholedisk
 * and the containing block device has increased in capacity.  Or when the
 * partition containing the pool has been manually increased in size.
 *
 * This function is only responsible for calculating the potential expansion
 * size so it can be reported by 'zpool list'.  The efi_use_whole_disk() is
 * responsible for verifying the expected partition layout in the wholedisk
 * case, and updating the partition table if appropriate.  Once the partition
 * size has been increased the additional capacity will be visible using
 * bdev_capacity().
 */
static uint64_t
bdev_max_capacity(struct block_device *bdev, uint64_t wholedisk)
{
	uint64_t psize;
	int64_t available;

	if (wholedisk && bdev->bd_part != NULL && bdev != bdev->bd_contains) {
		/*
		 * When reporting maximum expansion capacity for a wholedisk
		 * deduct any capacity which is expected to be lost due to
		 * alignment restrictions.  Over-reporting this value isn't
		 * harmful and would only result in slightly less capacity
		 * than expected post expansion.
		 */
		available = i_size_read(bdev->bd_contains->bd_inode) -
		    ((EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
		    PARTITION_END_ALIGNMENT) << SECTOR_BITS);
		if (available > 0)
			psize = available;
		else
			psize = bdev_capacity(bdev);
	} else {
		psize = bdev_capacity(bdev);
	}

	return (psize);
}

static void
vdev_disk_error(zio_t *zio)
{
	/*
	 * This function can be called in interrupt context, for instance while
	 * handling IRQs coming from a misbehaving disk device; use printk()
	 * which is safe from any context.
	 */
	printk(KERN_WARNING "zio pool=%s vdev=%s error=%d type=%d "
	    "offset=%llu size=%llu flags=%x\n", spa_name(zio->io_spa),
	    zio->io_vd->vdev_path, zio->io_error, zio->io_type,
	    (u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size,
	    zio->io_flags);
}

/*
 * Use the Linux 'noop' elevator for zfs managed block devices.  This
 * strikes the ideal balance by allowing the zfs elevator to do all
 * request ordering and prioritization, while allowing the Linux
 * elevator to do the maximum front/back merging allowed by the
 * physical device.  This yields the largest possible requests for
 * the device with the lowest total overhead.
 */
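
/*
 * For reference, the equivalent manual operation (device name "sda" is
 * only an example):
 *
 *	echo noop > /sys/block/sda/queue/scheduler
 */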
static void
vdev_elevator_switch(vdev_t *v, char *elevator)
{
	vdev_disk_t *vd = v->vdev_tsd;
	struct request_queue *q;
	char *device;
	int error;

	for (int c = 0; c < v->vdev_children; c++)
		vdev_elevator_switch(v->vdev_child[c], elevator);

	if (!v->vdev_ops->vdev_op_leaf || vd->vd_bdev == NULL)
		return;

	q = bdev_get_queue(vd->vd_bdev);
	device = vd->vd_bdev->bd_disk->disk_name;

	/*
	 * Skip devices which are not whole disks (partitions).
	 * Device-mapper devices are excepted since they may be whole
	 * disks despite the vdev_wholedisk flag, in which case we can
	 * and should switch the elevator.  If the device-mapper device
	 * does not have an elevator (i.e. dm-raid, dm-crypt, etc.) the
	 * "Skip devices without schedulers" check below will fail.
	 */
	if (!v->vdev_wholedisk && strncmp(device, "dm-", 3) != 0)
		return;

	/* Leave existing scheduler when set to "none" */
	if ((strncmp(elevator, "none", 4) == 0) && (strlen(elevator) == 4))
		return;

	/*
	 * The elevator_change() function was available in kernels from
	 * 2.6.36 to 4.11.  When not available fall back to using the user
	 * mode helper functionality to set the elevator via sysfs.  This
	 * requires /bin/echo and sysfs to be mounted which may not be true
	 * early in the boot process.
	 */
#ifdef HAVE_ELEVATOR_CHANGE
	error = elevator_change(q, elevator);
#else
#define	SET_SCHEDULER_CMD \
	"exec 0</dev/null " \
	"     1>/sys/block/%s/queue/scheduler " \
	"     2>/dev/null; " \
	"echo %s"

	char *argv[] = { "/bin/sh", "-c", NULL, NULL };
	char *envp[] = { NULL };

	argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator);
	error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
	strfree(argv[2]);
#endif /* HAVE_ELEVATOR_CHANGE */
	if (error)
		zfs_dbgmsg("Unable to set \"%s\" scheduler for %s (%s): %d\n",
		    elevator, v->vdev_path, device, error);
}
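
/*
 * Illustrative expansion of SET_SCHEDULER_CMD (the macro tail above was
 * reconstructed, so treat this as a sketch) for device "sda" and
 * elevator "noop":
 *
 *	/bin/sh -c "exec 0</dev/null 1>/sys/block/sda/queue/scheduler
 *	    2>/dev/null; echo noop"
 */
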
static int
vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
    uint64_t *ashift)
{
	struct block_device *bdev;
	fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
	int count = 0, block_size;
	int bdev_retry_count = 50;
	vdev_disk_t *vd;

	/* Must have a pathname and it must be absolute. */
	if (v->vdev_path == NULL || v->vdev_path[0] != '/') {
		v->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		vdev_dbgmsg(v, "invalid vdev_path");
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Reopen the device if it is currently open.  When expanding a
	 * partition force re-scanning the partition table while closed
	 * in order to get an accurate updated block device size.  Then
	 * since udev may need to recreate the device links increase the
	 * open retry count before reporting the device as unavailable.
	 */
	vd = v->vdev_tsd;
	if (vd) {
		char disk_name[BDEVNAME_SIZE + 6] = "/dev/";
		boolean_t reread_part = B_FALSE;

		rw_enter(&vd->vd_lock, RW_WRITER);
		bdev = vd->vd_bdev;
		vd->vd_bdev = NULL;

		if (bdev) {
			if (v->vdev_expanding && bdev != bdev->bd_contains) {
				bdevname(bdev->bd_contains, disk_name + 5);
				reread_part = B_TRUE;
			}

			vdev_bdev_close(bdev, mode);
		}

		if (reread_part) {
			bdev = vdev_bdev_open(disk_name, mode,
			    zfs_vdev_holder);
			if (!IS_ERR(bdev)) {
				int error = vdev_bdev_reread_part(bdev);
				vdev_bdev_close(bdev, mode);
				if (error == 0)
					bdev_retry_count = 100;
			}
		}
	} else {
		vd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);

		rw_init(&vd->vd_lock, NULL, RW_DEFAULT, NULL);
		rw_enter(&vd->vd_lock, RW_WRITER);
	}

	/*
	 * Devices are always opened by the path provided at configuration
	 * time.  This means that if the provided path is a udev by-id path
	 * then drives may be re-cabled without an issue.  If the provided
	 * path is a udev by-path path, then the physical location information
	 * will be preserved.  This can be critical for more complicated
	 * configurations where drives are located in specific physical
	 * locations to maximize the system's tolerance to component failure.
	 *
	 * Alternatively, you can provide your own udev rule to flexibly map
	 * the drives as you see fit.  It is not advised that you use the
	 * /dev/[hd]d devices which may be reordered due to probing order.
	 * Devices in the wrong locations will be detected by the higher
	 * level vdev validation.
	 *
	 * The specified paths may be briefly removed and recreated in
	 * response to udev events.  This should be exceptionally unlikely
	 * because the zpool command makes every effort to verify these paths
	 * have already settled prior to reaching this point.  Therefore,
	 * an ENOENT failure at this point is highly likely to be transient
	 * and it is reasonable to sleep and retry before giving up.  In
	 * practice delays have been observed to be on the order of 100ms.
	 */
	bdev = ERR_PTR(-ENXIO);
	while (IS_ERR(bdev) && count < bdev_retry_count) {
		bdev = vdev_bdev_open(v->vdev_path, mode, zfs_vdev_holder);
		if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
			schedule_timeout(MSEC_TO_TICK(10));
			count++;
		} else if (IS_ERR(bdev)) {
			break;
		}
	}

	if (IS_ERR(bdev)) {
		int error = -PTR_ERR(bdev);
		vdev_dbgmsg(v, "open error=%d count=%d\n", error, count);
		vd->vd_bdev = NULL;
		v->vdev_tsd = vd;
		rw_exit(&vd->vd_lock);
		return (SET_ERROR(error));
	}

	v->vdev_tsd = vd;
	vd->vd_bdev = bdev;
	rw_exit(&vd->vd_lock);

	/* Determine the physical block size */
	block_size = vdev_bdev_block_size(vd->vd_bdev);

	/* Clear the nowritecache bit, causes vdev_reopen() to try again. */
	v->vdev_nowritecache = B_FALSE;

	/* Inform the ZIO pipeline that we are non-rotational */
	v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev));

	/* Physical volume size in bytes for the partition */
	*psize = bdev_capacity(vd->vd_bdev);

	/* Physical volume size in bytes including possible expansion space */
	*max_psize = bdev_max_capacity(vd->vd_bdev, v->vdev_wholedisk);

	/* Based on the minimum sector size set the block size */
	*ashift = highbit64(MAX(block_size, SPA_MINBLOCKSIZE)) - 1;
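
	/*
	 * Worked example: a 4 KiB physical sector reports block_size = 4096,
	 * and highbit64(4096) - 1 == 12, so the vdev is created with
	 * ashift = 12 (2^12 = 4096).  A 512-byte device yields ashift = 9.
	 */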
	/* Try to set the io scheduler elevator algorithm */
	(void) vdev_elevator_switch(v, zfs_vdev_scheduler);

	return (0);
}

static void
vdev_disk_close(vdev_t *v)
{
	vdev_disk_t *vd = v->vdev_tsd;

	if (v->vdev_reopening || vd == NULL)
		return;

	if (vd->vd_bdev != NULL) {
		vdev_bdev_close(vd->vd_bdev,
		    vdev_bdev_mode(spa_mode(v->vdev_spa)));
	}

	rw_destroy(&vd->vd_lock);
	kmem_free(vd, sizeof (vdev_disk_t));
	v->vdev_tsd = NULL;
}

static dio_request_t *
vdev_disk_dio_alloc(int bio_count)
{
	dio_request_t *dr;
	int i;

	dr = kmem_zalloc(sizeof (dio_request_t) +
	    sizeof (struct bio *) * bio_count, KM_SLEEP);

	atomic_set(&dr->dr_ref, 0);
	dr->dr_bio_count = bio_count;

	for (i = 0; i < dr->dr_bio_count; i++)
		dr->dr_bio[i] = NULL;

	return (dr);
}

static void
vdev_disk_dio_free(dio_request_t *dr)
{
	int i;

	for (i = 0; i < dr->dr_bio_count; i++)
		if (dr->dr_bio[i])
			bio_put(dr->dr_bio[i]);

	kmem_free(dr, sizeof (dio_request_t) +
	    sizeof (struct bio *) * dr->dr_bio_count);
}

static void
vdev_disk_dio_get(dio_request_t *dr)
{
	atomic_inc(&dr->dr_ref);
}

static int
vdev_disk_dio_put(dio_request_t *dr)
{
	int rc = atomic_dec_return(&dr->dr_ref);

	/*
	 * Free the dio_request when the last reference is dropped and
	 * ensure zio_interpret is called only once with the correct zio
	 */
	if (rc == 0) {
		zio_t *zio = dr->dr_zio;
		int error = dr->dr_error;

		vdev_disk_dio_free(dr);

		if (zio) {
			zio->io_error = error;
			ASSERT3S(zio->io_error, >=, 0);
			if (zio->io_error)
				vdev_disk_error(zio);

			zio_delay_interrupt(zio);
		}
	}

	return (rc);
}
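
/*
 * Reference-count lifecycle for a dio_request, as implemented above: one
 * reference is taken per attached bio before submission, plus one extra
 * reference held by __vdev_disk_physio() itself.  Each bio completion
 * drops one reference, and the submitter drops the extra one when done;
 * whichever put reaches zero frees the dio and signals the parent zio
 * exactly once.
 */
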
BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
{
	dio_request_t *dr = bio->bi_private;
	int rc;

	if (dr->dr_error == 0) {
#ifdef HAVE_1ARG_BIO_END_IO_T
		dr->dr_error = BIO_END_IO_ERROR(bio);
#else
		if (error)
			dr->dr_error = -(error);
		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
			dr->dr_error = EIO;
#endif
	}

	/* Drop reference acquired by __vdev_disk_physio */
	rc = vdev_disk_dio_put(dr);
}

static unsigned int
bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size)
{
	unsigned int offset, size, i;
	struct page *page;

	offset = offset_in_page(bio_ptr);
	for (i = 0; i < bio->bi_max_vecs; i++) {
		size = PAGE_SIZE - offset;

		if (bio_size <= 0)
			break;

		if (size > bio_size)
			size = bio_size;

		if (is_vmalloc_addr(bio_ptr))
			page = vmalloc_to_page(bio_ptr);
		else
			page = virt_to_page(bio_ptr);

		/*
		 * Some network-related block devices use tcp_sendpage, which
		 * doesn't behave well when given a 0-count page; this is a
		 * safety net to catch them.
		 */
		ASSERT3S(page_count(page), >, 0);

		if (bio_add_page(bio, page, size, offset) != size)
			break;

		bio_ptr += size;
		bio_size -= size;
		offset = 0;
	}

	return (bio_size);
}
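
/*
 * Worked example, assuming PAGE_SIZE = 4096: mapping a 16 KiB buffer that
 * starts 512 bytes into a page produces bio_vecs of 3584, 4096, 4096,
 * 4096, and 512 bytes.  Only the first segment is short; every later
 * iteration restarts at offset 0.
 */
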
static unsigned int
bio_map_abd_off(struct bio *bio, abd_t *abd, unsigned int size, size_t off)
{
	if (abd_is_linear(abd))
		return (bio_map(bio, ((char *)abd_to_buf(abd)) + off, size));

	return (abd_scatter_bio_map_off(bio, abd, size, off));
}

static inline void
vdev_submit_bio_impl(struct bio *bio)
{
#ifdef HAVE_1ARG_SUBMIT_BIO
	submit_bio(bio);
#else
	submit_bio(0, bio);
#endif
}

#ifdef HAVE_BIO_SET_DEV
#if defined(CONFIG_BLK_CGROUP) && defined(HAVE_BIO_SET_DEV_GPL_ONLY)
/*
 * The Linux 5.0 kernel updated the bio_set_dev() macro so it calls the
 * GPL-only bio_associate_blkg() symbol thus inadvertently converting
 * the entire macro.  Provide a minimal version which always assigns the
 * request queue's root_blkg to the bio.
 */
static inline void
vdev_bio_associate_blkg(struct bio *bio)
{
	struct request_queue *q = bio->bi_disk->queue;

	ASSERT3P(q, !=, NULL);
	ASSERT3P(q->root_blkg, !=, NULL);
	ASSERT3P(bio->bi_blkg, ==, NULL);

	if (blkg_tryget(q->root_blkg))
		bio->bi_blkg = q->root_blkg;
}
#define	bio_associate_blkg	vdev_bio_associate_blkg
#endif
#else
/*
 * Provide a bio_set_dev() helper macro for pre-Linux 4.14 kernels.
 */
static inline void
bio_set_dev(struct bio *bio, struct block_device *bdev)
{
	bio->bi_bdev = bdev;
}
#endif /* HAVE_BIO_SET_DEV */

static inline void
vdev_submit_bio(struct bio *bio)
{
#ifdef HAVE_CURRENT_BIO_TAIL
	struct bio **bio_tail = current->bio_tail;
	current->bio_tail = NULL;
	vdev_submit_bio_impl(bio);
	current->bio_tail = bio_tail;
#else
	struct bio_list *bio_list = current->bio_list;
	current->bio_list = NULL;
	vdev_submit_bio_impl(bio);
	current->bio_list = bio_list;
#endif
}
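
/*
 * The save/clear/restore of current->bio_list (or bio_tail on older
 * kernels) above is intended to make the submission look like a fresh
 * top-level call: the block layer uses that per-task list to defer bios
 * submitted recursively from inside a make_request handler.
 */
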
static int
__vdev_disk_physio(struct block_device *bdev, zio_t *zio,
    size_t io_size, uint64_t io_offset, int rw, int flags)
{
	dio_request_t *dr;
	uint64_t abd_offset;
	uint64_t bio_offset;
	int bio_size, bio_count = 16;
	int i = 0, error = 0;
#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG)
	struct blk_plug plug;
#endif

	/*
	 * Accessing outside the block device is never allowed.
	 */
	if (io_offset + io_size > bdev->bd_inode->i_size) {
		vdev_dbgmsg(zio->io_vd,
		    "Illegal access %llu size %llu, device size %llu",
		    io_offset, io_size, i_size_read(bdev->bd_inode));
		return (SET_ERROR(EIO));
	}

retry:
	dr = vdev_disk_dio_alloc(bio_count);
	if (dr == NULL)
		return (SET_ERROR(ENOMEM));

	if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
		bio_set_flags_failfast(bdev, &flags);

	dr->dr_zio = zio;

	/*
	 * When the IO size exceeds the maximum bio size for the request
	 * queue we are forced to break the IO in multiple bio's and wait
	 * for them all to complete.  Ideally, all pool users will set
	 * their volume block size to match the maximum request size and
	 * the common case will be one bio per vdev IO request.
	 */
	abd_offset = 0;
	bio_offset = io_offset;
	bio_size = io_size;
	for (i = 0; i <= dr->dr_bio_count; i++) {

		/* Finished constructing bio's for given buffer */
		if (bio_size <= 0)
			break;

		/*
		 * By default only 'bio_count' bio's per dio are allowed.
		 * However, if we find ourselves in a situation where more
		 * are needed we allocate a larger dio and warn the user.
		 */
		if (dr->dr_bio_count == i) {
			vdev_disk_dio_free(dr);
			bio_count *= 2;
			goto retry;
		}

		/* bio_alloc() with __GFP_WAIT never returns NULL */
		dr->dr_bio[i] = bio_alloc(GFP_NOIO,
		    MIN(abd_nr_pages_off(zio->io_abd, bio_size, abd_offset),
		    BIO_MAX_PAGES));
		if (unlikely(dr->dr_bio[i] == NULL)) {
			vdev_disk_dio_free(dr);
			return (SET_ERROR(ENOMEM));
		}

		/* Matching put called by vdev_disk_physio_completion */
		vdev_disk_dio_get(dr);

		bio_set_dev(dr->dr_bio[i], bdev);
		BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9;
		dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion;
		dr->dr_bio[i]->bi_private = dr;
		bio_set_op_attrs(dr->dr_bio[i], rw, flags);

		/* Remaining size is returned to become the new size */
		bio_size = bio_map_abd_off(dr->dr_bio[i], zio->io_abd,
		    bio_size, abd_offset);

		/* Advance in buffer and construct another bio if needed */
		abd_offset += BIO_BI_SIZE(dr->dr_bio[i]);
		bio_offset += BIO_BI_SIZE(dr->dr_bio[i]);
	}

	/* Extra reference to protect dio_request during vdev_submit_bio */
	vdev_disk_dio_get(dr);

#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG)
	if (dr->dr_bio_count > 1)
		blk_start_plug(&plug);
#endif

	/* Submit all bio's associated with this dio */
	for (i = 0; i < dr->dr_bio_count; i++)
		if (dr->dr_bio[i])
			vdev_submit_bio(dr->dr_bio[i]);

#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG)
	if (dr->dr_bio_count > 1)
		blk_finish_plug(&plug);
#endif

	(void) vdev_disk_dio_put(dr);

	return (error);
}
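
/*
 * Sizing note, assuming 4 KiB pages and the common BIO_MAX_PAGES value of
 * 256: a single bio can then carry up to 1 MiB, so a 1 MiB vdev IO is
 * typically issued as a single bio, while an IO needing more than 16 bios
 * (over 16 MiB here) doubles the dio once via the retry path above
 * (16 -> 32 slots).
 */
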
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
{
	zio_t *zio = bio->bi_private;
#ifdef HAVE_1ARG_BIO_END_IO_T
	zio->io_error = BIO_END_IO_ERROR(bio);
#else
	zio->io_error = -error;
#endif

	if (zio->io_error && (zio->io_error == EOPNOTSUPP))
		zio->io_vd->vdev_nowritecache = B_TRUE;

	bio_put(bio);
	ASSERT3S(zio->io_error, >=, 0);
	if (zio->io_error)
		vdev_disk_error(zio);
	zio_interrupt(zio);
}

static int
vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
{
	struct request_queue *q;
	struct bio *bio;

	q = bdev_get_queue(bdev);
	if (!q)
		return (SET_ERROR(ENXIO));

	bio = bio_alloc(GFP_NOIO, 0);
	/* bio_alloc() with __GFP_WAIT never returns NULL */
	if (unlikely(bio == NULL))
		return (SET_ERROR(ENOMEM));

	bio->bi_end_io = vdev_disk_io_flush_completion;
	bio->bi_private = zio;
	bio_set_dev(bio, bdev);
	bio_set_flush(bio);
	vdev_submit_bio(bio);
	invalidate_bdev(bdev);

	return (0);
}

static void
vdev_disk_io_start(zio_t *zio)
{
	vdev_t *v = zio->io_vd;
	vdev_disk_t *vd = v->vdev_tsd;
	int rw, flags, error;

	/*
	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
	 * Nothing to be done here but return failure.
	 */
	if (vd == NULL) {
		zio->io_error = ENXIO;
		zio_interrupt(zio);
		return;
	}

	rw_enter(&vd->vd_lock, RW_READER);

	/*
	 * If the vdev is closed, it's likely due to a failed reopen and is
	 * in the UNAVAIL state.  Nothing to be done here but return failure.
	 */
	if (vd->vd_bdev == NULL) {
		rw_exit(&vd->vd_lock);
		zio->io_error = ENXIO;
		zio_interrupt(zio);
		return;
	}

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:

		if (!vdev_readable(v)) {
			rw_exit(&vd->vd_lock);
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		}

		switch (zio->io_cmd) {
		case DKIOCFLUSHWRITECACHE:

			if (zfs_nocacheflush)
				break;

			if (v->vdev_nowritecache) {
				zio->io_error = SET_ERROR(ENOTSUP);
				break;
			}

			error = vdev_disk_io_flush(vd->vd_bdev, zio);
			if (error == 0) {
				rw_exit(&vd->vd_lock);
				return;
			}

			zio->io_error = error;

			break;

		default:
			zio->io_error = SET_ERROR(ENOTSUP);
		}

		rw_exit(&vd->vd_lock);
		zio_execute(zio);
		return;

	case ZIO_TYPE_WRITE:
		rw = WRITE;
#if defined(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG)
		flags = (1 << BIO_RW_UNPLUG);
#elif defined(REQ_UNPLUG)
		flags = REQ_UNPLUG;
#else
		flags = 0;
#endif
		break;

	case ZIO_TYPE_READ:
		rw = READ;
#if defined(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG)
		flags = (1 << BIO_RW_UNPLUG);
#elif defined(REQ_UNPLUG)
		flags = REQ_UNPLUG;
#else
		flags = 0;
#endif
		break;

	default:
		rw_exit(&vd->vd_lock);
		zio->io_error = SET_ERROR(ENOTSUP);
		zio_interrupt(zio);
		return;
	}

	zio->io_target_timestamp = zio_handle_io_delay(zio);
	error = __vdev_disk_physio(vd->vd_bdev, zio,
	    zio->io_size, zio->io_offset, rw, flags);
	rw_exit(&vd->vd_lock);

	if (error) {
		zio->io_error = error;
		zio_interrupt(zio);
		return;
	}
}

static void
vdev_disk_io_done(zio_t *zio)
{
	/*
	 * If the device returned EIO, we revalidate the media.  If it is
	 * determined the media has changed this triggers the asynchronous
	 * removal of the device from the configuration.
	 */
	if (zio->io_error == EIO) {
		vdev_t *v = zio->io_vd;
		vdev_disk_t *vd = v->vdev_tsd;

		if (check_disk_change(vd->vd_bdev)) {
			vdev_bdev_invalidate(vd->vd_bdev);
			v->vdev_remove_wanted = B_TRUE;
			spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
		}
	}
}

static void
vdev_disk_hold(vdev_t *vd)
{
	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));

	/* We must have a pathname, and it must be absolute. */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/')
		return;

	/*
	 * Only prefetch path and devid info if the device has
	 * never been opened.
	 */
	if (vd->vdev_tsd != NULL)
		return;

	/* XXX: Implement me as a vnode lookup for the device */
	vd->vdev_name_vp = NULL;
	vd->vdev_devid_vp = NULL;
}

static void
vdev_disk_rele(vdev_t *vd)
{
	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));

	/* XXX: Implement me as a vnode rele for the device */
}

static int
param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp)
{
	spa_t *spa = NULL;
	char *p;

	if (val == NULL)
		return (SET_ERROR(-EINVAL));

	if ((p = strchr(val, '\n')) != NULL)
		*p = '\0';

	if (spa_mode_global != 0) {
		mutex_enter(&spa_namespace_lock);
		while ((spa = spa_next(spa)) != NULL) {
			if (spa_state(spa) != POOL_STATE_ACTIVE ||
			    !spa_writeable(spa) || spa_suspended(spa))
				continue;

			spa_open_ref(spa, FTAG);
			mutex_exit(&spa_namespace_lock);
			vdev_elevator_switch(spa->spa_root_vdev, (char *)val);
			mutex_enter(&spa_namespace_lock);
			spa_close(spa, FTAG);
		}
		mutex_exit(&spa_namespace_lock);
	}

	return (param_set_charp(val, kp));
}

vdev_ops_t vdev_disk_ops = {
	vdev_disk_open,
	vdev_disk_close,
	vdev_default_asize,
	vdev_disk_io_start,
	vdev_disk_io_done,
	NULL,
	NULL,
	vdev_disk_hold,
	vdev_disk_rele,
	NULL,
	vdev_default_xlate,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};

module_param_call(zfs_vdev_scheduler, param_set_vdev_scheduler,
	param_get_charp, &zfs_vdev_scheduler, 0644);
MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler");