/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 */
32 #include <linux/blkdev.h>
33 #include <linux/backing-dev.h>
34 #include <linux/hdreg.h>
35 #include <linux/major.h>
36 #include <linux/msdos_fs.h> /* for SECTOR_* */
37 #include <linux/bio.h>
40 #include <linux/blk-mq.h>
/*
 * Compat wrapper: kernels without blk_queue_flag_set() provide the older
 * (non-atomic) queue_flag_set() with the same (flag, queue) argument order.
 */
#ifndef HAVE_BLK_QUEUE_FLAG_SET
static inline void
blk_queue_flag_set(unsigned int flag, struct request_queue *q)
{
	queue_flag_set(flag, q);
}
#endif
/*
 * Compat wrapper: kernels without blk_queue_flag_clear() provide the older
 * (non-atomic) queue_flag_clear() with the same (flag, queue) argument order.
 */
#ifndef HAVE_BLK_QUEUE_FLAG_CLEAR
static inline void
blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
{
	queue_flag_clear(flag, q);
}
#endif
61 * The blk_queue_write_cache() interface has replaced blk_queue_flush()
62 * interface. However, the new interface is GPL-only thus we implement
63 * our own trivial wrapper when the GPL-only version is detected.
66 * The blk_queue_flush() interface has replaced blk_queue_ordered()
67 * interface. However, while the old interface was available to all the
68 * new one is GPL-only. Thus if the GPL-only version is detected we
69 * implement our own trivial helper.
72 blk_queue_set_write_cache(struct request_queue
*q
, bool wc
, bool fua
)
74 #if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY)
76 blk_queue_flag_set(QUEUE_FLAG_WC
, q
);
78 blk_queue_flag_clear(QUEUE_FLAG_WC
, q
);
80 blk_queue_flag_set(QUEUE_FLAG_FUA
, q
);
82 blk_queue_flag_clear(QUEUE_FLAG_FUA
, q
);
83 #elif defined(HAVE_BLK_QUEUE_WRITE_CACHE)
84 blk_queue_write_cache(q
, wc
, fua
);
85 #elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY)
87 q
->flush_flags
|= REQ_FLUSH
;
89 q
->flush_flags
|= REQ_FUA
;
90 #elif defined(HAVE_BLK_QUEUE_FLUSH)
91 blk_queue_flush(q
, (wc
? REQ_FLUSH
: 0) | (fua
? REQ_FUA
: 0));
93 #error "Unsupported kernel"
98 blk_queue_set_read_ahead(struct request_queue
*q
, unsigned long ra_pages
)
100 #if !defined(HAVE_BLK_QUEUE_UPDATE_READAHEAD) && \
101 !defined(HAVE_DISK_UPDATE_READAHEAD)
102 #ifdef HAVE_BLK_QUEUE_BDI_DYNAMIC
103 q
->backing_dev_info
->ra_pages
= ra_pages
;
105 q
->backing_dev_info
.ra_pages
= ra_pages
;
/*
 * 3.14 API change,
 * Immutable biovecs: several bio fields moved into struct bvec_iter.
 * These accessors hide the difference from the rest of the code.
 */
#ifdef HAVE_BIO_BVEC_ITER
#define	BIO_BI_SECTOR(bio)	(bio)->bi_iter.bi_sector
#define	BIO_BI_SIZE(bio)	(bio)->bi_iter.bi_size
#define	BIO_BI_IDX(bio)		(bio)->bi_iter.bi_idx
#define	BIO_BI_SKIP(bio)	(bio)->bi_iter.bi_bvec_done
#define	bio_for_each_segment4(bv, bvp, b, i)	\
	bio_for_each_segment((bv), (b), (i))
typedef struct bvec_iter bvec_iterator_t;
#else
#define	BIO_BI_SECTOR(bio)	(bio)->bi_sector
#define	BIO_BI_SIZE(bio)	(bio)->bi_size
#define	BIO_BI_IDX(bio)		(bio)->bi_idx
#define	BIO_BI_SKIP(bio)	(0)
#define	bio_for_each_segment4(bv, bvp, b, i)	\
	bio_for_each_segment((bvp), (b), (i))
typedef int bvec_iterator_t;
#endif
129 bio_set_flags_failfast(struct block_device
*bdev
, int *flags
, bool dev
,
130 bool transport
, bool driver
)
134 * Disable FAILFAST for loopback devices because of the
135 * following incorrect BUG_ON() in loop_make_request().
136 * This support is also disabled for md devices because the
137 * test suite layers md devices on top of loopback devices.
138 * This may be removed when the loopback driver is fixed.
140 * BUG_ON(!lo || (rw != READ && rw != WRITE));
142 if ((MAJOR(bdev
->bd_dev
) == LOOP_MAJOR
) ||
143 (MAJOR(bdev
->bd_dev
) == MD_MAJOR
))
146 #ifdef BLOCK_EXT_MAJOR
147 if (MAJOR(bdev
->bd_dev
) == BLOCK_EXT_MAJOR
)
149 #endif /* BLOCK_EXT_MAJOR */
150 #endif /* CONFIG_BUG */
153 *flags
|= REQ_FAILFAST_DEV
;
155 *flags
|= REQ_FAILFAST_TRANSPORT
;
157 *flags
|= REQ_FAILFAST_DRIVER
;
/*
 * Maximum disk label length, it may be undefined for some kernels.
 */
#if !defined(DISK_NAME_LEN)
#define	DISK_NAME_LEN	32
#endif /* DISK_NAME_LEN */
167 #ifdef HAVE_BIO_BI_STATUS
169 bi_status_to_errno(blk_status_t status
)
174 case BLK_STS_NOTSUPP
:
176 case BLK_STS_TIMEOUT
:
180 case BLK_STS_TRANSPORT
:
188 case BLK_STS_PROTECTION
:
190 case BLK_STS_RESOURCE
:
201 static inline blk_status_t
202 errno_to_bi_status(int error
)
208 return (BLK_STS_NOTSUPP
);
210 return (BLK_STS_TIMEOUT
);
212 return (BLK_STS_NOSPC
);
214 return (BLK_STS_TRANSPORT
);
216 return (BLK_STS_TARGET
);
218 return (BLK_STS_NEXUS
);
220 return (BLK_STS_MEDIUM
);
222 return (BLK_STS_PROTECTION
);
224 return (BLK_STS_RESOURCE
);
226 return (BLK_STS_AGAIN
);
228 return (BLK_STS_IOERR
);
230 return (BLK_STS_IOERR
);
233 #endif /* HAVE_BIO_BI_STATUS */
/*
 * 4.3 API change
 * The bio_endio() prototype changed slightly.  These are helper
 * macro's to ensure the prototype and invocation are handled.
 */
#ifdef HAVE_1ARG_BIO_END_IO_T
#ifdef HAVE_BIO_BI_STATUS
#define	BIO_END_IO_ERROR(bio)		bi_status_to_errno(bio->bi_status)
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
#define	BIO_END_IO(bio, error)		bio_set_bi_status(bio, error)
/*
 * Store a (non-positive) errno as the bio's blk_status_t and complete it.
 */
static inline void
bio_set_bi_status(struct bio *bio, int error)
{
	ASSERT3S(error, <=, 0);
	bio->bi_status = errno_to_bi_status(-error);
	bio_endio(bio);
}
#else
#define	BIO_END_IO_ERROR(bio)		(-(bio->bi_error))
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
#define	BIO_END_IO(bio, error)		bio_set_bi_error(bio, error)
/*
 * Store a (non-positive) errno in bio->bi_error and complete the bio.
 */
static inline void
bio_set_bi_error(struct bio *bio, int error)
{
	ASSERT3S(error, <=, 0);
	bio->bi_error = error;
	bio_endio(bio);
}
#endif /* HAVE_BIO_BI_STATUS */
#else
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x, int z)
#define	BIO_END_IO(bio, error)		bio_endio(bio, error);
#endif /* HAVE_1ARG_BIO_END_IO_T */
274 * 2.6.36 - 5.14 MACRO,
277 * Check the disk status and return B_TRUE if alive
280 static inline boolean_t
281 zfs_check_disk_status(struct block_device
*bdev
)
283 #if defined(GENHD_FL_UP)
284 return (!!(bdev
->bd_disk
->flags
& GENHD_FL_UP
));
285 #elif defined(GD_DEAD)
286 return (!test_bit(GD_DEAD
, &bdev
->bd_disk
->state
));
289 * This is encountered if neither GENHD_FL_UP nor GD_DEAD is available in
290 * the kernel - likely due to an MACRO change that needs to be chased down.
292 #error "Unsupported kernel: no usable disk status check"
298 * 3.10.0 CentOS 7.x API,
299 * blkdev_reread_part()
301 * For older kernels trigger a re-reading of the partition table by calling
302 * check_disk_change() which calls flush_disk() to invalidate the device.
304 * For newer kernels (as of 5.10), bdev_check_media_change is used, in favor of
305 * check_disk_change(), with the modification that invalidation is no longer
308 #ifdef HAVE_CHECK_DISK_CHANGE
309 #define zfs_check_media_change(bdev) check_disk_change(bdev)
310 #ifdef HAVE_BLKDEV_REREAD_PART
311 #define vdev_bdev_reread_part(bdev) blkdev_reread_part(bdev)
313 #define vdev_bdev_reread_part(bdev) check_disk_change(bdev)
314 #endif /* HAVE_BLKDEV_REREAD_PART */
316 #ifdef HAVE_BDEV_CHECK_MEDIA_CHANGE
318 zfs_check_media_change(struct block_device
*bdev
)
320 #ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
321 struct gendisk
*gd
= bdev
->bd_disk
;
322 const struct block_device_operations
*bdo
= gd
->fops
;
325 if (!bdev_check_media_change(bdev
))
328 #ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
330 * Force revalidation, to mimic the old behavior of
331 * check_disk_change()
333 if (bdo
->revalidate_disk
)
334 bdo
->revalidate_disk(gd
);
339 #define vdev_bdev_reread_part(bdev) zfs_check_media_change(bdev)
342 * This is encountered if check_disk_change() and bdev_check_media_change()
343 * are not available in the kernel - likely due to an API change that needs
346 #error "Unsupported kernel: no usable disk change check"
347 #endif /* HAVE_BDEV_CHECK_MEDIA_CHANGE */
348 #endif /* HAVE_CHECK_DISK_CHANGE */
352 * The function was exported for use, prior to this it existed but the
353 * symbol was not exported.
355 * 4.4.0-6.21 API change for Ubuntu
356 * lookup_bdev() gained a second argument, FMODE_*, to check inode permissions.
359 * Changed to take a dev_t argument which is set on success and return a
360 * non-zero error code on failure.
363 vdev_lookup_bdev(const char *path
, dev_t
*dev
)
365 #if defined(HAVE_DEVT_LOOKUP_BDEV)
366 return (lookup_bdev(path
, dev
));
367 #elif defined(HAVE_1ARG_LOOKUP_BDEV)
368 struct block_device
*bdev
= lookup_bdev(path
);
370 return (PTR_ERR(bdev
));
376 #elif defined(HAVE_MODE_LOOKUP_BDEV)
377 struct block_device
*bdev
= lookup_bdev(path
, FMODE_READ
);
379 return (PTR_ERR(bdev
));
386 #error "Unsupported kernel"
391 * Kernels without bio_set_op_attrs use bi_rw for the bio flags.
393 #if !defined(HAVE_BIO_SET_OP_ATTRS)
395 bio_set_op_attrs(struct bio
*bio
, unsigned rw
, unsigned flags
)
397 #if defined(HAVE_BIO_BI_OPF)
398 bio
->bi_opf
= rw
| flags
;
400 bio
->bi_rw
|= rw
| flags
;
401 #endif /* HAVE_BIO_BI_OPF */
406 * bio_set_flush - Set the appropriate flags in a bio to guarantee
407 * data are on non-volatile media on completion.
410 * Introduce WRITE_FLUSH, WRITE_FUA, and WRITE_FLUSH_FUA flags as a
411 * replacement for WRITE_BARRIER to allow expressing richer semantics
412 * to the block layer. It's up to the block layer to implement the
413 * semantics correctly. Use the WRITE_FLUSH_FUA flag combination.
416 * REQ_FLUSH was renamed to REQ_PREFLUSH. For consistency with previous
417 * OpenZFS releases, prefer the WRITE_FLUSH_FUA flag set if it's available.
420 * The read/write flags and their modifiers, including WRITE_FLUSH,
421 * WRITE_FUA and WRITE_FLUSH_FUA were removed from fs.h in
422 * torvalds/linux@70fd7614 and replaced by direct flag modification
423 * of the REQ_ flags in bio->bi_opf. Use REQ_PREFLUSH.
426 bio_set_flush(struct bio
*bio
)
428 #if defined(HAVE_REQ_PREFLUSH) /* >= 4.10 */
429 bio_set_op_attrs(bio
, 0, REQ_PREFLUSH
| REQ_OP_WRITE
);
430 #elif defined(WRITE_FLUSH_FUA) /* >= 2.6.37 and <= 4.9 */
431 bio_set_op_attrs(bio
, 0, WRITE_FLUSH_FUA
);
433 #error "Allowing the build will cause bio_set_flush requests to be ignored."
447 * in all cases but may have a performance impact for some kernels. It
448 * has the advantage of minimizing kernel specific changes in the zvol code.
451 static inline boolean_t
452 bio_is_flush(struct bio
*bio
)
454 #if defined(HAVE_REQ_OP_FLUSH) && defined(HAVE_BIO_BI_OPF)
455 return ((bio_op(bio
) == REQ_OP_FLUSH
) || (bio
->bi_opf
& REQ_PREFLUSH
));
456 #elif defined(HAVE_REQ_PREFLUSH) && defined(HAVE_BIO_BI_OPF)
457 return (bio
->bi_opf
& REQ_PREFLUSH
);
458 #elif defined(HAVE_REQ_PREFLUSH) && !defined(HAVE_BIO_BI_OPF)
459 return (bio
->bi_rw
& REQ_PREFLUSH
);
460 #elif defined(HAVE_REQ_FLUSH)
461 return (bio
->bi_rw
& REQ_FLUSH
);
463 #error "Unsupported kernel"
469 * REQ_FUA flag moved to bio->bi_opf
474 static inline boolean_t
475 bio_is_fua(struct bio
*bio
)
477 #if defined(HAVE_BIO_BI_OPF)
478 return (bio
->bi_opf
& REQ_FUA
);
479 #elif defined(REQ_FUA)
480 return (bio
->bi_rw
& REQ_FUA
);
482 #error "Allowing the build will cause fua requests to be ignored."
493 * In all cases the normal I/O path is used for discards. The only
494 * difference is how the kernel tags individual I/Os as discards.
496 static inline boolean_t
497 bio_is_discard(struct bio
*bio
)
499 #if defined(HAVE_REQ_OP_DISCARD)
500 return (bio_op(bio
) == REQ_OP_DISCARD
);
501 #elif defined(HAVE_REQ_DISCARD)
502 return (bio
->bi_rw
& REQ_DISCARD
);
504 #error "Unsupported kernel"
510 * REQ_OP_SECURE_ERASE
515 static inline boolean_t
516 bio_is_secure_erase(struct bio
*bio
)
518 #if defined(HAVE_REQ_OP_SECURE_ERASE)
519 return (bio_op(bio
) == REQ_OP_SECURE_ERASE
);
520 #elif defined(REQ_SECURE)
521 return (bio
->bi_rw
& REQ_SECURE
);
529 * Discard granularity and alignment restrictions may now be set. For
530 * older kernels which do not support this it is safe to skip it.
533 blk_queue_discard_granularity(struct request_queue
*q
, unsigned int dg
)
535 q
->limits
.discard_granularity
= dg
;
540 * bdev_max_discard_sectors()
543 * blk_queue_discard()
545 static inline boolean_t
546 bdev_discard_supported(struct block_device
*bdev
)
548 #if defined(HAVE_BDEV_MAX_DISCARD_SECTORS)
549 return (!!bdev_max_discard_sectors(bdev
));
550 #elif defined(HAVE_BLK_QUEUE_DISCARD)
551 return (!!blk_queue_discard(bdev_get_queue(bdev
)));
553 #error "Unsupported kernel"
559 * bdev_max_secure_erase_sectors()
562 * blk_queue_secure_erase()
565 * blk_queue_secdiscard()
567 static inline boolean_t
568 bdev_secure_discard_supported(struct block_device
*bdev
)
570 #if defined(HAVE_BDEV_MAX_SECURE_ERASE_SECTORS)
571 return (!!bdev_max_secure_erase_sectors(bdev
));
572 #elif defined(HAVE_BLK_QUEUE_SECURE_ERASE)
573 return (!!blk_queue_secure_erase(bdev_get_queue(bdev
)));
574 #elif defined(HAVE_BLK_QUEUE_SECDISCARD)
575 return (!!blk_queue_secdiscard(bdev_get_queue(bdev
)));
577 #error "Unsupported kernel"
/*
 * A common holder for vdev_bdev_open() is used to relax the exclusive open
 * semantics slightly.  Internal vdev disk callers may pass VDEV_HOLDER to
 * allow them to open the device multiple times.  Other kernel callers and
 * user space processes which don't pass this value will get EBUSY.  This is
 * currently required for the correct operation of hot spares.
 */
#define	VDEV_HOLDER			((void *)0x2401de7)
/*
 * Begin per-disk I/O accounting for a bio, papering over the many kernel
 * API generations.  Returns the start time (jiffies or the value the
 * kernel helper hands back) to be passed to blk_generic_end_io_acct().
 */
static inline unsigned long
blk_generic_start_io_acct(struct request_queue *q __attribute__((unused)),
    struct gendisk *disk __attribute__((unused)),
    int rw __attribute__((unused)), struct bio *bio)
{
#if defined(HAVE_BDEV_IO_ACCT_63)
	return (bdev_start_io_acct(bio->bi_bdev, bio_op(bio),
	    jiffies));
#elif defined(HAVE_BDEV_IO_ACCT_OLD)
	return (bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
	    bio_op(bio), jiffies));
#elif defined(HAVE_DISK_IO_ACCT)
	return (disk_start_io_acct(disk, bio_sectors(bio), bio_op(bio)));
#elif defined(HAVE_BIO_IO_ACCT)
	return (bio_start_io_acct(bio));
#elif defined(HAVE_GENERIC_IO_ACCT_3ARG)
	unsigned long start_time = jiffies;
	generic_start_io_acct(rw, bio_sectors(bio), &disk->part0);
	return (start_time);
#elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
	unsigned long start_time = jiffies;
	generic_start_io_acct(q, rw, bio_sectors(bio), &disk->part0);
	return (start_time);
#else
	return (0);
#endif
}
/*
 * Complete per-disk I/O accounting started by blk_generic_start_io_acct().
 * start_time must be the value that call returned.
 */
static inline void
blk_generic_end_io_acct(struct request_queue *q __attribute__((unused)),
    struct gendisk *disk __attribute__((unused)),
    int rw __attribute__((unused)), struct bio *bio, unsigned long start_time)
{
#if defined(HAVE_BDEV_IO_ACCT_63)
	bdev_end_io_acct(bio->bi_bdev, bio_op(bio), bio_sectors(bio),
	    start_time);
#elif defined(HAVE_BDEV_IO_ACCT_OLD)
	bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
#elif defined(HAVE_DISK_IO_ACCT)
	disk_end_io_acct(disk, bio_op(bio), start_time);
#elif defined(HAVE_BIO_IO_ACCT)
	bio_end_io_acct(bio, start_time);
#elif defined(HAVE_GENERIC_IO_ACCT_3ARG)
	generic_end_io_acct(rw, &disk->part0, start_time);
#elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
	generic_end_io_acct(q, rw, &disk->part0, start_time);
#endif
}
640 #ifndef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
641 static inline struct request_queue
*
642 blk_generic_alloc_queue(make_request_fn make_request
, int node_id
)
644 #if defined(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN)
645 return (blk_alloc_queue(make_request
, node_id
));
646 #elif defined(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN_RH)
647 return (blk_alloc_queue_rh(make_request
, node_id
));
649 struct request_queue
*q
= blk_alloc_queue(GFP_KERNEL
);
651 blk_queue_make_request(q
, make_request
);
656 #endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
659 * All the io_*() helper functions below can operate on a bio, or a rq, but
660 * not both. The older submit_bio() codepath will pass a bio, and the
661 * newer blk-mq codepath will pass a rq.
664 io_data_dir(struct bio
*bio
, struct request
*rq
)
668 if (op_is_write(req_op(rq
))) {
675 ASSERT3P(rq
, ==, NULL
);
677 return (bio_data_dir(bio
));
681 io_is_flush(struct bio
*bio
, struct request
*rq
)
685 return (req_op(rq
) == REQ_OP_FLUSH
);
687 ASSERT3P(rq
, ==, NULL
);
689 return (bio_is_flush(bio
));
693 io_is_discard(struct bio
*bio
, struct request
*rq
)
697 return (req_op(rq
) == REQ_OP_DISCARD
);
699 ASSERT3P(rq
, ==, NULL
);
701 return (bio_is_discard(bio
));
705 io_is_secure_erase(struct bio
*bio
, struct request
*rq
)
709 return (req_op(rq
) == REQ_OP_SECURE_ERASE
);
711 ASSERT3P(rq
, ==, NULL
);
713 return (bio_is_secure_erase(bio
));
717 io_is_fua(struct bio
*bio
, struct request
*rq
)
721 return (rq
->cmd_flags
& REQ_FUA
);
723 ASSERT3P(rq
, ==, NULL
);
725 return (bio_is_fua(bio
));
729 static inline uint64_t
730 io_offset(struct bio
*bio
, struct request
*rq
)
734 return (blk_rq_pos(rq
) << 9);
736 ASSERT3P(rq
, ==, NULL
);
738 return (BIO_BI_SECTOR(bio
) << 9);
741 static inline uint64_t
742 io_size(struct bio
*bio
, struct request
*rq
)
746 return (blk_rq_bytes(rq
));
748 ASSERT3P(rq
, ==, NULL
);
750 return (BIO_BI_SIZE(bio
));
754 io_has_data(struct bio
*bio
, struct request
*rq
)
758 return (bio_has_data(rq
->bio
));
760 ASSERT3P(rq
, ==, NULL
);
762 return (bio_has_data(bio
));
764 #endif /* _ZFS_BLKDEV_H */