/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 */

#ifndef _ZFS_BLKDEV_H
#define	_ZFS_BLKDEV_H
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/backing-dev.h>
#include <linux/hdreg.h>
#ifndef HAVE_FMODE_T
typedef unsigned __bitwise__ fmode_t;
#endif /* HAVE_FMODE_T */
#ifndef HAVE_BLK_QUEUE_FLAG_SET
static inline void
blk_queue_flag_set(unsigned int flag, struct request_queue *q)
{
	queue_flag_set(flag, q);
}
#endif
#ifndef HAVE_BLK_QUEUE_FLAG_CLEAR
static inline void
blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
{
	queue_flag_clear(flag, q);
}
#endif
/*
 * The blk_queue_write_cache() interface has replaced the blk_queue_flush()
 * interface.  However, the new interface is GPL-only, thus we implement
 * our own trivial wrapper when the GPL-only version is detected.
 *
 * The blk_queue_flush() interface has replaced the blk_queue_ordered()
 * interface.  However, while the old interface was available to all, the
 * new one is GPL-only.  Thus if the GPL-only version is detected we
 * implement our own trivial helper.
 *
 * Otherwise, fall back to the legacy blk_queue_ordered() interface.
 */
static inline void
blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua)
{
#if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY)
	if (wc)
		blk_queue_flag_set(QUEUE_FLAG_WC, q);
	else
		blk_queue_flag_clear(QUEUE_FLAG_WC, q);
	if (fua)
		blk_queue_flag_set(QUEUE_FLAG_FUA, q);
	else
		blk_queue_flag_clear(QUEUE_FLAG_FUA, q);
#elif defined(HAVE_BLK_QUEUE_WRITE_CACHE)
	blk_queue_write_cache(q, wc, fua);
#elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY)
	if (wc)
		q->flush_flags |= REQ_FLUSH;
	if (fua)
		q->flush_flags |= REQ_FUA;
#elif defined(HAVE_BLK_QUEUE_FLUSH)
	blk_queue_flush(q, (wc ? REQ_FLUSH : 0) | (fua ? REQ_FUA : 0));
#else
	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL);
#endif
}
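/*
 * Illustrative sketch (not part of the original interface): a caller that
 * wants to advertise both a volatile write cache and FUA support on its
 * request queue would invoke the wrapper as below; "q" is assumed to be
 * an already-allocated request queue.
 *
 *	blk_queue_set_write_cache(q, true, true);
 */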
/*
 * Most of the blk_* macros were removed in 2.6.36.  Ostensibly this was
 * done to improve readability and allow easier grepping.  However, from
 * a portability stand point the macros are helpful.  Therefore the needed
 * macros are redefined here if they are missing from the kernel.
 */
#ifndef blk_fs_request
#define	blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
#endif
/*
 * The blk_queue_stackable() queue flag was added in 2.6.27 to handle dm
 * stacking drivers.  Prior to this, request stacking drivers were detected
 * by checking (q->request_fn == NULL); for earlier kernels we revert to
 * this legacy behavior.
 */
#ifndef blk_queue_stackable
#define	blk_queue_stackable(q)	((q)->request_fn == NULL)
#endif
/*
 * The blk_queue_max_hw_sectors() function replaces blk_queue_max_sectors().
 */
#ifndef HAVE_BLK_QUEUE_MAX_HW_SECTORS
#define	blk_queue_max_hw_sectors __blk_queue_max_hw_sectors
static inline void
__blk_queue_max_hw_sectors(struct request_queue *q,
    unsigned int max_hw_sectors)
{
	blk_queue_max_sectors(q, max_hw_sectors);
}
#endif
/*
 * The blk_queue_max_segments() function consolidates
 * blk_queue_max_hw_segments() and blk_queue_max_phys_segments().
 */
#ifndef HAVE_BLK_QUEUE_MAX_SEGMENTS
#define	blk_queue_max_segments __blk_queue_max_segments
static inline void
__blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
{
	blk_queue_max_phys_segments(q, max_segments);
	blk_queue_max_hw_segments(q, max_segments);
}
#endif
static inline void
blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages)
{
#ifdef HAVE_BLK_QUEUE_BDI_DYNAMIC
	q->backing_dev_info->ra_pages = ra_pages;
#else
	q->backing_dev_info.ra_pages = ra_pages;
#endif
}
#ifndef HAVE_GET_DISK_AND_MODULE
static inline struct kobject *
get_disk_and_module(struct gendisk *disk)
{
	return (get_disk(disk));
}
#endif /* HAVE_GET_DISK_AND_MODULE */
#ifndef HAVE_GET_DISK_RO
static inline int
get_disk_ro(struct gendisk *disk)
{
	int policy = 0;

	if (disk->part[0])
		policy = disk->part[0]->policy;

	return (policy);
}
#endif /* HAVE_GET_DISK_RO */
#ifdef HAVE_BIO_BVEC_ITER
#define	BIO_BI_SECTOR(bio)	(bio)->bi_iter.bi_sector
#define	BIO_BI_SIZE(bio)	(bio)->bi_iter.bi_size
#define	BIO_BI_IDX(bio)		(bio)->bi_iter.bi_idx
#define	BIO_BI_SKIP(bio)	(bio)->bi_iter.bi_bvec_done
#define	bio_for_each_segment4(bv, bvp, b, i)	\
	bio_for_each_segment((bv), (b), (i))
typedef struct bvec_iter bvec_iterator_t;
#else
#define	BIO_BI_SECTOR(bio)	(bio)->bi_sector
#define	BIO_BI_SIZE(bio)	(bio)->bi_size
#define	BIO_BI_IDX(bio)		(bio)->bi_idx
#define	BIO_BI_SKIP(bio)	(0)
#define	bio_for_each_segment4(bv, bvp, b, i)	\
	bio_for_each_segment((bvp), (b), (i))
typedef int bvec_iterator_t;
#endif
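/*
 * Illustrative sketch: iterating over a bio with the compatibility types
 * above.  On bvec_iter kernels the loop variable is the struct ("bv");
 * on older kernels it is the pointer ("bvp").  This example only counts
 * segments, so neither variable is dereferenced.
 *
 *	bvec_iterator_t iter;
 *	struct bio_vec bv, *bvp;
 *	unsigned long nr_segs = 0;
 *
 *	bio_for_each_segment4(bv, bvp, bio, iter)
 *		nr_segs++;
 */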
/*
 * Portable helper for correctly setting the FAILFAST flags.  The
 * correct usage has changed 3 times from 2.6.12 to 2.6.38.
 */
static inline void
bio_set_flags_failfast(struct block_device *bdev, int *flags)
{
#ifdef CONFIG_BUG
	/*
	 * Disable FAILFAST for loopback devices because of the
	 * following incorrect BUG_ON() in loop_make_request().
	 * This support is also disabled for md devices because the
	 * test suite layers md devices on top of loopback devices.
	 * This may be removed when the loopback driver is fixed.
	 *
	 *   BUG_ON(!lo || (rw != READ && rw != WRITE));
	 */
	if ((MAJOR(bdev->bd_dev) == LOOP_MAJOR) ||
	    (MAJOR(bdev->bd_dev) == MD_MAJOR))
		return;

#ifdef BLOCK_EXT_MAJOR
	if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
		return;
#endif /* BLOCK_EXT_MAJOR */
#endif /* CONFIG_BUG */

#if defined(HAVE_BIO_RW_FAILFAST_DTD)
	/* BIO_RW_FAILFAST_* preferred interface from 2.6.28 - 2.6.35 */
	*flags |= (
	    (1 << BIO_RW_FAILFAST_DEV) |
	    (1 << BIO_RW_FAILFAST_TRANSPORT) |
	    (1 << BIO_RW_FAILFAST_DRIVER));
#elif defined(HAVE_REQ_FAILFAST_MASK)
	/*
	 * REQ_FAILFAST_* preferred interface from 2.6.36 - 2.6.xx,
	 * the BIO_* and REQ_* flags were unified under REQ_* flags.
	 */
	*flags |= REQ_FAILFAST_MASK;
#else
#error "Undefined block IO FAILFAST interface."
#endif
}
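/*
 * Illustrative sketch: computing the FAILFAST flags for a device before
 * submitting I/O; the result is typically OR'd into the bio flags via
 * bio_set_op_attrs().  "bdev" is the target block device and "rw" the
 * data direction.
 *
 *	int flags = 0;
 *
 *	bio_set_flags_failfast(bdev, &flags);
 *	bio_set_op_attrs(bio, rw, flags);
 */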
/*
 * Maximum disk label length, it may be undefined for some kernels.
 */
#ifndef DISK_NAME_LEN
#define	DISK_NAME_LEN	32
#endif /* DISK_NAME_LEN */
#ifdef HAVE_BIO_BI_STATUS
static inline int
bi_status_to_errno(blk_status_t status)
{
	switch (status) {
	case BLK_STS_OK:
		return (0);
	case BLK_STS_NOTSUPP:
		return (EOPNOTSUPP);
	case BLK_STS_TIMEOUT:
		return (ETIMEDOUT);
	case BLK_STS_NOSPC:
		return (ENOSPC);
	case BLK_STS_TRANSPORT:
		return (ENOLINK);
	case BLK_STS_TARGET:
		return (EREMOTEIO);
	case BLK_STS_NEXUS:
		return (EBADE);
	case BLK_STS_MEDIUM:
		return (ENODATA);
	case BLK_STS_PROTECTION:
		return (EILSEQ);
	case BLK_STS_RESOURCE:
		return (ENOMEM);
	case BLK_STS_AGAIN:
		return (EAGAIN);
	case BLK_STS_IOERR:
		return (EIO);
	default:
		return (EIO);
	}
}

static inline blk_status_t
errno_to_bi_status(int error)
{
	switch (error) {
	case 0:
		return (BLK_STS_OK);
	case EOPNOTSUPP:
		return (BLK_STS_NOTSUPP);
	case ETIMEDOUT:
		return (BLK_STS_TIMEOUT);
	case ENOSPC:
		return (BLK_STS_NOSPC);
	case ENOLINK:
		return (BLK_STS_TRANSPORT);
	case EREMOTEIO:
		return (BLK_STS_TARGET);
	case EBADE:
		return (BLK_STS_NEXUS);
	case ENODATA:
		return (BLK_STS_MEDIUM);
	case EILSEQ:
		return (BLK_STS_PROTECTION);
	case ENOMEM:
		return (BLK_STS_RESOURCE);
	case EAGAIN:
		return (BLK_STS_AGAIN);
	case EIO:
		return (BLK_STS_IOERR);
	default:
		return (BLK_STS_IOERR);
	}
}
#endif /* HAVE_BIO_BI_STATUS */
/*
 * The bio_endio() prototype changed slightly.  These are helper
 * macros to ensure the prototype and invocation are handled.
 */
#ifdef HAVE_1ARG_BIO_END_IO_T
#ifdef HAVE_BIO_BI_STATUS
#define	BIO_END_IO_ERROR(bio)		bi_status_to_errno(bio->bi_status)
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
#define	BIO_END_IO(bio, error)		bio_set_bi_status(bio, error)
static inline void
bio_set_bi_status(struct bio *bio, int error)
{
	ASSERT3S(error, <=, 0);
	bio->bi_status = errno_to_bi_status(-error);
	bio_endio(bio);
}
#else
#define	BIO_END_IO_ERROR(bio)		(-(bio->bi_error))
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
#define	BIO_END_IO(bio, error)		bio_set_bi_error(bio, error)
static inline void
bio_set_bi_error(struct bio *bio, int error)
{
	ASSERT3S(error, <=, 0);
	bio->bi_error = error;
	bio_endio(bio);
}
#endif /* HAVE_BIO_BI_STATUS */
#else
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x, int z)
#define	BIO_END_IO(bio, error)		bio_endio(bio, error);
#endif /* HAVE_1ARG_BIO_END_IO_T */
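/*
 * Illustrative sketch: a completion callback written against the macros
 * above compiles with both the 1-argument and 2-argument bio_endio()
 * prototypes; "my_bio_done" is a hypothetical handler name.
 *
 *	BIO_END_IO_PROTO(my_bio_done, bio, error)
 *	{
 *		...
 *	}
 *
 * while the submission side completes a bio with a negated errno:
 *
 *	BIO_END_IO(bio, -EIO);
 */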
/*
 * 2.6.38 - 2.6.x API,
 *   blkdev_get_by_path()
 *   blkdev_put()
 *
 * 2.6.28 - 2.6.37 API,
 *   open_bdev_exclusive()
 *   close_bdev_exclusive()
 *
 * 2.6.12 - 2.6.27 API,
 *   open_bdev_excl()
 *   close_bdev_excl()
 *
 * Used to exclusively open a block device from within the kernel.
 */
#if defined(HAVE_BLKDEV_GET_BY_PATH)
#define	vdev_bdev_open(path, md, hld)	blkdev_get_by_path(path, \
					    (md) | FMODE_EXCL, hld)
#define	vdev_bdev_close(bdev, md)	blkdev_put(bdev, (md) | FMODE_EXCL)
#elif defined(HAVE_OPEN_BDEV_EXCLUSIVE)
#define	vdev_bdev_open(path, md, hld)	open_bdev_exclusive(path, md, hld)
#define	vdev_bdev_close(bdev, md)	close_bdev_exclusive(bdev, md)
#else
#define	vdev_bdev_open(path, md, hld)	open_bdev_excl(path, md, hld)
#define	vdev_bdev_close(bdev, md)	close_bdev_excl(bdev)
#endif /* HAVE_BLKDEV_GET_BY_PATH | HAVE_OPEN_BDEV_EXCLUSIVE */
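/*
 * Illustrative sketch: exclusively opening and later closing a vdev's
 * backing device; the path shown is arbitrary, and VDEV_HOLDER (defined
 * below) relaxes the exclusive-open check for internal callers.
 *
 *	struct block_device *bdev;
 *
 *	bdev = vdev_bdev_open("/dev/sda", FMODE_READ | FMODE_WRITE,
 *	    VDEV_HOLDER);
 *	if (IS_ERR(bdev))
 *		return (PTR_ERR(bdev));
 *	...
 *	vdev_bdev_close(bdev, FMODE_READ | FMODE_WRITE);
 */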
/*
 * The function invalidate_bdev() lost its second argument because
 * it was unused.
 */
#ifdef HAVE_1ARG_INVALIDATE_BDEV
#define	vdev_bdev_invalidate(bdev)	invalidate_bdev(bdev)
#else
#define	vdev_bdev_invalidate(bdev)	invalidate_bdev(bdev, 1)
#endif /* HAVE_1ARG_INVALIDATE_BDEV */
/*
 * The lookup_bdev() function was exported for use; prior to this it
 * existed but the symbol was not exported.
 *
 * 4.4.0-6.21 API change for Ubuntu
 * lookup_bdev() gained a second argument, FMODE_*, to check inode permissions.
 */
#ifdef HAVE_1ARG_LOOKUP_BDEV
#define	vdev_lookup_bdev(path)	lookup_bdev(path)
#else
#ifdef HAVE_2ARGS_LOOKUP_BDEV
#define	vdev_lookup_bdev(path)	lookup_bdev(path, 0)
#else
#define	vdev_lookup_bdev(path)	ERR_PTR(-ENOTSUP)
#endif /* HAVE_2ARGS_LOOKUP_BDEV */
#endif /* HAVE_1ARG_LOOKUP_BDEV */
/*
 * To ensure good performance preferentially use the physical block size
 * for proper alignment.  The physical size is supposed to be the internal
 * sector size used by the device.  This is often 4096 bytes for AF devices,
 * while a smaller 512 byte logical size is supported for compatibility.
 *
 * Unfortunately, many drives still misreport their physical sector size.
 * For devices which are known to lie you may need to manually set this
 * at pool creation time with 'zpool create -o ashift=12 ...'.
 *
 * When the physical block size interface isn't available, we fall back to
 * the logical block size interface and then the older hard sector size.
 */
#ifdef HAVE_BDEV_PHYSICAL_BLOCK_SIZE
#define	vdev_bdev_block_size(bdev)	bdev_physical_block_size(bdev)
#else
#ifdef HAVE_BDEV_LOGICAL_BLOCK_SIZE
#define	vdev_bdev_block_size(bdev)	bdev_logical_block_size(bdev)
#else
#define	vdev_bdev_block_size(bdev)	bdev_hardsect_size(bdev)
#endif /* HAVE_BDEV_LOGICAL_BLOCK_SIZE */
#endif /* HAVE_BDEV_PHYSICAL_BLOCK_SIZE */
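/*
 * Illustrative sketch: deriving a pool ashift from the reported block
 * size, assuming the ZFS highbit64() helper and SPA_MINBLOCKSIZE are
 * available to the caller.
 *
 *	int bsize = vdev_bdev_block_size(bdev);
 *	uint64_t ashift = highbit64(MAX(bsize, SPA_MINBLOCKSIZE)) - 1;
 */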
#ifndef HAVE_BIO_SET_OP_ATTRS
/*
 * Kernels without bio_set_op_attrs use bi_rw for the bio flags.
 */
static inline void
bio_set_op_attrs(struct bio *bio, unsigned rw, unsigned flags)
{
	bio->bi_rw |= rw | flags;
}
#endif
/*
 * bio_set_flush - Set the appropriate flags in a bio to guarantee
 * data are on non-volatile media on completion.
 *
 * 2.6.X - 2.6.36 API,
 *   WRITE_BARRIER - Tells the block layer to commit all previously submitted
 *   writes to stable storage before this one is started and that the current
 *   write is on stable storage upon completion.  Also prevents reordering
 *   on both sides of the current operation.
 *
 * 2.6.37 - 4.8 API,
 *   Introduce WRITE_FLUSH, WRITE_FUA, and WRITE_FLUSH_FUA flags as a
 *   replacement for WRITE_BARRIER to allow expressing richer semantics
 *   to the block layer.  It's up to the block layer to implement the
 *   semantics correctly.  Use the WRITE_FLUSH_FUA flag combination.
 *
 * 4.8 - 4.9 API,
 *   REQ_FLUSH was renamed to REQ_PREFLUSH.  For consistency with previous
 *   ZoL releases, prefer the WRITE_FLUSH_FUA flag set if it's available.
 *
 * 4.10 API,
 *   The read/write flags and their modifiers, including WRITE_FLUSH,
 *   WRITE_FUA and WRITE_FLUSH_FUA were removed from fs.h in
 *   torvalds/linux@70fd7614 and replaced by direct flag modification
 *   of the REQ_ flags in bio->bi_opf.  Use REQ_PREFLUSH.
 */
static inline void
bio_set_flush(struct bio *bio)
{
#if defined(REQ_PREFLUSH)	/* >= 4.10 */
	bio_set_op_attrs(bio, 0, REQ_PREFLUSH);
#elif defined(WRITE_FLUSH_FUA)	/* >= 2.6.37 and <= 4.9 */
	bio_set_op_attrs(bio, 0, WRITE_FLUSH_FUA);
#elif defined(WRITE_BARRIER)	/* < 2.6.37 */
	bio_set_op_attrs(bio, 0, WRITE_BARRIER);
#else
#error	"Allowing the build will cause bio_set_flush requests to be ignored."
#endif
}
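/*
 * Illustrative sketch: marking an (already allocated) empty bio as a
 * full flush before submission; allocation, device assignment, and the
 * kernel-specific submit_bio() call are elided, and "my_flush_done" is
 * a hypothetical completion handler.
 *
 *	bio_set_flush(bio);
 *	bio->bi_end_io = my_flush_done;
 *	submit_bio(...);
 */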
/*
 * 4.8 - 4.x API,
 *   REQ_OP_FLUSH
 *
 * 2.6.36 - 4.7 API,
 *   REQ_FLUSH
 *
 * 2.6.x - 2.6.35 API,
 *   HAVE_BIO_RW_BARRIER
 *
 * Used to determine if a cache flush has been requested.  This check has
 * been left intentionally broad in order to cover both a legacy flush
 * and the new preflush behavior introduced in Linux 4.8.  This is correct
 * in all cases but may have a performance impact for some kernels.  It
 * has the advantage of minimizing kernel-specific changes in the zvol code.
 */
static inline boolean_t
bio_is_flush(struct bio *bio)
{
#if defined(HAVE_REQ_OP_FLUSH) && defined(HAVE_BIO_BI_OPF)
	return ((bio_op(bio) == REQ_OP_FLUSH) || (bio->bi_opf & REQ_PREFLUSH));
#elif defined(REQ_PREFLUSH) && defined(HAVE_BIO_BI_OPF)
	return (bio->bi_opf & REQ_PREFLUSH);
#elif defined(REQ_PREFLUSH) && !defined(HAVE_BIO_BI_OPF)
	return (bio->bi_rw & REQ_PREFLUSH);
#elif defined(REQ_FLUSH)
	return (bio->bi_rw & REQ_FLUSH);
#elif defined(HAVE_BIO_RW_BARRIER)
	return (bio->bi_rw & (1 << BIO_RW_BARRIER));
#else
#error	"Allowing the build will cause flush requests to be ignored."
#endif
}
/*
 * 4.8 - 4.x API,
 *   REQ_FUA flag moved to bio->bi_opf
 *
 * 2.6.x - 4.7 API,
 *   REQ_FUA
 */
static inline boolean_t
bio_is_fua(struct bio *bio)
{
#if defined(HAVE_BIO_BI_OPF)
	return (bio->bi_opf & REQ_FUA);
#elif defined(REQ_FUA)
	return (bio->bi_rw & REQ_FUA);
#else
#error	"Allowing the build will cause fua requests to be ignored."
#endif
}
/*
 * 4.8 - 4.x API,
 *   REQ_OP_DISCARD
 *
 * 2.6.36 - 4.7 API,
 *   REQ_DISCARD
 *
 * 2.6.28 - 2.6.35 API,
 *   BIO_RW_DISCARD
 *
 * In all cases the normal I/O path is used for discards.  The only
 * difference is how the kernel tags individual I/Os as discards.
 *
 * Note that 2.6.32 era kernels provide both BIO_RW_DISCARD and REQ_DISCARD,
 * where BIO_RW_DISCARD is the correct interface.  Therefore, it is important
 * that the HAVE_BIO_RW_DISCARD check occur before the REQ_DISCARD check.
 */
static inline boolean_t
bio_is_discard(struct bio *bio)
{
#if defined(HAVE_REQ_OP_DISCARD)
	return (bio_op(bio) == REQ_OP_DISCARD);
#elif defined(HAVE_BIO_RW_DISCARD)
	return (bio->bi_rw & (1 << BIO_RW_DISCARD));
#elif defined(REQ_DISCARD)
	return (bio->bi_rw & REQ_DISCARD);
#else
/* Discards misread as writes risk triggering the DMU_MAX_ACCESS assertion. */
#error	"Allowing the build will cause discard requests to become writes."
#endif
}
/*
 * 4.8 - 4.x API,
 *   REQ_OP_SECURE_ERASE
 *
 * 2.6.36 - 4.7 API,
 *   REQ_SECURE
 *
 * 2.6.x - 2.6.35 API,
 *   Unsupported by kernel
 */
static inline boolean_t
bio_is_secure_erase(struct bio *bio)
{
#if defined(HAVE_REQ_OP_SECURE_ERASE)
	return (bio_op(bio) == REQ_OP_SECURE_ERASE);
#elif defined(REQ_SECURE)
	return (bio->bi_rw & REQ_SECURE);
#else
	return (0);
#endif
}
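/*
 * Illustrative sketch: classifying an incoming bio in a zvol-style
 * request function using the predicates above; the my_*() handlers are
 * hypothetical.
 *
 *	if (bio_is_discard(bio) || bio_is_secure_erase(bio))
 *		my_discard(bio);
 *	else if (bio_is_flush(bio))
 *		my_flush(bio);
 *	else
 *		my_rw(bio, bio_is_fua(bio));
 */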
/*
 * Discard granularity and alignment restrictions may now be set.  For
 * older kernels which do not support this it is safe to skip it.
 */
#ifdef HAVE_DISCARD_GRANULARITY
static inline void
blk_queue_discard_granularity(struct request_queue *q, unsigned int dg)
{
	q->limits.discard_granularity = dg;
}
#else
#define	blk_queue_discard_granularity(x, dg)	((void)0)
#endif /* HAVE_DISCARD_GRANULARITY */
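/*
 * Illustrative sketch: advertising discard support on a volume's queue;
 * "zv" is a hypothetical zvol whose smallest discardable unit is its
 * volume block size.
 *
 *	blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize);
 */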
/*
 * Default Linux IO Scheduler,
 * Setting the scheduler to noop will allow the Linux IO scheduler to
 * still perform front and back merging, while leaving the request
 * ordering and prioritization to the ZFS IO scheduler.
 */
#define	VDEV_SCHEDULER			"noop"
/*
 * A common holder for vdev_bdev_open() is used to relax the exclusive open
 * semantics slightly.  Internal vdev disk callers may pass VDEV_HOLDER to
 * allow them to open the device multiple times.  Other kernel callers and
 * user space processes which don't pass this value will get EBUSY.  This is
 * currently required for the correct operation of hot spares.
 */
#define	VDEV_HOLDER			((void *)0x2401de7)
static inline void
blk_generic_start_io_acct(struct request_queue *q, int rw,
    unsigned long sectors, struct hd_struct *part)
{
#if defined(HAVE_GENERIC_IO_ACCT_3ARG)
	generic_start_io_acct(rw, sectors, part);
#elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
	generic_start_io_acct(q, rw, sectors, part);
#endif
}

static inline void
blk_generic_end_io_acct(struct request_queue *q, int rw,
    struct hd_struct *part, unsigned long start_time)
{
#if defined(HAVE_GENERIC_IO_ACCT_3ARG)
	generic_end_io_acct(rw, part, start_time);
#elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
	generic_end_io_acct(q, rw, part, start_time);
#endif
}
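/*
 * Illustrative sketch: bracketing a bio with the generic I/O accounting
 * wrappers above; "part" is the target hd_struct and jiffies supplies
 * the start time.
 *
 *	unsigned long start = jiffies;
 *
 *	blk_generic_start_io_acct(q, rw, bio_sectors(bio), part);
 *	... (perform the I/O) ...
 *	blk_generic_end_io_acct(q, rw, part, start);
 */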
#endif /* _ZFS_BLKDEV_H */