]> git.proxmox.com Git - mirror_zfs.git/blame - include/linux/blkdev_compat.h
OpenZFS 9337 - zfs get all is slow due to uncached metadata
[mirror_zfs.git] / include / linux / blkdev_compat.h
CommitLineData
60101509
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
61e90960 21
60101509 22/*
61e90960 23 * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
60101509
BB
24 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
25 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
26 * LLNL-CODE-403049.
27 */
28
61e90960 29#ifndef _ZFS_BLKDEV_H
d1d7e268 30#define _ZFS_BLKDEV_H
60101509
BB
31
32#include <linux/blkdev.h>
33#include <linux/elevator.h>
bc17f104 34#include <linux/backing-dev.h>
93ce2b4c 35#include <linux/hdreg.h>
7b98f0d9 36#include <linux/msdos_fs.h> /* for SECTOR_* */
60101509
BB
37
38#ifndef HAVE_FMODE_T
39typedef unsigned __bitwise__ fmode_t;
40#endif /* HAVE_FMODE_T */
41
10f88c5c
GDN
/*
 * Compatibility shim used when blk_queue_flag_set() was not detected:
 * forwards the request to queue_flag_set() with the same arguments.
 *
 * flag - queue flag to set
 * q    - request queue to modify
 */
#if !defined(HAVE_BLK_QUEUE_FLAG_SET)
static inline void
blk_queue_flag_set(unsigned int flag, struct request_queue *q)
{
	queue_flag_set(flag, q);
}
#endif /* !HAVE_BLK_QUEUE_FLAG_SET */
49
/*
 * Compatibility shim used when blk_queue_flag_clear() was not detected:
 * forwards the request to queue_flag_clear() with the same arguments.
 *
 * flag - queue flag to clear
 * q    - request queue to modify
 */
#if !defined(HAVE_BLK_QUEUE_FLAG_CLEAR)
static inline void
blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
{
	queue_flag_clear(flag, q);
}
#endif /* !HAVE_BLK_QUEUE_FLAG_CLEAR */
57
b18019d2 58/*
cf41432c
BB
59 * 4.7 - 4.x API,
60 * The blk_queue_write_cache() interface has replaced blk_queue_flush()
61 * interface. However, the new interface is GPL-only thus we implement
62 * our own trivial wrapper when the GPL-only version is detected.
63 *
64 * 2.6.36 - 4.6 API,
b18019d2
ED
65 * The blk_queue_flush() interface has replaced blk_queue_ordered()
66 * interface. However, while the old interface was available to all the
67 * new one is GPL-only. Thus if the GPL-only version is detected we
cf41432c
BB
68 * implement our own trivial helper.
69 *
70 * 2.6.x - 2.6.35
71 * Legacy blk_queue_ordered() interface.
68e8f59a 72 */
68e8f59a 73static inline void
cf41432c 74blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua)
68e8f59a 75{
cf41432c 76#if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY)
68e8f59a 77 if (wc)
d6bb2217 78 blk_queue_flag_set(QUEUE_FLAG_WC, q);
68e8f59a 79 else
d6bb2217 80 blk_queue_flag_clear(QUEUE_FLAG_WC, q);
68e8f59a 81 if (fua)
d6bb2217 82 blk_queue_flag_set(QUEUE_FLAG_FUA, q);
68e8f59a 83 else
d6bb2217 84 blk_queue_flag_clear(QUEUE_FLAG_FUA, q);
cf41432c
BB
85#elif defined(HAVE_BLK_QUEUE_WRITE_CACHE)
86 blk_queue_write_cache(q, wc, fua);
87#elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY)
88 if (wc)
89 q->flush_flags |= REQ_FLUSH;
90 if (fua)
91 q->flush_flags |= REQ_FUA;
92#elif defined(HAVE_BLK_QUEUE_FLUSH)
93 blk_queue_flush(q, (wc ? REQ_FLUSH : 0) | (fua ? REQ_FUA : 0));
94#else
95 blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL);
68e8f59a 96#endif
cf41432c 97}
68e8f59a 98
8326eb46
BB
99/*
100 * Most of the blk_* macros were removed in 2.6.36. Ostensibly this was
101 * done to improve readability and allow easier grepping. However, from
102 * a portability standpoint the macros are helpful. Therefore the needed
103 * macros are redefined here if they are missing from the kernel.
104 */
/*
 * True when the request's cmd_type equals REQ_TYPE_FS.  Only defined
 * here when no blk_fs_request macro is already present.
 */
#if !defined(blk_fs_request)
#define	blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
#endif /* !blk_fs_request */
108
34037afe
ED
109/*
110 * 2.6.34 API change,
111 * The blk_queue_max_hw_sectors() function replaces blk_queue_max_sectors().
112 */
#if !defined(HAVE_BLK_QUEUE_MAX_HW_SECTORS)
/*
 * Fallback for blk_queue_max_hw_sectors(): passes the limit through to
 * blk_queue_max_sectors().
 *
 * q              - request queue to configure
 * max_hw_sectors - limit handed to blk_queue_max_sectors()
 */
static inline void
__blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
{
	blk_queue_max_sectors(q, max_hw_sectors);
}
/* Route callers of the new name to the fallback above. */
#define	blk_queue_max_hw_sectors	__blk_queue_max_hw_sectors
#endif /* !HAVE_BLK_QUEUE_MAX_HW_SECTORS */
121
122/*
123 * 2.6.34 API change,
124 * The blk_queue_max_segments() function consolidates
125 * blk_queue_max_hw_segments() and blk_queue_max_phys_segments().
126 */
#if !defined(HAVE_BLK_QUEUE_MAX_SEGMENTS)
/*
 * Fallback for blk_queue_max_segments(): applies the same limit through
 * both blk_queue_max_phys_segments() and blk_queue_max_hw_segments().
 *
 * q            - request queue to configure
 * max_segments - limit passed to both calls
 */
static inline void
__blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
{
	blk_queue_max_phys_segments(q, max_segments);
	blk_queue_max_hw_segments(q, max_segments);
}
/* Route callers of the new name to the fallback above. */
#define	blk_queue_max_segments	__blk_queue_max_segments
#endif /* !HAVE_BLK_QUEUE_MAX_SEGMENTS */
136
bc17f104
RY
137static inline void
138blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages)
139{
140#ifdef HAVE_BLK_QUEUE_BDI_DYNAMIC
141 q->backing_dev_info->ra_pages = ra_pages;
142#else
143 q->backing_dev_info.ra_pages = ra_pages;
144#endif
145}
146
dd3e1e30
GDN
/*
 * Fallback used when get_disk_and_module() was not detected: returns
 * whatever get_disk() returns for the same disk.
 */
#if !defined(HAVE_GET_DISK_AND_MODULE)
static inline struct kobject *
get_disk_and_module(struct gendisk *disk)
{
	struct kobject *kobj = get_disk(disk);

	return (kobj);
}
#endif /* !HAVE_GET_DISK_AND_MODULE */
154
60101509
BB
155#ifndef HAVE_GET_DISK_RO
156static inline int
157get_disk_ro(struct gendisk *disk)
158{
159 int policy = 0;
160
161 if (disk->part[0])
162 policy = disk->part[0]->policy;
163
d1d7e268 164 return (policy);
60101509
BB
165}
166#endif /* HAVE_GET_DISK_RO */
167
d4541210
CC
/*
 * Accessors for the bio position fields.  With HAVE_BIO_BVEC_ITER the
 * fields are read from bio->bi_iter; otherwise they are read directly
 * from the bio and BIO_BI_SKIP() is always 0.  bio_for_each_segment4()
 * forwards either bv or bvp to bio_for_each_segment() to match.
 */
#if defined(HAVE_BIO_BVEC_ITER)
typedef struct bvec_iter bvec_iterator_t;
#define	BIO_BI_SECTOR(bio)	((bio)->bi_iter.bi_sector)
#define	BIO_BI_SIZE(bio)	((bio)->bi_iter.bi_size)
#define	BIO_BI_IDX(bio)		((bio)->bi_iter.bi_idx)
#define	BIO_BI_SKIP(bio)	((bio)->bi_iter.bi_bvec_done)
#define	bio_for_each_segment4(bv, bvp, b, i)	\
	bio_for_each_segment((bv), (b), (i))
#else
typedef int bvec_iterator_t;
#define	BIO_BI_SECTOR(bio)	((bio)->bi_sector)
#define	BIO_BI_SIZE(bio)	((bio)->bi_size)
#define	BIO_BI_IDX(bio)		((bio)->bi_idx)
#define	BIO_BI_SKIP(bio)	(0)
#define	bio_for_each_segment4(bv, bvp, b, i)	\
	bio_for_each_segment((bvp), (b), (i))
#endif /* HAVE_BIO_BVEC_ITER */
185
61e90960
BB
186/*
187 * Portable helper for correctly setting the FAILFAST flags. The
188 * correct usage has changed 3 times from 2.6.12 to 2.6.38.
189 */
2959d94a
BB
190static inline void
191bio_set_flags_failfast(struct block_device *bdev, int *flags)
192{
f4af6bb7 193#ifdef CONFIG_BUG
2959d94a 194 /*
f4af6bb7
BB
195 * Disable FAILFAST for loopback devices because of the
196 * following incorrect BUG_ON() in loop_make_request().
2959d94a
BB
197 * This support is also disabled for md devices because the
198 * test suite layers md devices on top of loopback devices.
199 * This may be removed when the loopback driver is fixed.
200 *
201 * BUG_ON(!lo || (rw != READ && rw != WRITE));
202 */
2959d94a
BB
203 if ((MAJOR(bdev->bd_dev) == LOOP_MAJOR) ||
204 (MAJOR(bdev->bd_dev) == MD_MAJOR))
205 return;
206
207#ifdef BLOCK_EXT_MAJOR
208 if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
209 return;
210#endif /* BLOCK_EXT_MAJOR */
211#endif /* CONFIG_BUG */
f4af6bb7 212
e4853338 213#if defined(HAVE_BIO_RW_FAILFAST_DTD)
f4af6bb7 214 /* BIO_RW_FAILFAST_* preferred interface from 2.6.28 - 2.6.35 */
d1d7e268
MK
215 *flags |= (
216 (1 << BIO_RW_FAILFAST_DEV) |
217 (1 << BIO_RW_FAILFAST_TRANSPORT) |
218 (1 << BIO_RW_FAILFAST_DRIVER));
e4853338 219#elif defined(HAVE_REQ_FAILFAST_MASK)
d1d7e268
MK
220 /*
221 * REQ_FAILFAST_* preferred interface from 2.6.36 - 2.6.xx,
222 * the BIO_* and REQ_* flags were unified under REQ_* flags.
223 */
f4af6bb7 224 *flags |= REQ_FAILFAST_MASK;
e4853338
TC
225#else
226#error "Undefined block IO FAILFAST interface."
227#endif
2959d94a
BB
228}
229
61e90960
BB
230/*
231 * Maximum disk label length, it may be undefined for some kernels.
232 */
/* Supply a default of 32 when DISK_NAME_LEN is not already defined. */
#if !defined(DISK_NAME_LEN)
#define	DISK_NAME_LEN	32
#endif /* !DISK_NAME_LEN */
236
36ba27e9
BB
#if defined(HAVE_BIO_BI_STATUS)
/*
 * Translate a blk_status_t into a positive errno value.
 *
 * BLK_STS_OK maps to 0.  BLK_STS_IOERR and any status not listed below
 * map to EIO.
 */
static inline int
bi_status_to_errno(blk_status_t status)
{
	int err;

	switch (status) {
	case BLK_STS_OK:
		err = 0;
		break;
	case BLK_STS_NOTSUPP:
		err = EOPNOTSUPP;
		break;
	case BLK_STS_TIMEOUT:
		err = ETIMEDOUT;
		break;
	case BLK_STS_NOSPC:
		err = ENOSPC;
		break;
	case BLK_STS_TRANSPORT:
		err = ENOLINK;
		break;
	case BLK_STS_TARGET:
		err = EREMOTEIO;
		break;
	case BLK_STS_NEXUS:
		err = EBADE;
		break;
	case BLK_STS_MEDIUM:
		err = ENODATA;
		break;
	case BLK_STS_PROTECTION:
		err = EILSEQ;
		break;
	case BLK_STS_RESOURCE:
		err = ENOMEM;
		break;
	case BLK_STS_AGAIN:
		err = EAGAIN;
		break;
	case BLK_STS_IOERR:
	default:
		err = EIO;
		break;
	}

	return (err);
}

/*
 * Translate a positive errno value into a blk_status_t; the inverse of
 * bi_status_to_errno().
 *
 * 0 maps to BLK_STS_OK.  EIO and any errno not listed below map to
 * BLK_STS_IOERR.
 */
static inline blk_status_t
errno_to_bi_status(int error)
{
	blk_status_t sts;

	switch (error) {
	case 0:
		sts = BLK_STS_OK;
		break;
	case EOPNOTSUPP:
		sts = BLK_STS_NOTSUPP;
		break;
	case ETIMEDOUT:
		sts = BLK_STS_TIMEOUT;
		break;
	case ENOSPC:
		sts = BLK_STS_NOSPC;
		break;
	case ENOLINK:
		sts = BLK_STS_TRANSPORT;
		break;
	case EREMOTEIO:
		sts = BLK_STS_TARGET;
		break;
	case EBADE:
		sts = BLK_STS_NEXUS;
		break;
	case ENODATA:
		sts = BLK_STS_MEDIUM;
		break;
	case EILSEQ:
		sts = BLK_STS_PROTECTION;
		break;
	case ENOMEM:
		sts = BLK_STS_RESOURCE;
		break;
	case EAGAIN:
		sts = BLK_STS_AGAIN;
		break;
	case EIO:
	default:
		sts = BLK_STS_IOERR;
		break;
	}

	return (sts);
}
#endif /* HAVE_BIO_BI_STATUS */
304
61e90960 305/*
784a7fe5
LW
306 * 4.3 API change
307 * The bio_endio() prototype changed slightly. These are helper
308 * macros to ensure the prototype and invocation are handled.
61e90960 309 */
/*
 * BIO_END_IO_PROTO(fn, x, z) - declares a static completion callback fn
 *     taking the bio as x and, on 2-argument kernels, the error as z.
 * BIO_END_IO(bio, error)     - completes bio with a non-positive error.
 * BIO_END_IO_ERROR(bio)      - reads the completion error from the bio;
 *     only provided when HAVE_1ARG_BIO_END_IO_T is defined.
 *
 * None of the macros carries a trailing semicolon, so each expands to a
 * single expression/declarator and is safe in unbraced if/else bodies.
 */
#ifdef HAVE_1ARG_BIO_END_IO_T
#ifdef HAVE_BIO_BI_STATUS
#define	BIO_END_IO_ERROR(bio)		bi_status_to_errno((bio)->bi_status)
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
#define	BIO_END_IO(bio, error)		bio_set_bi_status(bio, error)
/*
 * Record error (which must be <= 0) in bio->bi_status, converted from
 * its negated errno value, and then complete the bio.
 */
static inline void
bio_set_bi_status(struct bio *bio, int error)
{
	ASSERT3S(error, <=, 0);
	bio->bi_status = errno_to_bi_status(-error);
	bio_endio(bio);
}
#else
#define	BIO_END_IO_ERROR(bio)		(-((bio)->bi_error))
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
#define	BIO_END_IO(bio, error)		bio_set_bi_error(bio, error)
/*
 * Record error (which must be <= 0) in bio->bi_error unchanged and then
 * complete the bio.
 */
static inline void
bio_set_bi_error(struct bio *bio, int error)
{
	ASSERT3S(error, <=, 0);
	bio->bi_error = error;
	bio_endio(bio);
}
#endif /* HAVE_BIO_BI_STATUS */

#else
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x, int z)
#define	BIO_END_IO(bio, error)		bio_endio(bio, error)
#endif /* HAVE_1ARG_BIO_END_IO_T */
61e90960
BB
339
340/*
45066d1f
BB
341 * 2.6.38 - 2.6.x API,
342 * blkdev_get_by_path()
343 * blkdev_put()
344 *
345 * 2.6.28 - 2.6.37 API,
346 * open_bdev_exclusive()
347 * close_bdev_exclusive()
348 *
349 * 2.6.12 - 2.6.27 API,
350 * open_bdev_excl()
351 * close_bdev_excl()
352 *
61e90960
BB
353 * Used to exclusively open a block device from within the kernel.
354 */
/*
 * vdev_bdev_open(path, md, hld) / vdev_bdev_close(bdev, md)
 *
 * Map onto the open/close pair detected for this kernel.  With
 * blkdev_get_by_path() the mode is OR'ed with FMODE_EXCL on both open
 * and close; the last fallback's close takes only the bdev.
 */
#if defined(HAVE_BLKDEV_GET_BY_PATH)
#define	vdev_bdev_open(path, md, hld)	\
	blkdev_get_by_path(path, (md) | FMODE_EXCL, hld)
#define	vdev_bdev_close(bdev, md)	\
	blkdev_put(bdev, (md) | FMODE_EXCL)
#elif defined(HAVE_OPEN_BDEV_EXCLUSIVE)
#define	vdev_bdev_open(path, md, hld)	\
	open_bdev_exclusive(path, md, hld)
#define	vdev_bdev_close(bdev, md)	\
	close_bdev_exclusive(bdev, md)
#else
#define	vdev_bdev_open(path, md, hld)	\
	open_bdev_excl(path, md, hld)
#define	vdev_bdev_close(bdev, md)	\
	close_bdev_excl(bdev)
#endif /* HAVE_BLKDEV_GET_BY_PATH | HAVE_OPEN_BDEV_EXCLUSIVE */
61e90960
BB
366
367/*
368 * 2.6.22 API change
369 * The function invalidate_bdev() lost its second argument because
370 * it was unused.
371 */
/*
 * vdev_bdev_invalidate(bdev) calls invalidate_bdev(), passing a literal 1
 * as the second argument when the 1-argument form was not detected.
 */
#if defined(HAVE_1ARG_INVALIDATE_BDEV)
#define	vdev_bdev_invalidate(bdev)	invalidate_bdev(bdev)
#else
#define	vdev_bdev_invalidate(bdev)	invalidate_bdev(bdev, 1)
#endif /* HAVE_1ARG_INVALIDATE_BDEV */
377
2b7ab9d4
BB
378/*
379 * 2.6.27 API change
e02aaf17 380 * The function was exported for use, prior to this it existed but the
2b7ab9d4 381 * symbol was not exported.
e02aaf17
HM
382 *
383 * 4.4.0-6.21 API change for Ubuntu
384 * lookup_bdev() gained a second argument, FMODE_*, to check inode permissions.
2b7ab9d4 385 */
e02aaf17
HM
/*
 * vdev_lookup_bdev(path) calls lookup_bdev() in its 1- or 2-argument
 * form (passing 0 as the second argument).  When neither form was
 * detected it evaluates to ERR_PTR(-ENOTSUP) without touching path.
 */
#if defined(HAVE_1ARG_LOOKUP_BDEV)
#define	vdev_lookup_bdev(path)	lookup_bdev(path)
#elif defined(HAVE_2ARGS_LOOKUP_BDEV)
#define	vdev_lookup_bdev(path)	lookup_bdev(path, 0)
#else
#define	vdev_lookup_bdev(path)	ERR_PTR(-ENOTSUP)
#endif /* HAVE_1ARG_LOOKUP_BDEV | HAVE_2ARGS_LOOKUP_BDEV */
2b7ab9d4 395
61e90960
BB
396/*
397 * 2.6.30 API change
2404b014
BB
398 * To ensure good performance preferentially use the physical block size
399 * for proper alignment. The physical size is supposed to be the internal
400 * sector size used by the device. This is often 4096 byte for AF devices,
401 * while a smaller 512 byte logical size is supported for compatibility.
402 *
403 * Unfortunately, many drives still misreport their physical sector size.
404 * For devices which are known to lie you may need to manually set this
405 * at pool creation time with 'zpool create -o ashift=12 ...'.
406 *
407 * When the physical block size interface isn't available, we fall back to
408 * the logical block size interface and then the older hard sector size.
61e90960 409 */
/*
 * vdev_bdev_block_size(bdev) picks, in order of preference, the
 * physical block size, the logical block size, or the hard sector size
 * accessor, depending on which interfaces were detected.
 */
#if defined(HAVE_BDEV_PHYSICAL_BLOCK_SIZE)
#define	vdev_bdev_block_size(bdev)	bdev_physical_block_size(bdev)
#elif defined(HAVE_BDEV_LOGICAL_BLOCK_SIZE)
#define	vdev_bdev_block_size(bdev)	bdev_logical_block_size(bdev)
#else
#define	vdev_bdev_block_size(bdev)	bdev_hardsect_size(bdev)
#endif /* HAVE_BDEV_PHYSICAL_BLOCK_SIZE | HAVE_BDEV_LOGICAL_BLOCK_SIZE */
61e90960 419
a5e046ea 420#ifndef HAVE_BIO_SET_OP_ATTRS
96801d29 421/*
a5e046ea 422 * Kernels without bio_set_op_attrs use bi_rw for the bio flags.
96801d29 423 */
a5e046ea
TC
424static inline void
425bio_set_op_attrs(struct bio *bio, unsigned rw, unsigned flags)
426{
427 bio->bi_rw |= rw | flags;
428}
429#endif
430
431/*
432 * bio_set_flush - Set the appropriate flags in a bio to guarantee
433 * data are on non-volatile media on completion.
434 *
435 * 2.6.X - 2.6.36 API,
436 * WRITE_BARRIER - Tells the block layer to commit all previously submitted
437 * writes to stable storage before this one is started and that the current
438 * write is on stable storage upon completion. Also prevents reordering
439 * on both sides of the current operation.
440 *
441 * 2.6.37 - 4.8 API,
442 * Introduce WRITE_FLUSH, WRITE_FUA, and WRITE_FLUSH_FUA flags as a
443 * replacement for WRITE_BARRIER to allow expressing richer semantics
444 * to the block layer. It's up to the block layer to implement the
445 * semantics correctly. Use the WRITE_FLUSH_FUA flag combination.
446 *
447 * 4.8 - 4.9 API,
448 * REQ_FLUSH was renamed to REQ_PREFLUSH. For consistency with previous
449 * ZoL releases, prefer the WRITE_FLUSH_FUA flag set if it's available.
450 *
451 * 4.10 API,
452 * The read/write flags and their modifiers, including WRITE_FLUSH,
453 * WRITE_FUA and WRITE_FLUSH_FUA were removed from fs.h in
454 * torvalds/linux@70fd7614 and replaced by direct flag modification
455 * of the REQ_ flags in bio->bi_opf. Use REQ_PREFLUSH.
456 */
457static inline void
458bio_set_flush(struct bio *bio)
459{
46300986
TH
460#if defined(REQ_PREFLUSH) /* >= 4.10 */
461 bio_set_op_attrs(bio, 0, REQ_PREFLUSH);
a5e046ea
TC
462#elif defined(WRITE_FLUSH_FUA) /* >= 2.6.37 and <= 4.9 */
463 bio_set_op_attrs(bio, 0, WRITE_FLUSH_FUA);
46300986
TH
464#elif defined(WRITE_BARRIER) /* < 2.6.37 */
465 bio_set_op_attrs(bio, 0, WRITE_BARRIER);
76e5f6fe 466#else
a5e046ea 467#error "Allowing the build will cause bio_set_flush requests to be ignored."
76e5f6fe 468#endif
a5e046ea 469}
76e5f6fe 470
cf41432c
BB
471/*
472 * 4.8 - 4.x API,
473 * REQ_OP_FLUSH
474 *
475 * 4.8-rc0 - 4.8-rc1,
476 * REQ_PREFLUSH
477 *
478 * 2.6.36 - 4.7 API,
479 * REQ_FLUSH
480 *
481 * 2.6.x - 2.6.35 API,
482 * HAVE_BIO_RW_BARRIER
483 *
484 * Used to determine if a cache flush has been requested. This check has
485 * been left intentionally broad in order to cover both a legacy flush
486 * and the new preflush behavior introduced in Linux 4.8. This is correct
487 * in all cases but may have a performance impact for some kernels. It
488 * has the advantage of minimizing kernel specific changes in the zvol code.
6eb73b00 489 *
cf41432c
BB
490 */
491static inline boolean_t
492bio_is_flush(struct bio *bio)
493{
494#if defined(HAVE_REQ_OP_FLUSH) && defined(HAVE_BIO_BI_OPF)
495 return ((bio_op(bio) == REQ_OP_FLUSH) || (bio->bi_opf & REQ_PREFLUSH));
496#elif defined(REQ_PREFLUSH) && defined(HAVE_BIO_BI_OPF)
497 return (bio->bi_opf & REQ_PREFLUSH);
498#elif defined(REQ_PREFLUSH) && !defined(HAVE_BIO_BI_OPF)
499 return (bio->bi_rw & REQ_PREFLUSH);
6eb73b00
BB
500#elif defined(REQ_FLUSH)
501 return (bio->bi_rw & REQ_FLUSH);
46300986
TH
502#elif defined(HAVE_BIO_RW_BARRIER)
503 return (bio->bi_rw & (1 << BIO_RW_BARRIER));
96801d29 504#else
5fc73c46 505#error "Allowing the build will cause flush requests to be ignored."
96801d29 506#endif
cf41432c 507}
76e5f6fe 508
cf41432c
BB
509/*
510 * 4.8 - 4.x API,
511 * REQ_FUA flag moved to bio->bi_opf
512 *
513 * 2.6.x - 4.7 API,
514 * REQ_FUA
515 */
516static inline boolean_t
517bio_is_fua(struct bio *bio)
518{
519#if defined(HAVE_BIO_BI_OPF)
520 return (bio->bi_opf & REQ_FUA);
521#elif defined(REQ_FUA)
522 return (bio->bi_rw & REQ_FUA);
523#else
5fc73c46 524#error "Allowing the build will cause fua requests to be ignored."
37f9dac5 525#endif
cf41432c 526}
96801d29 527
30930fba 528/*
cf41432c
BB
529 * 4.8 - 4.x API,
530 * REQ_OP_DISCARD
3b86aeb2
CC
531 *
532 * 2.6.36 - 4.7 API,
533 * REQ_DISCARD
534 *
cf41432c
BB
535 * 2.6.28 - 2.6.35 API,
536 * BIO_RW_DISCARD
3b86aeb2
CC
537 *
538 * In all cases the normal I/O path is used for discards. The only
539 * difference is how the kernel tags individual I/Os as discards.
6eb73b00
BB
540 *
541 * Note that 2.6.32 era kernels provide both BIO_RW_DISCARD and REQ_DISCARD,
542 * where BIO_RW_DISCARD is the correct interface. Therefore, it is important
543 * that the HAVE_BIO_RW_DISCARD check occur before the REQ_DISCARD check.
30930fba 544 */
3b86aeb2
CC
545static inline boolean_t
546bio_is_discard(struct bio *bio)
547{
cf41432c
BB
548#if defined(HAVE_REQ_OP_DISCARD)
549 return (bio_op(bio) == REQ_OP_DISCARD);
cf41432c
BB
550#elif defined(HAVE_BIO_RW_DISCARD)
551 return (bio->bi_rw & (1 << BIO_RW_DISCARD));
6eb73b00
BB
552#elif defined(REQ_DISCARD)
553 return (bio->bi_rw & REQ_DISCARD);
37f9dac5 554#else
5fc73c46 555/* potentially triggering the DMU_MAX_ACCESS assertion. */
556#error "Allowing the build will cause discard requests to become writes."
30930fba 557#endif
3b86aeb2 558}
cf41432c
BB
559
560/*
561 * 4.8 - 4.x API,
562 * REQ_OP_SECURE_ERASE
563 *
564 * 2.6.36 - 4.7 API,
565 * REQ_SECURE
566 *
567 * 2.6.x - 2.6.35 API,
568 * Unsupported by kernel
569 */
570static inline boolean_t
571bio_is_secure_erase(struct bio *bio)
572{
573#if defined(HAVE_REQ_OP_SECURE_ERASE)
574 return (bio_op(bio) == REQ_OP_SECURE_ERASE);
575#elif defined(REQ_SECURE)
576 return (bio->bi_rw & REQ_SECURE);
37f9dac5 577#else
cf41432c 578 return (0);
37f9dac5 579#endif
cf41432c 580}
30930fba 581
ee5fd0bb
ED
582/*
583 * 2.6.33 API change
584 * Discard granularity and alignment restrictions may now be set. For
585 * older kernels which do not support this it is safe to skip it.
586 */
/*
 * blk_queue_discard_granularity(q, dg) stores dg in
 * q->limits.discard_granularity when HAVE_DISCARD_GRANULARITY is
 * defined.  Otherwise it expands to a no-op that evaluates neither
 * argument.
 */
#if !defined(HAVE_DISCARD_GRANULARITY)
#define	blk_queue_discard_granularity(x, dg)	((void)0)
#else
static inline void
blk_queue_discard_granularity(struct request_queue *q, unsigned int dg)
{
	q->limits.discard_granularity = dg;
}
#endif /* HAVE_DISCARD_GRANULARITY */
596
61e90960
BB
597/*
598 * Default Linux IO Scheduler,
599 * Setting the scheduler to noop will allow the Linux IO scheduler to
600 * still perform front and back merging, while leaving the request
601 * ordering and prioritization to the ZFS IO scheduler.
602 */
/* Scheduler name as a string literal; it is not used within this header. */
603#define VDEV_SCHEDULER "noop"
60101509 604
8128bd89
BB
605/*
606 * A common holder for vdev_bdev_open() is used to relax the exclusive open
607 * semantics slightly. Internal vdev disk callers may pass VDEV_HOLDER to
608 * allow them to open the device multiple times. Other kernel callers and
609 * user space processes which don't pass this value will get EBUSY. This is
610 * currently required for the correct operation of hot spares.
611 */
/* Fixed constant cast to a void pointer; it is not used within this header. */
d1d7e268 612#define VDEV_HOLDER ((void *)0x2401de7)
8128bd89 613
/*
 * Call generic_start_io_acct() in its 3- or 4-argument form, whichever
 * was detected.  The queue q is passed only to the 4-argument form.
 * When neither form was detected this function does nothing.
 */
static inline void
blk_generic_start_io_acct(struct request_queue *q, int rw,
    unsigned long sectors, struct hd_struct *part)
{
#ifdef HAVE_GENERIC_IO_ACCT_3ARG
	generic_start_io_acct(rw, sectors, part);
#else
#ifdef HAVE_GENERIC_IO_ACCT_4ARG
	generic_start_io_acct(q, rw, sectors, part);
#endif /* HAVE_GENERIC_IO_ACCT_4ARG */
#endif /* HAVE_GENERIC_IO_ACCT_3ARG */
}
624
/*
 * Call generic_end_io_acct() in its 3- or 4-argument form, whichever was
 * detected.  The queue q is passed only to the 4-argument form.  When
 * neither form was detected this function does nothing.
 */
static inline void
blk_generic_end_io_acct(struct request_queue *q, int rw,
    struct hd_struct *part, unsigned long start_time)
{
#ifdef HAVE_GENERIC_IO_ACCT_3ARG
	generic_end_io_acct(rw, part, start_time);
#else
#ifdef HAVE_GENERIC_IO_ACCT_4ARG
	generic_end_io_acct(q, rw, part, start_time);
#endif /* HAVE_GENERIC_IO_ACCT_4ARG */
#endif /* HAVE_GENERIC_IO_ACCT_3ARG */
}
8198d18c 635
61e90960 636#endif /* _ZFS_BLKDEV_H */