]>
Commit | Line | Data |
---|---|---|
7bdf406d TG |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | ||
22 | /* | |
23 | * Copyright (C) 2011 Lawrence Livermore National Security, LLC. | |
24 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | |
25 | * Written by Brian Behlendorf <behlendorf1@llnl.gov>. | |
26 | * LLNL-CODE-403049. | |
27 | */ | |
28 | ||
29 | #ifndef _ZFS_BLKDEV_H | |
30 | #define _ZFS_BLKDEV_H | |
31 | ||
32 | #include <linux/blkdev.h> | |
33 | #include <linux/elevator.h> | |
34 | ||
35 | #ifndef HAVE_FMODE_T | |
36 | typedef unsigned __bitwise__ fmode_t; | |
37 | #endif /* HAVE_FMODE_T */ | |
38 | ||
39 | /* | |
40 | * 4.7 - 4.x API, | |
41 | * The blk_queue_write_cache() interface has replaced blk_queue_flush() | |
42 | * interface. However, the new interface is GPL-only thus we implement | |
43 | * our own trivial wrapper when the GPL-only version is detected. | |
44 | * | |
45 | * 2.6.36 - 4.6 API, | |
46 | * The blk_queue_flush() interface has replaced blk_queue_ordered() | |
47 | * interface. However, while the old interface was available to all the | |
48 | * new one is GPL-only. Thus if the GPL-only version is detected we | |
49 | * implement our own trivial helper. | |
50 | * | |
51 | * 2.6.x - 2.6.35 | |
52 | * Legacy blk_queue_ordered() interface. | |
53 | */ | |
54 | static inline void | |
55 | blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua) | |
56 | { | |
57 | #if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY) | |
58 | spin_lock_irq(q->queue_lock); | |
59 | if (wc) | |
60 | queue_flag_set(QUEUE_FLAG_WC, q); | |
61 | else | |
62 | queue_flag_clear(QUEUE_FLAG_WC, q); | |
63 | if (fua) | |
64 | queue_flag_set(QUEUE_FLAG_FUA, q); | |
65 | else | |
66 | queue_flag_clear(QUEUE_FLAG_FUA, q); | |
67 | spin_unlock_irq(q->queue_lock); | |
68 | #elif defined(HAVE_BLK_QUEUE_WRITE_CACHE) | |
69 | blk_queue_write_cache(q, wc, fua); | |
70 | #elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY) | |
71 | if (wc) | |
72 | q->flush_flags |= REQ_FLUSH; | |
73 | if (fua) | |
74 | q->flush_flags |= REQ_FUA; | |
75 | #elif defined(HAVE_BLK_QUEUE_FLUSH) | |
76 | blk_queue_flush(q, (wc ? REQ_FLUSH : 0) | (fua ? REQ_FUA : 0)); | |
77 | #else | |
78 | blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL); | |
79 | #endif | |
80 | } | |
81 | ||
/*
 * Most of the blk_* macros were removed in 2.6.36.  Ostensibly this was
 * done to improve readability and allow easier grepping.  From a
 * portability standpoint, however, the macros are helpful, so the ones we
 * need are redefined here when the kernel no longer supplies them.
 */
#if !defined(blk_fs_request)
#define	blk_fs_request(req)	((req)->cmd_type == REQ_TYPE_FS)
#endif /* !blk_fs_request */
91 | ||
/*
 * 2.6.27 API change,
 * The blk_queue_stackable() queue flag was added in 2.6.27 to handle dm
 * stacking drivers.  Before that, request stacking drivers were detected
 * by checking (q->request_fn == NULL); when the kernel does not provide
 * the macro we fall back to that legacy test.
 */
#if !defined(blk_queue_stackable)
#define	blk_queue_stackable(queue)	(!(queue)->request_fn)
#endif /* !blk_queue_stackable */
102 | ||
/*
 * 2.6.34 API change,
 * blk_queue_max_hw_sectors() replaces blk_queue_max_sectors().  When the
 * newer function is not available, calls are redirected to a small shim
 * that forwards to the older one.
 */
#if !defined(HAVE_BLK_QUEUE_MAX_HW_SECTORS)
static inline void
__blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
{
	blk_queue_max_sectors(q, max_hw_sectors);
}
#define	blk_queue_max_hw_sectors	__blk_queue_max_hw_sectors
#endif /* !HAVE_BLK_QUEUE_MAX_HW_SECTORS */
115 | ||
/*
 * 2.6.34 API change,
 * blk_queue_max_segments() consolidates blk_queue_max_hw_segments() and
 * blk_queue_max_phys_segments().  When it is not available, a shim applies
 * the same limit through both of the older calls.
 */
#if !defined(HAVE_BLK_QUEUE_MAX_SEGMENTS)
static inline void
__blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
{
	blk_queue_max_phys_segments(q, max_segments);
	blk_queue_max_hw_segments(q, max_segments);
}
#define	blk_queue_max_segments	__blk_queue_max_segments
#endif /* !HAVE_BLK_QUEUE_MAX_SEGMENTS */
130 | ||
131 | #ifndef HAVE_GET_DISK_RO | |
132 | static inline int | |
133 | get_disk_ro(struct gendisk *disk) | |
134 | { | |
135 | int policy = 0; | |
136 | ||
137 | if (disk->part[0]) | |
138 | policy = disk->part[0]->policy; | |
139 | ||
140 | return (policy); | |
141 | } | |
142 | #endif /* HAVE_GET_DISK_RO */ | |
143 | ||
/*
 * Accessors for the bio position fields.  With HAVE_BIO_BVEC_ITER the
 * fields are reached through bio->bi_iter; otherwise they are direct
 * members of the bio and there is no partial-bvec offset, so BIO_BI_SKIP()
 * is always 0.
 *
 * bio_for_each_segment4() takes both a by-value bio_vec (bv) and a pointer
 * to one (bvp) and passes on whichever form bio_for_each_segment() is given
 * in that configuration.  bvec_iterator_t is the matching iterator type.
 *
 * Every expansion is fully parenthesized so the accessors stay a single
 * operand wherever they are used; the results remain valid lvalues.
 */
#ifdef HAVE_BIO_BVEC_ITER
#define	BIO_BI_SECTOR(bio)	((bio)->bi_iter.bi_sector)
#define	BIO_BI_SIZE(bio)	((bio)->bi_iter.bi_size)
#define	BIO_BI_IDX(bio)		((bio)->bi_iter.bi_idx)
#define	BIO_BI_SKIP(bio)	((bio)->bi_iter.bi_bvec_done)
#define	bio_for_each_segment4(bv, bvp, b, i)	\
	bio_for_each_segment((bv), (b), (i))
typedef struct bvec_iter bvec_iterator_t;
#else
#define	BIO_BI_SECTOR(bio)	((bio)->bi_sector)
#define	BIO_BI_SIZE(bio)	((bio)->bi_size)
#define	BIO_BI_IDX(bio)		((bio)->bi_idx)
#define	BIO_BI_SKIP(bio)	(0)
#define	bio_for_each_segment4(bv, bvp, b, i)	\
	bio_for_each_segment((bvp), (b), (i))
typedef int bvec_iterator_t;
#endif /* HAVE_BIO_BVEC_ITER */
161 | ||
162 | /* | |
163 | * Portable helper for correctly setting the FAILFAST flags. The | |
164 | * correct usage has changed 3 times from 2.6.12 to 2.6.38. | |
165 | */ | |
166 | static inline void | |
167 | bio_set_flags_failfast(struct block_device *bdev, int *flags) | |
168 | { | |
169 | #ifdef CONFIG_BUG | |
170 | /* | |
171 | * Disable FAILFAST for loopback devices because of the | |
172 | * following incorrect BUG_ON() in loop_make_request(). | |
173 | * This support is also disabled for md devices because the | |
174 | * test suite layers md devices on top of loopback devices. | |
175 | * This may be removed when the loopback driver is fixed. | |
176 | * | |
177 | * BUG_ON(!lo || (rw != READ && rw != WRITE)); | |
178 | */ | |
179 | if ((MAJOR(bdev->bd_dev) == LOOP_MAJOR) || | |
180 | (MAJOR(bdev->bd_dev) == MD_MAJOR)) | |
181 | return; | |
182 | ||
183 | #ifdef BLOCK_EXT_MAJOR | |
184 | if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR) | |
185 | return; | |
186 | #endif /* BLOCK_EXT_MAJOR */ | |
187 | #endif /* CONFIG_BUG */ | |
188 | ||
189 | #if defined(HAVE_BIO_RW_FAILFAST_DTD) | |
190 | /* BIO_RW_FAILFAST_* preferred interface from 2.6.28 - 2.6.35 */ | |
191 | *flags |= ( | |
192 | (1 << BIO_RW_FAILFAST_DEV) | | |
193 | (1 << BIO_RW_FAILFAST_TRANSPORT) | | |
194 | (1 << BIO_RW_FAILFAST_DRIVER)); | |
195 | #elif defined(HAVE_REQ_FAILFAST_MASK) | |
196 | /* | |
197 | * REQ_FAILFAST_* preferred interface from 2.6.36 - 2.6.xx, | |
198 | * the BIO_* and REQ_* flags were unified under REQ_* flags. | |
199 | */ | |
200 | *flags |= REQ_FAILFAST_MASK; | |
201 | #else | |
202 | #error "Undefined block IO FAILFAST interface." | |
203 | #endif | |
204 | } | |
205 | ||
/*
 * Maximum disk label length.  Some kernels leave it undefined, in which
 * case 32 is used.
 */
#if !defined(DISK_NAME_LEN)
#define	DISK_NAME_LEN	32
#endif /* !DISK_NAME_LEN */
212 | ||
/*
 * 4.3 API change
 * The bio_endio() prototype changed slightly.  These helper macros keep
 * both the completion-callback prototype and the bio_endio() invocation
 * correct for either form.
 *
 * BIO_END_IO_PROTO(fn, x, z) declares a static completion callback named
 * fn taking the bio as x and, on kernels that still pass it, the error
 * as z.
 *
 * BIO_END_IO(bio, error) completes a bio with the given error.  With the
 * one-argument bio_endio() the error is first stored in bio->bi_error.
 * The macro expands to exactly one statement without a trailing semicolon,
 * so it is safe under an unbraced if/else; callers write the semicolon.
 */
#ifdef HAVE_1ARG_BIO_END_IO_T
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
#define	BIO_END_IO(bio, error)					\
	do {							\
		(bio)->bi_error = (error);			\
		bio_endio(bio);					\
	} while (0)
#else
#define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x, int z)
#define	BIO_END_IO(bio, error)		bio_endio((bio), (error))
#endif /* HAVE_1ARG_BIO_END_IO_T */
225 | ||
/*
 * Exclusive open and close of a block device from within the kernel.
 *
 * 2.6.38 - 2.6.x API,
 *   blkdev_get_by_path()
 *   blkdev_put()
 *
 * 2.6.28 - 2.6.37 API,
 *   open_bdev_exclusive()
 *   close_bdev_exclusive()
 *
 * 2.6.12 - 2.6.27 API,
 *   open_bdev_excl()
 *   close_bdev_excl()
 *
 * With blkdev_get_by_path()/blkdev_put() the mode is OR'ed with FMODE_EXCL;
 * the oldest close call takes no mode argument, so md is dropped there.
 */
#if defined(HAVE_BLKDEV_GET_BY_PATH)
#define	vdev_bdev_open(path, md, hld)	\
	blkdev_get_by_path(path, (md) | FMODE_EXCL, hld)
#define	vdev_bdev_close(bdev, md)	\
	blkdev_put(bdev, (md) | FMODE_EXCL)
#elif defined(HAVE_OPEN_BDEV_EXCLUSIVE)
#define	vdev_bdev_open(path, md, hld)	\
	open_bdev_exclusive(path, md, hld)
#define	vdev_bdev_close(bdev, md)	\
	close_bdev_exclusive(bdev, md)
#else
#define	vdev_bdev_open(path, md, hld)	\
	open_bdev_excl(path, md, hld)
#define	vdev_bdev_close(bdev, md)	\
	close_bdev_excl(bdev)
#endif /* HAVE_BLKDEV_GET_BY_PATH | HAVE_OPEN_BDEV_EXCLUSIVE */
252 | ||
/*
 * 2.6.22 API change
 * The function invalidate_bdev() lost its second argument because it was
 * unused.  On kernels that still expect it, 1 is passed.
 */
#if defined(HAVE_1ARG_INVALIDATE_BDEV)
#define	vdev_bdev_invalidate(bdev)	invalidate_bdev(bdev)
#else
#define	vdev_bdev_invalidate(bdev)	invalidate_bdev(bdev, 1)
#endif /* HAVE_1ARG_INVALIDATE_BDEV */
263 | ||
/*
 * 2.6.27 API change
 * lookup_bdev() was exported for use; prior to this it existed but the
 * symbol was not exported.
 *
 * 4.4.0-6.21 API change for Ubuntu
 * lookup_bdev() gained a second argument, FMODE_*, to check inode
 * permissions.  We pass 0 for it.
 *
 * When neither form was detected, vdev_lookup_bdev() evaluates to
 * ERR_PTR(-ENOTSUP).
 */
#if defined(HAVE_1ARG_LOOKUP_BDEV)
#define	vdev_lookup_bdev(path)	lookup_bdev(path)
#elif defined(HAVE_2ARGS_LOOKUP_BDEV)
#define	vdev_lookup_bdev(path)	lookup_bdev(path, 0)
#else
#define	vdev_lookup_bdev(path)	ERR_PTR(-ENOTSUP)
#endif /* HAVE_1ARG_LOOKUP_BDEV | HAVE_2ARGS_LOOKUP_BDEV */
7bdf406d TG |
281 | |
/*
 * 2.6.30 API change
 * To ensure good performance preferentially use the physical block size
 * for proper alignment.  The physical size is supposed to be the internal
 * sector size used by the device.  This is often 4096 bytes for AF devices,
 * while a smaller 512 byte logical size is supported for compatibility.
 *
 * Unfortunately, many drives still misreport their physical sector size.
 * For devices which are known to lie you may need to manually set this
 * at pool creation time with 'zpool create -o ashift=12 ...'.
 *
 * Preference order: physical block size, then logical block size, then
 * the older hard sector size.
 */
#if defined(HAVE_BDEV_PHYSICAL_BLOCK_SIZE)
#define	vdev_bdev_block_size(bdev)	bdev_physical_block_size(bdev)
#elif defined(HAVE_BDEV_LOGICAL_BLOCK_SIZE)
#define	vdev_bdev_block_size(bdev)	bdev_logical_block_size(bdev)
#else
#define	vdev_bdev_block_size(bdev)	bdev_hardsect_size(bdev)
#endif /* HAVE_BDEV_PHYSICAL_BLOCK_SIZE | HAVE_BDEV_LOGICAL_BLOCK_SIZE */
305 | ||
/*
 * 2.6.37 API change
 * The WRITE_FLUSH, WRITE_FUA, and WRITE_FLUSH_FUA flags have been
 * introduced as a replacement for WRITE_BARRIER.  This was done to
 * allow richer semantics to be expressed to the block layer.  It is
 * the block layer's responsibility to choose the correct way to
 * implement these semantics.
 */
#if defined(WRITE_FLUSH_FUA)
#define	VDEV_WRITE_FLUSH_FUA	WRITE_FLUSH_FUA
#else
#define	VDEV_WRITE_FLUSH_FUA	WRITE_BARRIER
#endif /* WRITE_FLUSH_FUA */
319 | ||
320 | /* | |
321 | * 4.8 - 4.x API, | |
322 | * REQ_OP_FLUSH | |
323 | * | |
324 | * 4.8-rc0 - 4.8-rc1, | |
325 | * REQ_PREFLUSH | |
326 | * | |
327 | * 2.6.36 - 4.7 API, | |
328 | * REQ_FLUSH | |
329 | * | |
330 | * 2.6.x - 2.6.35 API, | |
331 | * HAVE_BIO_RW_BARRIER | |
332 | * | |
333 | * Used to determine if a cache flush has been requested. This check has | |
334 | * been left intentionally broad in order to cover both a legacy flush | |
335 | * and the new preflush behavior introduced in Linux 4.8. This is correct | |
336 | * in all cases but may have a performance impact for some kernels. It | |
337 | * has the advantage of minimizing kernel specific changes in the zvol code. | |
51d97d8f TG |
338 | * |
339 | * Note that 2.6.32 era kernels provide both BIO_RW_BARRIER and REQ_FLUSH, | |
340 | * where BIO_RW_BARRIER is the correct interface. Therefore, it is important | |
341 | * that the HAVE_BIO_RW_BARRIER check occur before the REQ_FLUSH check. | |
7bdf406d TG |
342 | */ |
343 | static inline boolean_t | |
344 | bio_is_flush(struct bio *bio) | |
345 | { | |
346 | #if defined(HAVE_REQ_OP_FLUSH) && defined(HAVE_BIO_BI_OPF) | |
347 | return ((bio_op(bio) == REQ_OP_FLUSH) || (bio->bi_opf & REQ_PREFLUSH)); | |
348 | #elif defined(REQ_PREFLUSH) && defined(HAVE_BIO_BI_OPF) | |
349 | return (bio->bi_opf & REQ_PREFLUSH); | |
350 | #elif defined(REQ_PREFLUSH) && !defined(HAVE_BIO_BI_OPF) | |
351 | return (bio->bi_rw & REQ_PREFLUSH); | |
7bdf406d TG |
352 | #elif defined(HAVE_BIO_RW_BARRIER) |
353 | return (bio->bi_rw & (1 << BIO_RW_BARRIER)); | |
51d97d8f TG |
354 | #elif defined(REQ_FLUSH) |
355 | return (bio->bi_rw & REQ_FLUSH); | |
7bdf406d TG |
356 | #else |
357 | #error "Allowing the build will cause flush requests to be ignored. Please " | |
358 | "file an issue report at: https://github.com/zfsonlinux/zfs/issues/new" | |
359 | #endif | |
360 | } | |
361 | ||
362 | /* | |
363 | * 4.8 - 4.x API, | |
364 | * REQ_FUA flag moved to bio->bi_opf | |
365 | * | |
366 | * 2.6.x - 4.7 API, | |
367 | * REQ_FUA | |
368 | */ | |
369 | static inline boolean_t | |
370 | bio_is_fua(struct bio *bio) | |
371 | { | |
372 | #if defined(HAVE_BIO_BI_OPF) | |
373 | return (bio->bi_opf & REQ_FUA); | |
374 | #elif defined(REQ_FUA) | |
375 | return (bio->bi_rw & REQ_FUA); | |
376 | #else | |
377 | #error "Allowing the build will cause fua requests to be ignored. Please " | |
378 | "file an issue report at: https://github.com/zfsonlinux/zfs/issues/new" | |
379 | #endif | |
380 | } | |
381 | ||
382 | /* | |
383 | * 4.8 - 4.x API, | |
384 | * REQ_OP_DISCARD | |
385 | * | |
386 | * 2.6.36 - 4.7 API, | |
387 | * REQ_DISCARD | |
388 | * | |
389 | * 2.6.28 - 2.6.35 API, | |
390 | * BIO_RW_DISCARD | |
391 | * | |
392 | * In all cases the normal I/O path is used for discards. The only | |
393 | * difference is how the kernel tags individual I/Os as discards. | |
51d97d8f TG |
394 | * |
395 | * Note that 2.6.32 era kernels provide both BIO_RW_DISCARD and REQ_DISCARD, | |
396 | * where BIO_RW_DISCARD is the correct interface. Therefore, it is important | |
397 | * that the HAVE_BIO_RW_DISCARD check occur before the REQ_DISCARD check. | |
7bdf406d TG |
398 | */ |
399 | static inline boolean_t | |
400 | bio_is_discard(struct bio *bio) | |
401 | { | |
402 | #if defined(HAVE_REQ_OP_DISCARD) | |
403 | return (bio_op(bio) == REQ_OP_DISCARD); | |
7bdf406d TG |
404 | #elif defined(HAVE_BIO_RW_DISCARD) |
405 | return (bio->bi_rw & (1 << BIO_RW_DISCARD)); | |
51d97d8f TG |
406 | #elif defined(REQ_DISCARD) |
407 | return (bio->bi_rw & REQ_DISCARD); | |
7bdf406d TG |
408 | #else |
409 | #error "Allowing the build will cause discard requests to become writes " | |
410 | "potentially triggering the DMU_MAX_ACCESS assertion. Please file " | |
411 | "an issue report at: https://github.com/zfsonlinux/zfs/issues/new" | |
412 | #endif | |
413 | } | |
414 | ||
415 | /* | |
416 | * 4.8 - 4.x API, | |
417 | * REQ_OP_SECURE_ERASE | |
418 | * | |
419 | * 2.6.36 - 4.7 API, | |
420 | * REQ_SECURE | |
421 | * | |
422 | * 2.6.x - 2.6.35 API, | |
423 | * Unsupported by kernel | |
424 | */ | |
425 | static inline boolean_t | |
426 | bio_is_secure_erase(struct bio *bio) | |
427 | { | |
428 | #if defined(HAVE_REQ_OP_SECURE_ERASE) | |
429 | return (bio_op(bio) == REQ_OP_SECURE_ERASE); | |
430 | #elif defined(REQ_SECURE) | |
431 | return (bio->bi_rw & REQ_SECURE); | |
432 | #else | |
433 | return (0); | |
434 | #endif | |
435 | } | |
436 | ||
/*
 * 2.6.33 API change
 * Discard granularity and alignment restrictions may now be set.  On
 * older kernels which do not support this it is safe to skip, so the
 * call compiles away to nothing there.
 */
#if defined(HAVE_DISCARD_GRANULARITY)
static inline void
blk_queue_discard_granularity(struct request_queue *q, unsigned int dg)
{
	q->limits.discard_granularity = dg;
}
#else
#define	blk_queue_discard_granularity(x, dg)	((void)0)
#endif /* HAVE_DISCARD_GRANULARITY */
451 | ||
/*
 * Default Linux IO Scheduler,
 * Selecting "noop" lets the Linux IO scheduler keep performing front and
 * back merging while request ordering and prioritization are left to the
 * ZFS IO scheduler.
 */
#define	VDEV_SCHEDULER	"noop"

/*
 * A common holder for vdev_bdev_open() is used to relax the exclusive open
 * semantics slightly.  Internal vdev disk callers may pass VDEV_HOLDER to
 * allow them to open the device multiple times.  Other kernel callers and
 * user space processes which don't pass this value will get EBUSY.  This is
 * currently required for the correct operation of hot spares.
 */
#define	VDEV_HOLDER	((void *)0x2401de7)
468 | ||
/*
 * When the generic IO accounting helpers were not detected, calls to them
 * expand to nothing.
 */
#if !defined(HAVE_GENERIC_IO_ACCT)
#define	generic_start_io_acct(rw, slen, part)		((void)0)
#define	generic_end_io_acct(rw, part, start_jiffies)	((void)0)
#endif /* !HAVE_GENERIC_IO_ACCT */
473 | ||
474 | #endif /* _ZFS_BLKDEV_H */ |