/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */
/*
 * ARC buffer data (ABD).
 *
 * ABDs are an abstract data structure for the ARC which can use two
 * different ways of storing the underlying data:
 *
 * (a) Linear buffer. In this case, all the data in the ABD is stored in one
 *     contiguous buffer in memory (from a zio_[data_]buf_* kmem cache).
 *
 *         +-------------------+
 *         | ABD (linear)      |
 *         |   abd_flags = ... |
 *         |   abd_size = ...  |     +--------------------------------+
 *         |   abd_buf ------------->| raw buffer of size abd_size    |
 *         +-------------------+     +--------------------------------+
 *              no abd_chunks
 *
 * (b) Scattered buffer. In this case, the data in the ABD is split into
 *     equal-sized chunks (from the abd_chunk_cache kmem_cache), with pointers
 *     to the chunks recorded in an array at the end of the ABD structure.
 *
 *         +-------------------+
 *         | ABD (scattered)   |
 *         |   abd_flags = ... |
 *         |   abd_size = ...  |
 *         |   abd_offset = 0  |                           +-----------+
 *         |   abd_chunks[0] ----------------------------->| chunk 0   |
 *         |   abd_chunks[1] ---------------------+        +-----------+
 *         |   ...             |                  |        +-----------+
 *         |   abd_chunks[N-1] ---------+         +------->| chunk 1   |
 *         +-------------------+        |                  +-----------+
 *                                      |                      ...
 *                                      |                  +-----------+
 *                                      +----------------->| chunk N-1 |
 *                                                         +-----------+
 *
 * Linear buffers act exactly like normal buffers and are always mapped into the
 * kernel's virtual memory space, while scattered ABD data chunks are allocated
 * as physical pages and then mapped in only while they are actually being
 * accessed through one of the abd_* library functions. Using scattered ABDs
 * provides several benefits:
 *
 * (1) They avoid use of kmem_*, preventing performance problems where running
 *     kmem_reap on very large memory systems never finishes and causes
 *     constant TLB shootdowns.
 *
 * (2) Fragmentation is less of an issue since when we are at the limit of
 *     allocatable space, we won't have to search around for a long free
 *     hole in the VA space for large ARC allocations. Each chunk is mapped in
 *     individually, so even if we weren't using segkpm (see next point) we
 *     wouldn't need to worry about finding a contiguous address range.
 *
 * (3) Use of segkpm will avoid the need for map / unmap / TLB shootdown costs
 *     on each ABD access. (If segkpm isn't available then we use all linear
 *     ABDs to avoid this penalty.) See seg_kpm.c for more details.
 *
 * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to
 * B_FALSE. However, it is not possible to use scattered ABDs if segkpm is not
 * available, which is the case on all 32-bit systems and any 64-bit systems
 * where kpm_enable is turned off.
 *
 * In addition to directly allocating a linear or scattered ABD, it is also
 * possible to create an ABD by requesting the "sub-ABD" starting at an offset
 * within an existing ABD. In linear buffers this is simple (set abd_buf of
 * the new ABD to the starting point within the original raw buffer), but
 * scattered ABDs are a little more complex. The new ABD makes a copy of the
 * relevant abd_chunks pointers (but not the underlying data). However, to
 * provide arbitrary rather than only chunk-aligned starting offsets, it also
 * tracks an abd_offset field which represents the starting point of the data
 * within the first chunk in abd_chunks. For both linear and scattered ABDs,
 * creating an offset ABD marks the original ABD as the offset's parent, and
 * the original ABD's abd_children refcount is incremented. This data allows
 * us to ensure the root ABD isn't deleted before its children.
 *
 * Most consumers should never need to know what type of ABD they're using --
 * the ABD public API ensures that it's possible to transparently switch from
 * using a linear ABD to a scattered one when doing so would be beneficial.
 *
 * If you need to use the data within an ABD directly, if you know it's linear
 * (because you allocated it) you can use abd_to_buf() to access the underlying
 * raw buffer. Otherwise, you should use one of the abd_borrow_buf* functions
 * which will allocate a raw buffer if necessary. Use the abd_return_buf*
 * functions to return any raw buffers that are no longer necessary when you're
 * done using them.
 *
 * There are a variety of ABD APIs that implement basic buffer operations:
 * compare, copy, read, write, and fill with zeroes. If you need a custom
 * function which progressively accesses the whole ABD, use the abd_iterate_*
 * functions.
 */
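
/*
 * Illustrative usage sketch (not compiled; error handling is elided and the
 * 512-byte size is arbitrary): a consumer that needs a raw pointer borrows
 * a buffer and returns it when done.
 *
 *	abd_t *abd = abd_alloc(512, B_FALSE);
 *	void *buf = abd_borrow_buf_copy(abd, 512);
 *	...read or modify buf...
 *	abd_return_buf_copy(abd, buf, 512);	(copies changes back)
 *	abd_free(abd);
 */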
#include <sys/abd.h>
#include <sys/param.h>
#include <sys/zio.h>
#include <sys/zfs_context.h>
#include <sys/zfs_znode.h>
#ifdef _KERNEL
#include <linux/scatterlist.h>
#include <linux/kmap_compat.h>
#else
#define	MAX_ORDER	1
#endif
typedef struct abd_stats {
	kstat_named_t abdstat_struct_size;
	kstat_named_t abdstat_linear_cnt;
	kstat_named_t abdstat_linear_data_size;
	kstat_named_t abdstat_scatter_cnt;
	kstat_named_t abdstat_scatter_data_size;
	kstat_named_t abdstat_scatter_chunk_waste;
	kstat_named_t abdstat_scatter_orders[MAX_ORDER];
	kstat_named_t abdstat_scatter_page_multi_chunk;
	kstat_named_t abdstat_scatter_page_multi_zone;
	kstat_named_t abdstat_scatter_page_alloc_retry;
	kstat_named_t abdstat_scatter_sg_table_retry;
} abd_stats_t;
static abd_stats_t abd_stats = {
	/* Amount of memory occupied by all of the abd_t struct allocations */
	{ "struct_size",			KSTAT_DATA_UINT64 },
	/*
	 * The number of linear ABDs which are currently allocated, excluding
	 * ABDs which don't own their data (for instance the ones which were
	 * allocated through abd_get_offset() and abd_get_from_buf()). If an
	 * ABD takes ownership of its buf then it will become tracked.
	 */
	{ "linear_cnt",				KSTAT_DATA_UINT64 },
	/* Amount of data stored in all linear ABDs tracked by linear_cnt */
	{ "linear_data_size",			KSTAT_DATA_UINT64 },
	/*
	 * The number of scatter ABDs which are currently allocated, excluding
	 * ABDs which don't own their data (for instance the ones which were
	 * allocated through abd_get_offset()).
	 */
	{ "scatter_cnt",			KSTAT_DATA_UINT64 },
	/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
	{ "scatter_data_size",			KSTAT_DATA_UINT64 },
	/*
	 * The amount of space wasted at the end of the last chunk across all
	 * scatter ABDs tracked by scatter_cnt.
	 */
	{ "scatter_chunk_waste",		KSTAT_DATA_UINT64 },
	/*
	 * The number of compound allocations of a given order.  These
	 * allocations are spread over all currently allocated ABDs, and
	 * act as a measure of memory fragmentation.
	 */
	{ { "scatter_order_N",			KSTAT_DATA_UINT64 } },
	/*
	 * The number of scatter ABDs which contain multiple chunks.
	 * ABDs are preferentially allocated from the minimum number of
	 * contiguous multi-page chunks; a single chunk is optimal.
	 */
	{ "scatter_page_multi_chunk",		KSTAT_DATA_UINT64 },
	/*
	 * The number of scatter ABDs which are split across memory zones.
	 * ABDs are preferentially allocated using pages from a single zone.
	 */
	{ "scatter_page_multi_zone",		KSTAT_DATA_UINT64 },
	/*
	 * The total number of retries encountered when attempting to
	 * allocate the pages to populate the scatter ABD.
	 */
	{ "scatter_page_alloc_retry",		KSTAT_DATA_UINT64 },
	/*
	 * The total number of retries encountered when attempting to
	 * allocate the sg table for an ABD.
	 */
	{ "scatter_sg_table_retry",		KSTAT_DATA_UINT64 },
};
#define	ABDSTAT(stat)		(abd_stats.stat.value.ui64)
#define	ABDSTAT_INCR(stat, val) \
	atomic_add_64(&abd_stats.stat.value.ui64, (val))
#define	ABDSTAT_BUMP(stat)	ABDSTAT_INCR(stat, 1)
#define	ABDSTAT_BUMPDOWN(stat)	ABDSTAT_INCR(stat, -1)

#define	ABD_SCATTER(abd)	(abd->abd_u.abd_scatter)
#define	ABD_BUF(abd)		(abd->abd_u.abd_linear.abd_buf)
#define	abd_for_each_sg(abd, sg, n, i)	\
	for_each_sg(ABD_SCATTER(abd).abd_sgl, sg, n, i)
/* see block comment above for description */
int zfs_abd_scatter_enabled = B_TRUE;
unsigned zfs_abd_scatter_max_order = MAX_ORDER - 1;

static kmem_cache_t *abd_cache = NULL;
static kstat_t *abd_ksp;
static inline size_t
abd_chunkcnt_for_bytes(size_t size)
{
	return (P2ROUNDUP(size, PAGESIZE) / PAGESIZE);
}
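
/*
 * For example (illustrative, assuming 4K pages): abd_chunkcnt_for_bytes()
 * rounds a 9K request up to 12K and returns three page-sized chunks.
 */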
#ifdef _KERNEL
#ifndef CONFIG_HIGHMEM

#ifndef __GFP_RECLAIM
#define	__GFP_RECLAIM		__GFP_WAIT
#endif

static unsigned long
abd_alloc_chunk(int nid, gfp_t gfp, unsigned int order)
{
	struct page *page;

	page = alloc_pages_node(nid, gfp, order);
	if (!page)
		return (0);

	return ((unsigned long) page_address(page));
}
/*
 * The goal is to minimize fragmentation by preferentially populating ABDs
 * with higher order compound pages from a single zone.  Allocation size is
 * progressively decreased until it can be satisfied without performing
 * reclaim or compaction.  When necessary this function will degenerate to
 * allocating individual pages and allowing reclaim to satisfy allocations.
 */
static void
abd_alloc_pages(abd_t *abd, size_t size)
{
	struct list_head pages;
	struct sg_table table;
	struct scatterlist *sg;
	struct page *page, *tmp_page = NULL;
	gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
	gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM;
	int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1);
	int nr_pages = abd_chunkcnt_for_bytes(size);
	int chunks = 0, zones = 0;
	size_t remaining_size;
	int nid = NUMA_NO_NODE;
	int alloc_pages = 0;
	int order;

	INIT_LIST_HEAD(&pages);

	while (alloc_pages < nr_pages) {
		unsigned long paddr;
		unsigned chunk_pages;

		order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order);
		chunk_pages = (1U << order);

		paddr = abd_alloc_chunk(nid, order ? gfp_comp : gfp, order);
		if (paddr == 0) {
			if (order == 0) {
				ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
				schedule_timeout_interruptible(1);
			} else {
				max_order = MAX(0, order - 1);
			}
			continue;
		}

		page = virt_to_page(paddr);
		list_add_tail(&page->lru, &pages);

		if ((nid != NUMA_NO_NODE) && (page_to_nid(page) != nid))
			zones++;

		nid = page_to_nid(page);
		ABDSTAT_BUMP(abdstat_scatter_orders[order]);
		chunks++;
		alloc_pages += chunk_pages;
	}

	ASSERT3S(alloc_pages, ==, nr_pages);

	while (sg_alloc_table(&table, chunks, gfp)) {
		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
		schedule_timeout_interruptible(1);
	}

	sg = table.sgl;
	remaining_size = size;
	list_for_each_entry_safe(page, tmp_page, &pages, lru) {
		size_t sg_size = MIN(PAGESIZE << compound_order(page),
		    remaining_size);
		sg_set_page(sg, page, sg_size, 0);
		remaining_size -= sg_size;

		sg = sg_next(sg);
		list_del(&page->lru);
	}

	if (chunks > 1) {
		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;

		if (zones) {
			ABDSTAT_BUMP(abdstat_scatter_page_multi_zone);
			abd->abd_flags |= ABD_FLAG_MULTI_ZONE;
		}
	}

	ABD_SCATTER(abd).abd_sgl = table.sgl;
	ABD_SCATTER(abd).abd_nents = table.nents;
}
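
/*
 * Illustrative walk-through of the degradation strategy (assumes 4K pages
 * and a hypothetical max_order of 5): a 256K ABD wants 64 pages.  The loop
 * first tries one order-5 chunk (32 pages), then order-5 again for the
 * remaining 32 pages.  If an order-5 attempt fails, max_order drops to 4
 * and the remainder is satisfied with smaller chunks, degenerating to
 * order-0 pages plus reclaim in the worst case.
 */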
#else
/*
 * Allocate N individual pages to construct a scatter ABD.  This function
 * makes no attempt to request contiguous pages and requires the minimal
 * number of kernel interfaces.  It's designed for maximum compatibility.
 */
static void
abd_alloc_pages(abd_t *abd, size_t size)
{
	struct scatterlist *sg = NULL;
	struct sg_table table;
	struct page *page;
	gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
	int nr_pages = abd_chunkcnt_for_bytes(size);
	int i = 0;

	while (sg_alloc_table(&table, nr_pages, gfp)) {
		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
		schedule_timeout_interruptible(1);
	}

	ASSERT3U(table.nents, ==, nr_pages);
	ABD_SCATTER(abd).abd_sgl = table.sgl;
	ABD_SCATTER(abd).abd_nents = nr_pages;

	abd_for_each_sg(abd, sg, nr_pages, i) {
		while ((page = __page_cache_alloc(gfp)) == NULL) {
			ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
			schedule_timeout_interruptible(1);
		}

		ABDSTAT_BUMP(abdstat_scatter_orders[0]);
		sg_set_page(sg, page, PAGESIZE, 0);
	}

	if (nr_pages > 1) {
		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;
	}
}
#endif /* !CONFIG_HIGHMEM */
static void
abd_free_pages(abd_t *abd)
{
	struct scatterlist *sg = NULL;
	struct sg_table table;
	struct page *page;
	int nr_pages = ABD_SCATTER(abd).abd_nents;
	int order, i = 0;

	if (abd->abd_flags & ABD_FLAG_MULTI_ZONE)
		ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_zone);

	if (abd->abd_flags & ABD_FLAG_MULTI_CHUNK)
		ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk);

	abd_for_each_sg(abd, sg, nr_pages, i) {
		page = sg_page(sg);
		order = compound_order(page);
		__free_pages(page, order);
		ASSERT3U(sg->length, <=, PAGE_SIZE << order);
		ABDSTAT_BUMPDOWN(abdstat_scatter_orders[order]);
	}

	table.sgl = ABD_SCATTER(abd).abd_sgl;
	table.nents = table.orig_nents = nr_pages;
	sg_free_table(&table);
}
#else /* _KERNEL */

#ifndef PAGE_SHIFT
#define	PAGE_SHIFT (highbit64(PAGESIZE)-1)
#endif

struct page;

#define	abd_alloc_chunk(o) \
	((struct page *)umem_alloc_aligned(PAGESIZE << (o), 64, KM_SLEEP))
#define	abd_free_chunk(chunk, o)	umem_free(chunk, PAGESIZE << (o))
#define	zfs_kmap_atomic(chunk, km)	((void *)chunk)
#define	zfs_kunmap_atomic(addr, km)	do { (void)(addr); } while (0)
#define	local_irq_save(flags)		do { (void)(flags); } while (0)
#define	local_irq_restore(flags)	do { (void)(flags); } while (0)
#define	nth_page(pg, i) \
	((struct page *)((void *)(pg) + (i) * PAGESIZE))
struct scatterlist {
	struct page *page;
	int length;
	int end;
};

static void
sg_init_table(struct scatterlist *sg, int nr)
{
	memset(sg, 0, nr * sizeof (struct scatterlist));
	sg[nr - 1].end = 1;
}

#define	for_each_sg(sgl, sg, nr, i)	\
	for ((i) = 0, (sg) = (sgl); (i) < (nr); (i)++, (sg) = sg_next(sg))

static inline void
sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len,
    unsigned int offset)
{
	/* currently we don't use offset */
	ASSERT(offset == 0);
	sg->page = page;
	sg->length = len;
}

static inline struct page *
sg_page(struct scatterlist *sg)
{
	return (sg->page);
}

static inline struct scatterlist *
sg_next(struct scatterlist *sg)
{
	if (sg->end)
		return (NULL);

	return (sg + 1);
}
static void
abd_alloc_pages(abd_t *abd, size_t size)
{
	unsigned nr_pages = abd_chunkcnt_for_bytes(size);
	struct scatterlist *sg;
	int i;

	ABD_SCATTER(abd).abd_sgl = vmem_alloc(nr_pages *
	    sizeof (struct scatterlist), KM_SLEEP);
	sg_init_table(ABD_SCATTER(abd).abd_sgl, nr_pages);

	abd_for_each_sg(abd, sg, nr_pages, i) {
		struct page *p = abd_alloc_chunk(0);
		sg_set_page(sg, p, PAGESIZE, 0);
	}
	ABD_SCATTER(abd).abd_nents = nr_pages;
}

static void
abd_free_pages(abd_t *abd)
{
	int i, n = ABD_SCATTER(abd).abd_nents;
	struct scatterlist *sg;
	int j;

	abd_for_each_sg(abd, sg, n, i) {
		for (j = 0; j < sg->length; j += PAGESIZE) {
			struct page *p = nth_page(sg_page(sg), j >> PAGE_SHIFT);
			abd_free_chunk(p, 0);
		}
	}

	vmem_free(ABD_SCATTER(abd).abd_sgl, n * sizeof (struct scatterlist));
}

#endif /* _KERNEL */
void
abd_init(void)
{
	int i;

	abd_cache = kmem_cache_create("abd_t", sizeof (abd_t),
	    0, NULL, NULL, NULL, NULL, NULL, 0);

	abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
	    sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
	if (abd_ksp != NULL) {
		abd_ksp->ks_data = &abd_stats;
		kstat_install(abd_ksp);

		for (i = 0; i < MAX_ORDER; i++) {
			snprintf(abd_stats.abdstat_scatter_orders[i].name,
			    KSTAT_STRLEN, "scatter_order_%d", i);
			abd_stats.abdstat_scatter_orders[i].data_type =
			    KSTAT_DATA_UINT64;
		}
	}
}

void
abd_fini(void)
{
	if (abd_ksp != NULL) {
		kstat_delete(abd_ksp);
		abd_ksp = NULL;
	}

	if (abd_cache) {
		kmem_cache_destroy(abd_cache);
		abd_cache = NULL;
	}
}
static void
abd_verify(abd_t *abd)
{
	ASSERT3U(abd->abd_size, >, 0);
	ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE);
	ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR |
	    ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE |
	    ABD_FLAG_MULTI_CHUNK));
	IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER));
	IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER);
	if (abd_is_linear(abd)) {
		ASSERT3P(abd->abd_u.abd_linear.abd_buf, !=, NULL);
	} else {
		size_t n;
		int i = 0;
		struct scatterlist *sg = NULL;

		ASSERT3U(ABD_SCATTER(abd).abd_nents, >, 0);
		ASSERT3U(ABD_SCATTER(abd).abd_offset, <,
		    ABD_SCATTER(abd).abd_sgl->length);
		n = ABD_SCATTER(abd).abd_nents;
		abd_for_each_sg(abd, sg, n, i) {
			ASSERT3P(sg_page(sg), !=, NULL);
		}
	}
}
static inline abd_t *
abd_alloc_struct(void)
{
	abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE);

	ASSERT3P(abd, !=, NULL);
	ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t));

	return (abd);
}

static inline void
abd_free_struct(abd_t *abd)
{
	kmem_cache_free(abd_cache, abd);
	ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t));
}
/*
 * Allocate an ABD, along with its own underlying data buffers. Use this if you
 * don't care whether the ABD is linear or not.
 */
abd_t *
abd_alloc(size_t size, boolean_t is_metadata)
{
	if (!zfs_abd_scatter_enabled || size <= PAGESIZE)
		return (abd_alloc_linear(size, is_metadata));

	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);

	abd_t *abd = abd_alloc_struct();
	abd->abd_flags = ABD_FLAG_OWNER;
	abd_alloc_pages(abd, size);

	if (is_metadata) {
		abd->abd_flags |= ABD_FLAG_META;
	}
	abd->abd_size = size;
	abd->abd_parent = NULL;
	zfs_refcount_create(&abd->abd_children);

	abd->abd_u.abd_scatter.abd_offset = 0;

	ABDSTAT_BUMP(abdstat_scatter_cnt);
	ABDSTAT_INCR(abdstat_scatter_data_size, size);
	ABDSTAT_INCR(abdstat_scatter_chunk_waste,
	    P2ROUNDUP(size, PAGESIZE) - size);

	return (abd);
}
static void
abd_free_scatter(abd_t *abd)
{
	abd_free_pages(abd);

	zfs_refcount_destroy(&abd->abd_children);
	ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
	ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
	ABDSTAT_INCR(abdstat_scatter_chunk_waste,
	    (int)abd->abd_size - (int)P2ROUNDUP(abd->abd_size, PAGESIZE));

	abd_free_struct(abd);
}
/*
 * Allocate an ABD that must be linear, along with its own underlying data
 * buffer. Only use this when it would be very annoying to write your ABD
 * consumer with a scattered ABD.
 */
abd_t *
abd_alloc_linear(size_t size, boolean_t is_metadata)
{
	abd_t *abd = abd_alloc_struct();

	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);

	abd->abd_flags = ABD_FLAG_LINEAR | ABD_FLAG_OWNER;
	if (is_metadata) {
		abd->abd_flags |= ABD_FLAG_META;
	}
	abd->abd_size = size;
	abd->abd_parent = NULL;
	zfs_refcount_create(&abd->abd_children);

	if (is_metadata) {
		abd->abd_u.abd_linear.abd_buf = zio_buf_alloc(size);
	} else {
		abd->abd_u.abd_linear.abd_buf = zio_data_buf_alloc(size);
	}

	ABDSTAT_BUMP(abdstat_linear_cnt);
	ABDSTAT_INCR(abdstat_linear_data_size, size);

	return (abd);
}
static void
abd_free_linear(abd_t *abd)
{
	if (abd->abd_flags & ABD_FLAG_META) {
		zio_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size);
	} else {
		zio_data_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size);
	}

	zfs_refcount_destroy(&abd->abd_children);
	ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
	ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);

	abd_free_struct(abd);
}
/*
 * Free an ABD. Only use this on ABDs allocated with abd_alloc() or
 * abd_alloc_linear().
 */
void
abd_free(abd_t *abd)
{
	abd_verify(abd);
	ASSERT3P(abd->abd_parent, ==, NULL);
	ASSERT(abd->abd_flags & ABD_FLAG_OWNER);
	if (abd_is_linear(abd))
		abd_free_linear(abd);
	else
		abd_free_scatter(abd);
}
/*
 * Allocate an ABD of the same format (same metadata flag, same scatterize
 * setting) as another ABD.
 */
abd_t *
abd_alloc_sametype(abd_t *sabd, size_t size)
{
	boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0;
	if (abd_is_linear(sabd)) {
		return (abd_alloc_linear(size, is_metadata));
	} else {
		return (abd_alloc(size, is_metadata));
	}
}
/*
 * If we're going to use this ABD for doing I/O using the block layer, the
 * consumer of the ABD data doesn't care if it's scattered or not, and we don't
 * plan to store this ABD in memory for a long period of time, we should
 * allocate the ABD type that requires the least data copying to do the I/O.
 *
 * On Illumos this is linear ABDs; however, if ldi_strategy() can ever issue
 * I/Os using a scatter/gather list we should switch to that and replace this
 * call with vanilla abd_alloc().
 *
 * On Linux the optimal thing to do would be to use abd_get_offset() and
 * construct a new ABD which shares the original pages, thereby eliminating
 * the copy. But for the moment a new linear ABD is allocated until this
 * performance optimization can be implemented.
 */
abd_t *
abd_alloc_for_io(size_t size, boolean_t is_metadata)
{
	return (abd_alloc(size, is_metadata));
}
/*
 * Allocate a new ABD to point to offset off of sabd. It shares the underlying
 * buffer data with sabd. Use abd_put() to free. sabd must not be freed while
 * any derived ABDs exist.
 */
static inline abd_t *
abd_get_offset_impl(abd_t *sabd, size_t off, size_t size)
{
	abd_t *abd;

	abd_verify(sabd);
	ASSERT3U(off, <=, sabd->abd_size);

	if (abd_is_linear(sabd)) {
		abd = abd_alloc_struct();

		/*
		 * Even if this buf is filesystem metadata, we only track that
		 * if we own the underlying data buffer, which is not true in
		 * this case. Therefore, we don't ever use ABD_FLAG_META here.
		 */
		abd->abd_flags = ABD_FLAG_LINEAR;

		abd->abd_u.abd_linear.abd_buf =
		    (char *)sabd->abd_u.abd_linear.abd_buf + off;
	} else {
		int i = 0;
		struct scatterlist *sg = NULL;
		size_t new_offset = sabd->abd_u.abd_scatter.abd_offset + off;

		abd = abd_alloc_struct();

		/*
		 * Even if this buf is filesystem metadata, we only track that
		 * if we own the underlying data buffer, which is not true in
		 * this case. Therefore, we don't ever use ABD_FLAG_META here.
		 */
		abd->abd_flags = 0;

		abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) {
			if (new_offset < sg->length)
				break;
			new_offset -= sg->length;
		}

		ABD_SCATTER(abd).abd_sgl = sg;
		ABD_SCATTER(abd).abd_offset = new_offset;
		ABD_SCATTER(abd).abd_nents = ABD_SCATTER(sabd).abd_nents - i;
	}

	abd->abd_size = size;
	abd->abd_parent = sabd;
	zfs_refcount_create(&abd->abd_children);
	(void) zfs_refcount_add_many(&sabd->abd_children, abd->abd_size, abd);

	return (abd);
}
abd_t *
abd_get_offset(abd_t *sabd, size_t off)
{
	size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0;

	VERIFY3U(size, >, 0);

	return (abd_get_offset_impl(sabd, off, size));
}
abd_t *
abd_get_offset_size(abd_t *sabd, size_t off, size_t size)
{
	ASSERT3U(off + size, <=, sabd->abd_size);

	return (abd_get_offset_impl(sabd, off, size));
}
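
/*
 * Illustrative sketch (hypothetical offsets): carving a 4K sub-ABD out of a
 * larger parent.  The child shares the parent's pages and must be released
 * with abd_put() before the parent is freed.
 *
 *	abd_t *child = abd_get_offset_size(parent, 8192, 4096);
 *	...use child...
 *	abd_put(child);
 */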
/*
 * Allocate a linear ABD structure for buf. You must free this with abd_put()
 * since the resulting ABD doesn't own its own buffer.
 */
abd_t *
abd_get_from_buf(void *buf, size_t size)
{
	abd_t *abd = abd_alloc_struct();

	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);

	/*
	 * Even if this buf is filesystem metadata, we only track that if we
	 * own the underlying data buffer, which is not true in this case.
	 * Therefore, we don't ever use ABD_FLAG_META here.
	 */
	abd->abd_flags = ABD_FLAG_LINEAR;
	abd->abd_size = size;
	abd->abd_parent = NULL;
	zfs_refcount_create(&abd->abd_children);

	abd->abd_u.abd_linear.abd_buf = buf;

	return (abd);
}
/*
 * Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not
 * free the underlying scatterlist or buffer.
 */
void
abd_put(abd_t *abd)
{
	abd_verify(abd);
	ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));

	if (abd->abd_parent != NULL) {
		(void) zfs_refcount_remove_many(&abd->abd_parent->abd_children,
		    abd->abd_size, abd);
	}

	zfs_refcount_destroy(&abd->abd_children);
	abd_free_struct(abd);
}
/*
 * Get the raw buffer associated with a linear ABD.
 */
void *
abd_to_buf(abd_t *abd)
{
	ASSERT(abd_is_linear(abd));
	abd_verify(abd);
	return (abd->abd_u.abd_linear.abd_buf);
}
/*
 * Borrow a raw buffer from an ABD without copying the contents of the ABD
 * into the buffer. If the ABD is scattered, this will allocate a raw buffer
 * whose contents are undefined. To copy over the existing data in the ABD, use
 * abd_borrow_buf_copy() instead.
 */
void *
abd_borrow_buf(abd_t *abd, size_t n)
{
	void *buf;
	abd_verify(abd);
	ASSERT3U(abd->abd_size, >=, n);
	if (abd_is_linear(abd)) {
		buf = abd_to_buf(abd);
	} else {
		buf = zio_buf_alloc(n);
	}
	(void) zfs_refcount_add_many(&abd->abd_children, n, buf);

	return (buf);
}
void *
abd_borrow_buf_copy(abd_t *abd, size_t n)
{
	void *buf = abd_borrow_buf(abd, n);
	if (!abd_is_linear(abd)) {
		abd_copy_to_buf(buf, abd, n);
	}
	return (buf);
}
/*
 * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will
 * not change the contents of the ABD and will ASSERT that you didn't modify
 * the buffer since it was borrowed. If you want any changes you made to buf to
 * be copied back to abd, use abd_return_buf_copy() instead.
 */
void
abd_return_buf(abd_t *abd, void *buf, size_t n)
{
	abd_verify(abd);
	ASSERT3U(abd->abd_size, >=, n);
	if (abd_is_linear(abd)) {
		ASSERT3P(buf, ==, abd_to_buf(abd));
	} else {
		ASSERT0(abd_cmp_buf(abd, buf, n));
		zio_buf_free(buf, n);
	}
	(void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
}
void
abd_return_buf_copy(abd_t *abd, void *buf, size_t n)
{
	if (!abd_is_linear(abd)) {
		abd_copy_from_buf(abd, buf, n);
	}
	abd_return_buf(abd, buf, n);
}
/*
 * Give this ABD ownership of the buffer that it's storing. Can only be used on
 * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated
 * with abd_alloc_linear() which subsequently released ownership of their buf
 * with abd_release_ownership_of_buf().
 */
void
abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata)
{
	ASSERT(abd_is_linear(abd));
	ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
	abd_verify(abd);

	abd->abd_flags |= ABD_FLAG_OWNER;
	if (is_metadata) {
		abd->abd_flags |= ABD_FLAG_META;
	}

	ABDSTAT_BUMP(abdstat_linear_cnt);
	ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
}
void
abd_release_ownership_of_buf(abd_t *abd)
{
	ASSERT(abd_is_linear(abd));
	ASSERT(abd->abd_flags & ABD_FLAG_OWNER);
	abd_verify(abd);

	abd->abd_flags &= ~ABD_FLAG_OWNER;
	/* Disable this flag since we no longer own the data buffer */
	abd->abd_flags &= ~ABD_FLAG_META;

	ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
	ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
}
#ifndef HAVE_1ARG_KMAP_ATOMIC
#define	NR_KM_TYPE (6)
#ifdef _KERNEL
int km_table[NR_KM_TYPE] = {
	KM_USER0,
	KM_USER1,
	KM_BIO_SRC_IRQ,
	KM_BIO_DST_IRQ,
	KM_PTE0,
	KM_PTE1,
};
#endif
#endif

struct abd_iter {
	/* public interface */
	void	*iter_mapaddr;	/* addr corresponding to iter_pos */
	size_t	iter_mapsize;	/* length of data valid at mapaddr */

	/* private */
	abd_t	*iter_abd;	/* ABD being iterated through */
	size_t	iter_pos;
	size_t	iter_offset;	/* offset in current sg/abd_buf, */
				/* abd_offset included */
	struct scatterlist *iter_sg;	/* current sg */
#ifndef HAVE_1ARG_KMAP_ATOMIC
	int	iter_km;	/* KM_* for kmap_atomic */
#endif
};
/*
 * Initialize the abd_iter.
 */
static void
abd_iter_init(struct abd_iter *aiter, abd_t *abd, int km_type)
{
	abd_verify(abd);
	aiter->iter_abd = abd;
	aiter->iter_mapaddr = NULL;
	aiter->iter_mapsize = 0;
	aiter->iter_pos = 0;
	if (abd_is_linear(abd)) {
		aiter->iter_offset = 0;
		aiter->iter_sg = NULL;
	} else {
		aiter->iter_offset = ABD_SCATTER(abd).abd_offset;
		aiter->iter_sg = ABD_SCATTER(abd).abd_sgl;
	}
#ifndef HAVE_1ARG_KMAP_ATOMIC
	ASSERT3U(km_type, <, NR_KM_TYPE);
	aiter->iter_km = km_type;
#endif
}
/*
 * Advance the iterator by a certain amount. Cannot be called when a chunk is
 * in use. This can be safely called when the aiter has already been
 * exhausted, in which case this does nothing.
 */
static void
abd_iter_advance(struct abd_iter *aiter, size_t amount)
{
	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
	ASSERT0(aiter->iter_mapsize);

	/* There's nothing left to advance to, so do nothing */
	if (aiter->iter_pos == aiter->iter_abd->abd_size)
		return;

	aiter->iter_pos += amount;
	aiter->iter_offset += amount;
	if (!abd_is_linear(aiter->iter_abd)) {
		while (aiter->iter_offset >= aiter->iter_sg->length) {
			aiter->iter_offset -= aiter->iter_sg->length;
			aiter->iter_sg = sg_next(aiter->iter_sg);
			if (aiter->iter_sg == NULL) {
				ASSERT0(aiter->iter_offset);
				break;
			}
		}
	}
}
/*
 * Map the current chunk into aiter. This can be safely called when the aiter
 * has already been exhausted, in which case this does nothing.
 */
static void
abd_iter_map(struct abd_iter *aiter)
{
	void *paddr;
	size_t offset = 0;

	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
	ASSERT0(aiter->iter_mapsize);

	/* There's nothing left to iterate over, so do nothing */
	if (aiter->iter_pos == aiter->iter_abd->abd_size)
		return;

	if (abd_is_linear(aiter->iter_abd)) {
		ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);
		offset = aiter->iter_offset;
		aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
		paddr = aiter->iter_abd->abd_u.abd_linear.abd_buf;
	} else {
		offset = aiter->iter_offset;
		aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset,
		    aiter->iter_abd->abd_size - aiter->iter_pos);

		paddr = zfs_kmap_atomic(sg_page(aiter->iter_sg),
		    km_table[aiter->iter_km]);
	}

	aiter->iter_mapaddr = (char *)paddr + offset;
}
/*
 * Unmap the current chunk from aiter. This can be safely called when the aiter
 * has already been exhausted, in which case this does nothing.
 */
static void
abd_iter_unmap(struct abd_iter *aiter)
{
	/* There's nothing left to unmap, so do nothing */
	if (aiter->iter_pos == aiter->iter_abd->abd_size)
		return;

	if (!abd_is_linear(aiter->iter_abd)) {
		/* LINTED E_FUNC_SET_NOT_USED */
		zfs_kunmap_atomic(aiter->iter_mapaddr - aiter->iter_offset,
		    km_table[aiter->iter_km]);
	}

	ASSERT3P(aiter->iter_mapaddr, !=, NULL);
	ASSERT3U(aiter->iter_mapsize, >, 0);

	aiter->iter_mapaddr = NULL;
	aiter->iter_mapsize = 0;
}
int
abd_iterate_func(abd_t *abd, size_t off, size_t size,
    abd_iter_func_t *func, void *private)
{
	int ret = 0;
	struct abd_iter aiter;

	abd_verify(abd);
	ASSERT3U(off + size, <=, abd->abd_size);

	abd_iter_init(&aiter, abd, 0);
	abd_iter_advance(&aiter, off);

	while (size > 0) {
		abd_iter_map(&aiter);

		size_t len = MIN(aiter.iter_mapsize, size);
		ASSERT3U(len, >, 0);

		ret = func(aiter.iter_mapaddr, len, private);

		abd_iter_unmap(&aiter);

		if (ret != 0)
			break;

		size -= len;
		abd_iter_advance(&aiter, len);
	}

	return (ret);
}
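
/*
 * Illustrative sketch (hypothetical callback, not part of the ABD API): the
 * abd_iterate_func() contract hands each mapped segment to the callback as a
 * raw buffer; returning nonzero stops the iteration early.
 *
 *	static int
 *	count_zeroes_cb(void *buf, size_t len, void *private)
 *	{
 *		uint64_t *count = private;
 *		size_t i;
 *
 *		for (i = 0; i < len; i++)
 *			if (((char *)buf)[i] == 0)
 *				(*count)++;
 *		return (0);
 *	}
 *
 *	uint64_t zeroes = 0;
 *	(void) abd_iterate_func(abd, 0, abd->abd_size,
 *	    count_zeroes_cb, &zeroes);
 */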
struct buf_arg {
	void *arg_buf;
};

static int
abd_copy_to_buf_off_cb(void *buf, size_t size, void *private)
{
	struct buf_arg *ba_ptr = private;

	(void) memcpy(ba_ptr->arg_buf, buf, size);
	ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;

	return (0);
}
/*
 * Copy abd to buf. (off is the offset in abd.)
 */
void
abd_copy_to_buf_off(void *buf, abd_t *abd, size_t off, size_t size)
{
	struct buf_arg ba_ptr = { buf };

	(void) abd_iterate_func(abd, off, size, abd_copy_to_buf_off_cb,
	    &ba_ptr);
}
, size_t size
, void *private)
1159 struct buf_arg
*ba_ptr
= private;
1161 ret
= memcmp(buf
, ba_ptr
->arg_buf
, size
);
1162 ba_ptr
->arg_buf
= (char *)ba_ptr
->arg_buf
+ size
;
/*
 * Compare the contents of abd to buf. (off is the offset in abd.)
 */
int
abd_cmp_buf_off(abd_t *abd, const void *buf, size_t off, size_t size)
{
	struct buf_arg ba_ptr = { (void *) buf };

	return (abd_iterate_func(abd, off, size, abd_cmp_buf_off_cb, &ba_ptr));
}
static int
abd_copy_from_buf_off_cb(void *buf, size_t size, void *private)
{
	struct buf_arg *ba_ptr = private;

	(void) memcpy(buf, ba_ptr->arg_buf, size);
	ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;

	return (0);
}
/*
 * Copy from buf to abd. (off is the offset in abd.)
 */
void
abd_copy_from_buf_off(abd_t *abd, const void *buf, size_t off, size_t size)
{
	struct buf_arg ba_ptr = { (void *) buf };

	(void) abd_iterate_func(abd, off, size, abd_copy_from_buf_off_cb,
	    &ba_ptr);
}
static int
abd_zero_off_cb(void *buf, size_t size, void *private)
{
	(void) memset(buf, 0, size);
	return (0);
}

/*
 * Zero out the abd from a particular offset to the end.
 */
void
abd_zero_off(abd_t *abd, size_t off, size_t size)
{
	(void) abd_iterate_func(abd, off, size, abd_zero_off_cb, NULL);
}
/*
 * Iterate over two ABDs and call func incrementally on the two ABDs' data in
 * equal-sized chunks (passed to func as raw buffers). func could be called many
 * times during this iteration.
 */
int
abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff,
    size_t size, abd_iter_func2_t *func, void *private)
{
	int ret = 0;
	struct abd_iter daiter, saiter;

	abd_verify(dabd);
	abd_verify(sabd);

	ASSERT3U(doff + size, <=, dabd->abd_size);
	ASSERT3U(soff + size, <=, sabd->abd_size);

	abd_iter_init(&daiter, dabd, 0);
	abd_iter_init(&saiter, sabd, 1);
	abd_iter_advance(&daiter, doff);
	abd_iter_advance(&saiter, soff);

	while (size > 0) {
		abd_iter_map(&daiter);
		abd_iter_map(&saiter);

		size_t dlen = MIN(daiter.iter_mapsize, size);
		size_t slen = MIN(saiter.iter_mapsize, size);
		size_t len = MIN(dlen, slen);
		ASSERT(dlen > 0 || slen > 0);

		ret = func(daiter.iter_mapaddr, saiter.iter_mapaddr, len,
		    private);

		abd_iter_unmap(&saiter);
		abd_iter_unmap(&daiter);

		if (ret != 0)
			break;

		size -= len;
		abd_iter_advance(&daiter, len);
		abd_iter_advance(&saiter, len);
	}

	return (ret);
}
static int
abd_copy_off_cb(void *dbuf, void *sbuf, size_t size, void *private)
{
	(void) memcpy(dbuf, sbuf, size);
	return (0);
}

/*
 * Copy from sabd to dabd starting from soff and doff.
 */
void
abd_copy_off(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, size_t size)
{
	(void) abd_iterate_func2(dabd, sabd, doff, soff, size,
	    abd_copy_off_cb, NULL);
}
static int
abd_cmp_cb(void *bufa, void *bufb, size_t size, void *private)
{
	return (memcmp(bufa, bufb, size));
}

/*
 * Compares the contents of two ABDs.
 */
int
abd_cmp(abd_t *dabd, abd_t *sabd)
{
	ASSERT3U(dabd->abd_size, ==, sabd->abd_size);
	return (abd_iterate_func2(dabd, sabd, 0, 0, dabd->abd_size,
	    abd_cmp_cb, NULL));
}
1306 * @cabds parity ABDs, must have equal size
1307 * @dabd data ABD. Can be NULL (in this case @dsize = 0)
1308 * @func_raidz_gen should be implemented so that its behaviour
1309 * is the same when taking linear and when taking scatter
1312 abd_raidz_gen_iterate(abd_t
**cabds
, abd_t
*dabd
,
1313 ssize_t csize
, ssize_t dsize
, const unsigned parity
,
1314 void (*func_raidz_gen
)(void **, const void *, size_t, size_t))
1318 struct abd_iter caiters
[3];
1319 struct abd_iter daiter
= {0};
1321 unsigned long flags
;
1323 ASSERT3U(parity
, <=, 3);
1325 for (i
= 0; i
< parity
; i
++)
1326 abd_iter_init(&caiters
[i
], cabds
[i
], i
);
1329 abd_iter_init(&daiter
, dabd
, i
);
1331 ASSERT3S(dsize
, >=, 0);
1333 local_irq_save(flags
);
1337 if (dabd
&& dsize
> 0)
1338 abd_iter_map(&daiter
);
1340 for (i
= 0; i
< parity
; i
++) {
1341 abd_iter_map(&caiters
[i
]);
1342 caddrs
[i
] = caiters
[i
].iter_mapaddr
;
1347 len
= MIN(caiters
[2].iter_mapsize
, len
);
1349 len
= MIN(caiters
[1].iter_mapsize
, len
);
1351 len
= MIN(caiters
[0].iter_mapsize
, len
);
1354 /* must be progressive */
1355 ASSERT3S(len
, >, 0);
1357 if (dabd
&& dsize
> 0) {
1358 /* this needs precise iter.length */
1359 len
= MIN(daiter
.iter_mapsize
, len
);
1364 /* must be progressive */
1365 ASSERT3S(len
, >, 0);
1367 * The iterated function likely will not do well if each
1368 * segment except the last one is not multiple of 512 (raidz).
1370 ASSERT3U(((uint64_t)len
& 511ULL), ==, 0);
1372 func_raidz_gen(caddrs
, daiter
.iter_mapaddr
, len
, dlen
);
1374 for (i
= parity
-1; i
>= 0; i
--) {
1375 abd_iter_unmap(&caiters
[i
]);
1376 abd_iter_advance(&caiters
[i
], len
);
1379 if (dabd
&& dsize
> 0) {
1380 abd_iter_unmap(&daiter
);
1381 abd_iter_advance(&daiter
, dlen
);
1387 ASSERT3S(dsize
, >=, 0);
1388 ASSERT3S(csize
, >=, 0);
1390 local_irq_restore(flags
);
/*
 * Iterate over code ABDs and data reconstruction target ABDs and call
 * @func_raidz_rec. Function maps at most 6 pages atomically.
 *
 * @cabds           parity ABDs, must have equal size
 * @tabds           rec target ABDs, at most 3
 * @tsize           size of data target columns
 * @func_raidz_rec  expects syndrome data in target columns. Function
 *                  reconstructs data and overwrites target columns.
 */
void
abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
    ssize_t tsize, const unsigned parity,
    void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
    const unsigned *mul),
    const unsigned *mul)
{
	int i;
	ssize_t len;
	struct abd_iter citers[3];
	struct abd_iter xiters[3];
	void *caddrs[3], *xaddrs[3];
	unsigned long flags;

	ASSERT3U(parity, <=, 3);

	for (i = 0; i < parity; i++) {
		abd_iter_init(&citers[i], cabds[i], 2*i);
		abd_iter_init(&xiters[i], tabds[i], 2*i+1);
	}

	local_irq_save(flags);
	while (tsize > 0) {

		for (i = 0; i < parity; i++) {
			abd_iter_map(&citers[i]);
			abd_iter_map(&xiters[i]);
			caddrs[i] = citers[i].iter_mapaddr;
			xaddrs[i] = xiters[i].iter_mapaddr;
		}

		len = tsize;
		switch (parity) {
			case 3:
				len = MIN(xiters[2].iter_mapsize, len);
				len = MIN(citers[2].iter_mapsize, len);
				/* falls through */
			case 2:
				len = MIN(xiters[1].iter_mapsize, len);
				len = MIN(citers[1].iter_mapsize, len);
				/* falls through */
			case 1:
				len = MIN(xiters[0].iter_mapsize, len);
				len = MIN(citers[0].iter_mapsize, len);
		}
		/* must be progressive */
		ASSERT3S(len, >, 0);
		/*
		 * The iterated function likely will not do well if each
		 * segment except the last one is not a multiple of 512
		 * (raidz).
		 */
		ASSERT3U(((uint64_t)len & 511ULL), ==, 0);

		func_raidz_rec(xaddrs, len, caddrs, mul);

		for (i = parity-1; i >= 0; i--) {
			abd_iter_unmap(&xiters[i]);
			abd_iter_unmap(&citers[i]);
			abd_iter_advance(&xiters[i], len);
			abd_iter_advance(&citers[i], len);
		}

		tsize -= len;
		ASSERT3S(tsize, >=, 0);
	}
	local_irq_restore(flags);
}
#if defined(_KERNEL)
/*
 * bio_nr_pages for ABD.
 * @off is the offset in @abd
 */
unsigned long
abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off)
{
	unsigned long pos;

	if (abd_is_linear(abd))
		pos = (unsigned long)abd_to_buf(abd) + off;
	else
		pos = abd->abd_u.abd_scatter.abd_offset + off;

	return ((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) -
	    (pos >> PAGE_SHIFT);
}
/*
 * bio_map for scatter ABD.
 * @off is the offset in @abd
 * Remaining IO size is returned
 */
unsigned int
abd_scatter_bio_map_off(struct bio *bio, abd_t *abd,
    unsigned int io_size, size_t off)
{
	int i;
	struct abd_iter aiter;

	ASSERT(!abd_is_linear(abd));
	ASSERT3U(io_size, <=, abd->abd_size - off);

	abd_iter_init(&aiter, abd, 0);
	abd_iter_advance(&aiter, off);

	for (i = 0; i < bio->bi_max_vecs; i++) {
		struct page *pg;
		size_t len, sgoff, pgoff;
		struct scatterlist *sg;

		if (io_size <= 0)
			break;

		sg = aiter.iter_sg;
		sgoff = aiter.iter_offset;
		pgoff = sgoff & (PAGESIZE - 1);
		len = MIN(io_size, PAGESIZE - pgoff);
		ASSERT(len > 0);

		pg = nth_page(sg_page(sg), sgoff >> PAGE_SHIFT);
		if (bio_add_page(bio, pg, len, pgoff) != len)
			break;

		io_size -= len;
		abd_iter_advance(&aiter, len);
	}

	return (io_size);
}
/* Tunable Parameters */
module_param(zfs_abd_scatter_enabled, int, 0644);
MODULE_PARM_DESC(zfs_abd_scatter_enabled,
	"Toggle whether ABD allocations must be linear.");

module_param(zfs_abd_scatter_max_order, uint, 0644);
MODULE_PARM_DESC(zfs_abd_scatter_max_order,
	"Maximum order allocation used for a scatter ABD.");
#endif /* _KERNEL */