]>
Commit | Line | Data |
---|---|---|
9f0a21e6 | 1 | /* |
fc551d7e | 2 | * CDDL HEADER START |
9f0a21e6 | 3 | * |
fc551d7e BA |
4 | * The contents of this file are subject to the terms of the |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
fc551d7e BA |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
9f0a21e6 | 20 | */ |
9f0a21e6 MM |
21 | /* |
22 | * Copyright (c) 2014 by Chunwei Chen. All rights reserved. | |
fc551d7e | 23 | * Copyright (c) 2019 by Delphix. All rights reserved. |
9f0a21e6 MM |
24 | */ |
25 | ||
26 | /* | |
27 | * ARC buffer data (ABD). | |
28 | * | |
29 | * ABDs are an abstract data structure for the ARC which can use two | |
30 | * different ways of storing the underlying data: | |
31 | * | |
32 | * (a) Linear buffer. In this case, all the data in the ABD is stored in one | |
33 | * contiguous buffer in memory (from a zio_[data_]buf_* kmem cache). | |
34 | * | |
35 | * +-------------------+ | |
36 | * | ABD (linear) | | |
37 | * | abd_flags = ... | | |
38 | * | abd_size = ... | +--------------------------------+ | |
39 | * | abd_buf ------------->| raw buffer of size abd_size | | |
40 | * +-------------------+ +--------------------------------+ | |
41 | * no abd_chunks | |
42 | * | |
43 | * (b) Scattered buffer. In this case, the data in the ABD is split into | |
44 | * equal-sized chunks (from the abd_chunk_cache kmem_cache), with pointers | |
45 | * to the chunks recorded in an array at the end of the ABD structure. | |
46 | * | |
47 | * +-------------------+ | |
48 | * | ABD (scattered) | | |
49 | * | abd_flags = ... | | |
50 | * | abd_size = ... | | |
51 | * | abd_offset = 0 | +-----------+ | |
52 | * | abd_chunks[0] ----------------------------->| chunk 0 | | |
53 | * | abd_chunks[1] ---------------------+ +-----------+ | |
54 | * | ... | | +-----------+ | |
55 | * | abd_chunks[N-1] ---------+ +------->| chunk 1 | | |
56 | * +-------------------+ | +-----------+ | |
57 | * | ... | |
58 | * | +-----------+ | |
59 | * +----------------->| chunk N-1 | | |
60 | * +-----------+ | |
61 | * | |
9f0a21e6 MM |
62 | * In addition to directly allocating a linear or scattered ABD, it is also |
63 | * possible to create an ABD by requesting the "sub-ABD" starting at an offset | |
64 | * within an existing ABD. In linear buffers this is simple (set abd_buf of | |
65 | * the new ABD to the starting point within the original raw buffer), but | |
66 | * scattered ABDs are a little more complex. The new ABD makes a copy of the | |
67 | * relevant abd_chunks pointers (but not the underlying data). However, to | |
68 | * provide arbitrary rather than only chunk-aligned starting offsets, it also | |
69 | * tracks an abd_offset field which represents the starting point of the data | |
70 | * within the first chunk in abd_chunks. For both linear and scattered ABDs, | |
71 | * creating an offset ABD marks the original ABD as the offset's parent, and the | |
72 | * original ABD's abd_children refcount is incremented. This data allows us to | |
73 | * ensure the root ABD isn't deleted before its children. | |
74 | * | |
75 | * Most consumers should never need to know what type of ABD they're using -- | |
76 | * the ABD public API ensures that it's possible to transparently switch from | |
77 | * using a linear ABD to a scattered one when doing so would be beneficial. | |
78 | * | |
79 | * If you need to use the data within an ABD directly and you know it's linear | |
80 | * (because you allocated it) you can use abd_to_buf() to access the underlying | |
81 | * raw buffer. Otherwise, you should use one of the abd_borrow_buf* functions | |
82 | * which will allocate a raw buffer if necessary. Use the abd_return_buf* | |
83 | * functions to return any raw buffers that are no longer necessary when you're | |
84 | * done using them. | |
85 | * | |
86 | * There are a variety of ABD APIs that implement basic buffer operations: | |
87 | * compare, copy, read, write, and fill with zeroes. If you need a custom | |
88 | * function which progressively accesses the whole ABD, use the abd_iterate_* | |
89 | * functions. | |
fc551d7e | 90 | * |
fb822260 BA |
91 | * As an additional feature, linear and scatter ABD's can be stitched together |
92 | * by using the gang ABD type (abd_alloc_gang()). This allows for | |
93 | * multiple ABDs to be viewed as a singular ABD. | |
94 | * | |
fc551d7e BA |
95 | * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to |
96 | * B_FALSE. | |
9f0a21e6 MM |
97 | */ |
98 | ||
fc551d7e | 99 | #include <sys/abd_impl.h> |
9f0a21e6 MM |
100 | #include <sys/param.h> |
101 | #include <sys/zio.h> | |
102 | #include <sys/zfs_context.h> | |
103 | #include <sys/zfs_znode.h> | |
104 | ||
fc551d7e BA |
105 | /* see block comment above for description */ |
106 | int zfs_abd_scatter_enabled = B_TRUE; | |
9f0a21e6 | 107 | |
9f0a21e6 | 108 | void |
9f0a21e6 MM |
109 | abd_verify(abd_t *abd) |
110 | { | |
5e2c8338 | 111 | #ifdef ZFS_DEBUG |
9f0a21e6 MM |
112 | ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); |
113 | ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR | | |
fc551d7e | 114 | ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE | |
fb822260 | 115 | ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE | ABD_FLAG_GANG | |
e2af2acc | 116 | ABD_FLAG_GANG_FREE | ABD_FLAG_ZEROS | ABD_FLAG_ALLOCD)); |
9f0a21e6 MM |
117 | IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER)); |
118 | IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER); | |
119 | if (abd_is_linear(abd)) { | |
bba7cbf0 | 120 | ASSERT3U(abd->abd_size, >, 0); |
fc551d7e | 121 | ASSERT3P(ABD_LINEAR_BUF(abd), !=, NULL); |
fb822260 | 122 | } else if (abd_is_gang(abd)) { |
6fba7bfd | 123 | uint_t child_sizes = 0; |
fb822260 BA |
124 | for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain); |
125 | cabd != NULL; | |
126 | cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) { | |
e4d3d776 | 127 | ASSERT(list_link_active(&cabd->abd_gang_link)); |
6fba7bfd | 128 | child_sizes += cabd->abd_size; |
fb822260 BA |
129 | abd_verify(cabd); |
130 | } | |
6fba7bfd | 131 | ASSERT3U(abd->abd_size, ==, child_sizes); |
9f0a21e6 | 132 | } else { |
bba7cbf0 | 133 | ASSERT3U(abd->abd_size, >, 0); |
fc551d7e | 134 | abd_verify_scatter(abd); |
9f0a21e6 | 135 | } |
5e2c8338 | 136 | #endif |
9f0a21e6 MM |
137 | } |
138 | ||
e2af2acc MA |
139 | static void |
140 | abd_init_struct(abd_t *abd) | |
9f0a21e6 | 141 | { |
e2af2acc MA |
142 | list_link_init(&abd->abd_gang_link); |
143 | mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL); | |
e2af2acc | 144 | abd->abd_flags = 0; |
2d4bbd14 MA |
145 | #ifdef ZFS_DEBUG |
146 | zfs_refcount_create(&abd->abd_children); | |
e2af2acc | 147 | abd->abd_parent = NULL; |
2d4bbd14 | 148 | #endif |
e2af2acc MA |
149 | abd->abd_size = 0; |
150 | } | |
151 | ||
152 | static void | |
153 | abd_fini_struct(abd_t *abd) | |
154 | { | |
155 | mutex_destroy(&abd->abd_mtx); | |
156 | ASSERT(!list_link_active(&abd->abd_gang_link)); | |
2d4bbd14 | 157 | #ifdef ZFS_DEBUG |
e2af2acc | 158 | zfs_refcount_destroy(&abd->abd_children); |
2d4bbd14 | 159 | #endif |
e2af2acc MA |
160 | } |
161 | ||
162 | abd_t * | |
163 | abd_alloc_struct(size_t size) | |
164 | { | |
165 | abd_t *abd = abd_alloc_struct_impl(size); | |
166 | abd_init_struct(abd); | |
167 | abd->abd_flags |= ABD_FLAG_ALLOCD; | |
168 | return (abd); | |
169 | } | |
170 | ||
171 | void | |
172 | abd_free_struct(abd_t *abd) | |
173 | { | |
174 | abd_fini_struct(abd); | |
175 | abd_free_struct_impl(abd); | |
9f0a21e6 MM |
176 | } |
177 | ||
178 | /* | |
179 | * Allocate an ABD, along with its own underlying data buffers. Use this if you | |
180 | * don't care whether the ABD is linear or not. | |
181 | */ | |
182 | abd_t * | |
183 | abd_alloc(size_t size, boolean_t is_metadata) | |
184 | { | |
7eebcd2b | 185 | if (abd_size_alloc_linear(size)) |
9f0a21e6 MM |
186 | return (abd_alloc_linear(size, is_metadata)); |
187 | ||
188 | VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); | |
189 | ||
fc551d7e | 190 | abd_t *abd = abd_alloc_struct(size); |
e2af2acc | 191 | abd->abd_flags |= ABD_FLAG_OWNER; |
fc551d7e BA |
192 | abd->abd_u.abd_scatter.abd_offset = 0; |
193 | abd_alloc_chunks(abd, size); | |
194 | ||
9f0a21e6 MM |
195 | if (is_metadata) { |
196 | abd->abd_flags |= ABD_FLAG_META; | |
197 | } | |
198 | abd->abd_size = size; | |
9f0a21e6 | 199 | |
fc551d7e | 200 | abd_update_scatter_stats(abd, ABDSTAT_INCR); |
9f0a21e6 MM |
201 | |
202 | return (abd); | |
203 | } | |
204 | ||
9f0a21e6 MM |
205 | /* |
206 | * Allocate an ABD that must be linear, along with its own underlying data | |
207 | * buffer. Only use this when it would be very annoying to write your ABD | |
208 | * consumer with a scattered ABD. | |
209 | */ | |
210 | abd_t * | |
211 | abd_alloc_linear(size_t size, boolean_t is_metadata) | |
212 | { | |
213 | abd_t *abd = abd_alloc_struct(0); | |
214 | ||
215 | VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); | |
216 | ||
e2af2acc | 217 | abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_OWNER; |
9f0a21e6 MM |
218 | if (is_metadata) { |
219 | abd->abd_flags |= ABD_FLAG_META; | |
220 | } | |
221 | abd->abd_size = size; | |
9f0a21e6 MM |
222 | |
223 | if (is_metadata) { | |
fc551d7e | 224 | ABD_LINEAR_BUF(abd) = zio_buf_alloc(size); |
9f0a21e6 | 225 | } else { |
fc551d7e | 226 | ABD_LINEAR_BUF(abd) = zio_data_buf_alloc(size); |
9f0a21e6 MM |
227 | } |
228 | ||
fc551d7e | 229 | abd_update_linear_stats(abd, ABDSTAT_INCR); |
9f0a21e6 MM |
230 | |
231 | return (abd); | |
232 | } | |
233 | ||
234 | static void | |
235 | abd_free_linear(abd_t *abd) | |
236 | { | |
fc551d7e BA |
237 | if (abd_is_linear_page(abd)) { |
238 | abd_free_linear_page(abd); | |
239 | return; | |
240 | } | |
9f0a21e6 | 241 | if (abd->abd_flags & ABD_FLAG_META) { |
fc551d7e | 242 | zio_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size); |
9f0a21e6 | 243 | } else { |
fc551d7e | 244 | zio_data_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size); |
9f0a21e6 MM |
245 | } |
246 | ||
fc551d7e | 247 | abd_update_linear_stats(abd, ABDSTAT_DECR); |
9f0a21e6 MM |
248 | } |
249 | ||
fb822260 | 250 | static void |
416015ef | 251 | abd_free_gang(abd_t *abd) |
fb822260 BA |
252 | { |
253 | ASSERT(abd_is_gang(abd)); | |
e2af2acc | 254 | abd_t *cabd; |
fb822260 | 255 | |
e2af2acc | 256 | while ((cabd = list_head(&ABD_GANG(abd).abd_gang_chain)) != NULL) { |
e4d3d776 BA |
257 | /* |
258 | * We must acquire the child ABD's mutex to ensure that if it | |
259 | * is being added to another gang ABD we will set the link | |
260 | * as inactive when removing it from this gang ABD and before | |
261 | * adding it to the other gang ABD. | |
262 | */ | |
263 | mutex_enter(&cabd->abd_mtx); | |
264 | ASSERT(list_link_active(&cabd->abd_gang_link)); | |
265 | list_remove(&ABD_GANG(abd).abd_gang_chain, cabd); | |
266 | mutex_exit(&cabd->abd_mtx); | |
e2af2acc MA |
267 | if (cabd->abd_flags & ABD_FLAG_GANG_FREE) |
268 | abd_free(cabd); | |
fb822260 | 269 | } |
fb822260 | 270 | list_destroy(&ABD_GANG(abd).abd_gang_chain); |
e2af2acc MA |
271 | } |
272 | ||
273 | static void | |
274 | abd_free_scatter(abd_t *abd) | |
275 | { | |
276 | abd_free_chunks(abd); | |
277 | abd_update_scatter_stats(abd, ABDSTAT_DECR); | |
fb822260 BA |
278 | } |
279 | ||
9f0a21e6 | 280 | /* |
e2af2acc MA |
281 | * Free an ABD. Use with any kind of abd: those created with abd_alloc_*() |
282 | * and abd_get_*(), including abd_get_offset_struct(). | |
283 | * | |
284 | * If the ABD was created with abd_alloc_*(), the underlying data | |
285 | * (scatterlist or linear buffer) will also be freed. (Subject to ownership | |
286 | * changes via abd_*_ownership_of_buf().) | |
287 | * | |
288 | * Unless the ABD was created with abd_get_offset_struct(), the abd_t will | |
289 | * also be freed. | |
9f0a21e6 MM |
290 | */ |
291 | void | |
292 | abd_free(abd_t *abd) | |
293 | { | |
294 | if (abd == NULL) | |
295 | return; | |
296 | ||
297 | abd_verify(abd); | |
2d4bbd14 | 298 | #ifdef ZFS_DEBUG |
e2af2acc | 299 | IMPLY(abd->abd_flags & ABD_FLAG_OWNER, abd->abd_parent == NULL); |
2d4bbd14 | 300 | #endif |
e2af2acc MA |
301 | |
302 | if (abd_is_gang(abd)) { | |
416015ef | 303 | abd_free_gang(abd); |
e2af2acc MA |
304 | } else if (abd_is_linear(abd)) { |
305 | if (abd->abd_flags & ABD_FLAG_OWNER) | |
306 | abd_free_linear(abd); | |
307 | } else { | |
308 | if (abd->abd_flags & ABD_FLAG_OWNER) | |
309 | abd_free_scatter(abd); | |
310 | } | |
311 | ||
2d4bbd14 | 312 | #ifdef ZFS_DEBUG |
2993698e BA |
313 | if (abd->abd_parent != NULL) { |
314 | (void) zfs_refcount_remove_many(&abd->abd_parent->abd_children, | |
315 | abd->abd_size, abd); | |
316 | } | |
2d4bbd14 | 317 | #endif |
2993698e | 318 | |
e2af2acc MA |
319 | abd_fini_struct(abd); |
320 | if (abd->abd_flags & ABD_FLAG_ALLOCD) | |
321 | abd_free_struct_impl(abd); | |
9f0a21e6 MM |
322 | } |
323 | ||
324 | /* | |
325 | * Allocate an ABD of the same format (same metadata flag, same scatterize | |
326 | * setting) as another ABD. | |
327 | */ | |
328 | abd_t * | |
329 | abd_alloc_sametype(abd_t *sabd, size_t size) | |
330 | { | |
331 | boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0; | |
fc551d7e BA |
332 | if (abd_is_linear(sabd) && |
333 | !abd_is_linear_page(sabd)) { | |
9f0a21e6 MM |
334 | return (abd_alloc_linear(size, is_metadata)); |
335 | } else { | |
336 | return (abd_alloc(size, is_metadata)); | |
337 | } | |
338 | } | |
339 | ||
fb822260 BA |
340 | /* |
341 | * Create a gang ABD that will be the head of a list of ABD's. This is used | |
342 | * to "chain" scatter/gather lists together when constructing aggregated | |
343 | * IO's. To free this abd, abd_free() must be called. | |
344 | */ | |
345 | abd_t * | |
e2af2acc | 346 | abd_alloc_gang(void) |
fb822260 | 347 | { |
e2af2acc MA |
348 | abd_t *abd = abd_alloc_struct(0); |
349 | abd->abd_flags |= ABD_FLAG_GANG | ABD_FLAG_OWNER; | |
fb822260 BA |
350 | list_create(&ABD_GANG(abd).abd_gang_chain, |
351 | sizeof (abd_t), offsetof(abd_t, abd_gang_link)); | |
fb822260 BA |
352 | return (abd); |
353 | } | |
354 | ||
6fba7bfd BA |
355 | /* |
356 | * Add a child gang ABD to a parent gang ABD's chained list. | |
357 | */ | |
358 | static void | |
359 | abd_gang_add_gang(abd_t *pabd, abd_t *cabd, boolean_t free_on_free) | |
360 | { | |
361 | ASSERT(abd_is_gang(pabd)); | |
362 | ASSERT(abd_is_gang(cabd)); | |
363 | ||
364 | if (free_on_free) { | |
365 | /* | |
366 | * If the parent is responsible for freeing the child gang | |
e2af2acc MA |
367 | * ABD we will just splice the child's children ABD list to |
368 | * the parent's list and immediately free the child gang ABD | |
6fba7bfd BA |
369 | * struct. The parent gang ABD's children from the child gang |
370 | * will retain all the free_on_free settings after being | |
371 | * added to the parent's list. | |
372 | */ | |
190290a9 AM |
373 | #ifdef ZFS_DEBUG |
374 | /* | |
375 | * If cabd had abd_parent, we have to drop it here. We can't | |
376 | * transfer it to pabd, nor can we clear abd_size leaving it. | |
377 | */ | |
378 | if (cabd->abd_parent != NULL) { | |
379 | (void) zfs_refcount_remove_many( | |
380 | &cabd->abd_parent->abd_children, | |
381 | cabd->abd_size, cabd); | |
382 | cabd->abd_parent = NULL; | |
383 | } | |
384 | #endif | |
6fba7bfd | 385 | pabd->abd_size += cabd->abd_size; |
190290a9 | 386 | cabd->abd_size = 0; |
6fba7bfd BA |
387 | list_move_tail(&ABD_GANG(pabd).abd_gang_chain, |
388 | &ABD_GANG(cabd).abd_gang_chain); | |
389 | ASSERT(list_is_empty(&ABD_GANG(cabd).abd_gang_chain)); | |
390 | abd_verify(pabd); | |
2993698e | 391 | abd_free(cabd); |
6fba7bfd BA |
392 | } else { |
393 | for (abd_t *child = list_head(&ABD_GANG(cabd).abd_gang_chain); | |
394 | child != NULL; | |
395 | child = list_next(&ABD_GANG(cabd).abd_gang_chain, child)) { | |
396 | /* | |
397 | * We always pass B_FALSE for free_on_free as it is the | |
bf169e9f | 398 | * original child gang ABD's responsibility to determine |
6fba7bfd BA |
399 | * if any of its child ABDs should be free'd on the call |
400 | * to abd_free(). | |
401 | */ | |
402 | abd_gang_add(pabd, child, B_FALSE); | |
403 | } | |
404 | abd_verify(pabd); | |
405 | } | |
406 | } | |
407 | ||
fb822260 BA |
408 | /* |
409 | * Add a child ABD to a gang ABD's chained list. | |
410 | */ | |
411 | void | |
412 | abd_gang_add(abd_t *pabd, abd_t *cabd, boolean_t free_on_free) | |
413 | { | |
414 | ASSERT(abd_is_gang(pabd)); | |
415 | abd_t *child_abd = NULL; | |
416 | ||
6fba7bfd BA |
417 | /* |
418 | * If the child being added is a gang ABD, we will add the | |
e2af2acc | 419 | * child's ABDs to the parent gang ABD. This allows us to account |
6fba7bfd BA |
420 | * for the offset correctly in the parent gang ABD. |
421 | */ | |
422 | if (abd_is_gang(cabd)) { | |
423 | ASSERT(!list_link_active(&cabd->abd_gang_link)); | |
6fba7bfd BA |
424 | return (abd_gang_add_gang(pabd, cabd, free_on_free)); |
425 | } | |
426 | ASSERT(!abd_is_gang(cabd)); | |
427 | ||
fb822260 BA |
428 | /* |
429 | * In order to verify that an ABD is not already part of | |
430 | * another gang ABD, we must lock the child ABD's abd_mtx | |
431 | * to check its abd_gang_link status. We unlock the abd_mtx | |
432 | * only after it has been added to a gang ABD, which | |
433 | * will update the abd_gang_link's status. See comment below | |
434 | * for how an ABD can be in multiple gang ABD's simultaneously. | |
435 | */ | |
436 | mutex_enter(&cabd->abd_mtx); | |
437 | if (list_link_active(&cabd->abd_gang_link)) { | |
438 | /* | |
439 | * If the child ABD is already part of another | |
440 | * gang ABD then we must allocate a new | |
dd4bc569 | 441 | * ABD to use a separate link. We mark the newly |
fb822260 BA |
442 | * allocated ABD with ABD_FLAG_GANG_FREE, before |
443 | * adding it to the gang ABD's list, to make the | |
444 | * gang ABD aware that it is responsible to call | |
416015ef | 445 | * abd_free(). We use abd_get_offset() in order |
fb822260 BA |
446 | * to just allocate a new ABD but avoid copying the |
447 | * data over into the newly allocated ABD. | |
448 | * | |
449 | * An ABD may become part of multiple gang ABD's. For | |
dd4bc569 | 450 | * example, when writing ditto blocks, the same ABD |
fb822260 BA |
451 | * is used to write 2 or 3 locations with 2 or 3 |
452 | * zio_t's. Each of the zio's may be aggregated with | |
453 | * different adjacent zio's. zio aggregation uses gang | |
454 | * zio's, so the single ABD can become part of multiple | |
455 | * gang zio's. | |
456 | * | |
457 | * The ASSERT below is to make sure that if | |
458 | * free_on_free is passed as B_TRUE, the ABD can | |
dd4bc569 | 459 | * not be in multiple gang ABD's. The gang ABD |
fb822260 BA |
460 | * can not be responsible for cleaning up the child |
461 | * ABD memory allocation if the ABD can be in | |
462 | * multiple gang ABD's at one time. | |
463 | */ | |
464 | ASSERT3B(free_on_free, ==, B_FALSE); | |
465 | child_abd = abd_get_offset(cabd, 0); | |
466 | child_abd->abd_flags |= ABD_FLAG_GANG_FREE; | |
467 | } else { | |
468 | child_abd = cabd; | |
469 | if (free_on_free) | |
470 | child_abd->abd_flags |= ABD_FLAG_GANG_FREE; | |
471 | } | |
472 | ASSERT3P(child_abd, !=, NULL); | |
473 | ||
474 | list_insert_tail(&ABD_GANG(pabd).abd_gang_chain, child_abd); | |
475 | mutex_exit(&cabd->abd_mtx); | |
476 | pabd->abd_size += child_abd->abd_size; | |
477 | } | |
478 | ||
479 | /* | |
480 | * Locate the ABD for the supplied offset in the gang ABD. | |
481 | * Return a new offset relative to the returned ABD. | |
482 | */ | |
483 | abd_t * | |
484 | abd_gang_get_offset(abd_t *abd, size_t *off) | |
485 | { | |
486 | abd_t *cabd; | |
487 | ||
488 | ASSERT(abd_is_gang(abd)); | |
489 | ASSERT3U(*off, <, abd->abd_size); | |
490 | for (cabd = list_head(&ABD_GANG(abd).abd_gang_chain); cabd != NULL; | |
491 | cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) { | |
492 | if (*off >= cabd->abd_size) | |
493 | *off -= cabd->abd_size; | |
494 | else | |
495 | return (cabd); | |
496 | } | |
497 | VERIFY3P(cabd, !=, NULL); | |
498 | return (cabd); | |
499 | } | |
500 | ||
9f0a21e6 | 501 | /* |
e2af2acc MA |
502 | * Allocate a new ABD, using the provided struct (if non-NULL, and if |
503 | * circumstances allow - otherwise allocate the struct). The returned ABD will | |
504 | * point to offset off of sabd. It shares the underlying buffer data with sabd. | |
505 | * Use abd_free() to free. sabd must not be freed while any derived ABDs exist. | |
9f0a21e6 | 506 | */ |
fc551d7e | 507 | static abd_t * |
e2af2acc | 508 | abd_get_offset_impl(abd_t *abd, abd_t *sabd, size_t off, size_t size) |
9f0a21e6 | 509 | { |
9f0a21e6 | 510 | abd_verify(sabd); |
e2af2acc | 511 | ASSERT3U(off + size, <=, sabd->abd_size); |
9f0a21e6 MM |
512 | |
513 | if (abd_is_linear(sabd)) { | |
e2af2acc MA |
514 | if (abd == NULL) |
515 | abd = abd_alloc_struct(0); | |
9f0a21e6 MM |
516 | /* |
517 | * Even if this buf is filesystem metadata, we only track that | |
518 | * if we own the underlying data buffer, which is not true in | |
519 | * this case. Therefore, we don't ever use ABD_FLAG_META here. | |
520 | */ | |
e2af2acc | 521 | abd->abd_flags |= ABD_FLAG_LINEAR; |
9f0a21e6 | 522 | |
fc551d7e | 523 | ABD_LINEAR_BUF(abd) = (char *)ABD_LINEAR_BUF(sabd) + off; |
fb822260 BA |
524 | } else if (abd_is_gang(sabd)) { |
525 | size_t left = size; | |
e2af2acc MA |
526 | if (abd == NULL) { |
527 | abd = abd_alloc_gang(); | |
528 | } else { | |
529 | abd->abd_flags |= ABD_FLAG_GANG; | |
530 | list_create(&ABD_GANG(abd).abd_gang_chain, | |
531 | sizeof (abd_t), offsetof(abd_t, abd_gang_link)); | |
532 | } | |
533 | ||
fb822260 BA |
534 | abd->abd_flags &= ~ABD_FLAG_OWNER; |
535 | for (abd_t *cabd = abd_gang_get_offset(sabd, &off); | |
536 | cabd != NULL && left > 0; | |
537 | cabd = list_next(&ABD_GANG(sabd).abd_gang_chain, cabd)) { | |
538 | int csize = MIN(left, cabd->abd_size - off); | |
539 | ||
e2af2acc MA |
540 | abd_t *nabd = abd_get_offset_size(cabd, off, csize); |
541 | abd_gang_add(abd, nabd, B_TRUE); | |
fb822260 BA |
542 | left -= csize; |
543 | off = 0; | |
544 | } | |
545 | ASSERT3U(left, ==, 0); | |
9f0a21e6 | 546 | } else { |
c6d1112b | 547 | abd = abd_get_offset_scatter(abd, sabd, off, size); |
9f0a21e6 MM |
548 | } |
549 | ||
4a90d4d6 | 550 | ASSERT3P(abd, !=, NULL); |
fc551d7e | 551 | abd->abd_size = size; |
2d4bbd14 | 552 | #ifdef ZFS_DEBUG |
9f0a21e6 | 553 | abd->abd_parent = sabd; |
9f0a21e6 | 554 | (void) zfs_refcount_add_many(&sabd->abd_children, abd->abd_size, abd); |
2d4bbd14 | 555 | #endif |
9f0a21e6 MM |
556 | return (abd); |
557 | } | |
558 | ||
e2af2acc MA |
559 | /* |
560 | * Like abd_get_offset_size(), but memory for the abd_t is provided by the | |
561 | * caller. Using this routine can improve performance by avoiding the cost | |
562 | * of allocating memory for the abd_t struct, and updating the abd stats. | |
563 | * Usually, the provided abd is returned, but in some circumstances (FreeBSD, | |
564 | * if sabd is scatter and size is more than 2 pages) a new abd_t may need to | |
565 | * be allocated. Therefore callers should be careful to use the returned | |
566 | * abd_t*. | |
567 | */ | |
568 | abd_t * | |
569 | abd_get_offset_struct(abd_t *abd, abd_t *sabd, size_t off, size_t size) | |
570 | { | |
8a6d4448 | 571 | abd_t *result; |
e2af2acc | 572 | abd_init_struct(abd); |
8a6d4448 JL |
573 | result = abd_get_offset_impl(abd, sabd, off, size); |
574 | if (result != abd) | |
575 | abd_fini_struct(abd); | |
576 | return (result); | |
e2af2acc MA |
577 | } |
578 | ||
9f0a21e6 MM |
579 | abd_t * |
580 | abd_get_offset(abd_t *sabd, size_t off) | |
581 | { | |
fc551d7e BA |
582 | size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0; |
583 | VERIFY3U(size, >, 0); | |
e2af2acc | 584 | return (abd_get_offset_impl(NULL, sabd, off, size)); |
9f0a21e6 MM |
585 | } |
586 | ||
587 | abd_t * | |
588 | abd_get_offset_size(abd_t *sabd, size_t off, size_t size) | |
589 | { | |
590 | ASSERT3U(off + size, <=, sabd->abd_size); | |
e2af2acc | 591 | return (abd_get_offset_impl(NULL, sabd, off, size)); |
9f0a21e6 MM |
592 | } |
593 | ||
fb822260 | 594 | /* |
416015ef | 595 | * Return a scatter ABD of the given size containing only zeros. |
fb822260 BA |
596 | */ |
597 | abd_t * | |
598 | abd_get_zeros(size_t size) | |
599 | { | |
600 | ASSERT3P(abd_zero_scatter, !=, NULL); | |
601 | ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); | |
602 | return (abd_get_offset_size(abd_zero_scatter, 0, size)); | |
603 | } | |
604 | ||
9f0a21e6 | 605 | /* |
416015ef | 606 | * Allocate a linear ABD structure for buf. |
9f0a21e6 MM |
607 | */ |
608 | abd_t * | |
609 | abd_get_from_buf(void *buf, size_t size) | |
610 | { | |
611 | abd_t *abd = abd_alloc_struct(0); | |
612 | ||
613 | VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); | |
614 | ||
615 | /* | |
616 | * Even if this buf is filesystem metadata, we only track that if we | |
617 | * own the underlying data buffer, which is not true in this case. | |
618 | * Therefore, we don't ever use ABD_FLAG_META here. | |
619 | */ | |
e2af2acc | 620 | abd->abd_flags |= ABD_FLAG_LINEAR; |
9f0a21e6 | 621 | abd->abd_size = size; |
9f0a21e6 | 622 | |
fc551d7e | 623 | ABD_LINEAR_BUF(abd) = buf; |
9f0a21e6 MM |
624 | |
625 | return (abd); | |
626 | } | |
627 | ||
9f0a21e6 MM |
628 | /* |
629 | * Get the raw buffer associated with a linear ABD. | |
630 | */ | |
631 | void * | |
632 | abd_to_buf(abd_t *abd) | |
633 | { | |
634 | ASSERT(abd_is_linear(abd)); | |
635 | abd_verify(abd); | |
fc551d7e | 636 | return (ABD_LINEAR_BUF(abd)); |
9f0a21e6 MM |
637 | } |
638 | ||
639 | /* | |
640 | * Borrow a raw buffer from an ABD without copying the contents of the ABD | |
641 | * into the buffer. If the ABD is scattered, this will allocate a raw buffer | |
642 | * whose contents are undefined. To copy over the existing data in the ABD, use | |
643 | * abd_borrow_buf_copy() instead. | |
644 | */ | |
645 | void * | |
646 | abd_borrow_buf(abd_t *abd, size_t n) | |
647 | { | |
648 | void *buf; | |
649 | abd_verify(abd); | |
650 | ASSERT3U(abd->abd_size, >=, n); | |
651 | if (abd_is_linear(abd)) { | |
652 | buf = abd_to_buf(abd); | |
653 | } else { | |
654 | buf = zio_buf_alloc(n); | |
655 | } | |
2d4bbd14 | 656 | #ifdef ZFS_DEBUG |
9f0a21e6 | 657 | (void) zfs_refcount_add_many(&abd->abd_children, n, buf); |
2d4bbd14 | 658 | #endif |
9f0a21e6 MM |
659 | return (buf); |
660 | } | |
661 | ||
662 | void * | |
663 | abd_borrow_buf_copy(abd_t *abd, size_t n) | |
664 | { | |
665 | void *buf = abd_borrow_buf(abd, n); | |
666 | if (!abd_is_linear(abd)) { | |
667 | abd_copy_to_buf(buf, abd, n); | |
668 | } | |
669 | return (buf); | |
670 | } | |
671 | ||
672 | /* | |
673 | * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will | |
674 | * not change the contents of the ABD and will ASSERT that you didn't modify | |
675 | * the buffer since it was borrowed. If you want any changes you made to buf to | |
676 | * be copied back to abd, use abd_return_buf_copy() instead. | |
677 | */ | |
678 | void | |
679 | abd_return_buf(abd_t *abd, void *buf, size_t n) | |
680 | { | |
681 | abd_verify(abd); | |
682 | ASSERT3U(abd->abd_size, >=, n); | |
d692e6c3 RY |
683 | #ifdef ZFS_DEBUG |
684 | (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); | |
685 | #endif | |
9f0a21e6 MM |
686 | if (abd_is_linear(abd)) { |
687 | ASSERT3P(buf, ==, abd_to_buf(abd)); | |
688 | } else { | |
689 | ASSERT0(abd_cmp_buf(abd, buf, n)); | |
690 | zio_buf_free(buf, n); | |
691 | } | |
9f0a21e6 MM |
692 | } |
693 | ||
694 | void | |
695 | abd_return_buf_copy(abd_t *abd, void *buf, size_t n) | |
696 | { | |
697 | if (!abd_is_linear(abd)) { | |
698 | abd_copy_from_buf(abd, buf, n); | |
699 | } | |
700 | abd_return_buf(abd, buf, n); | |
701 | } | |
702 | ||
9f0a21e6 MM |
703 | void |
704 | abd_release_ownership_of_buf(abd_t *abd) | |
705 | { | |
706 | ASSERT(abd_is_linear(abd)); | |
707 | ASSERT(abd->abd_flags & ABD_FLAG_OWNER); | |
fc551d7e BA |
708 | |
709 | /* | |
710 | * abd_free() needs to handle LINEAR_PAGE ABD's specially. | |
711 | * Since that flag does not survive the | |
712 | * abd_release_ownership_of_buf() -> abd_get_from_buf() -> | |
713 | * abd_take_ownership_of_buf() sequence, we don't allow releasing | |
714 | * these "linear but not zio_[data_]buf_alloc()'ed" ABD's. | |
715 | */ | |
716 | ASSERT(!abd_is_linear_page(abd)); | |
717 | ||
9f0a21e6 MM |
718 | abd_verify(abd); |
719 | ||
720 | abd->abd_flags &= ~ABD_FLAG_OWNER; | |
721 | /* Disable this flag since we no longer own the data buffer */ | |
722 | abd->abd_flags &= ~ABD_FLAG_META; | |
723 | ||
fc551d7e | 724 | abd_update_linear_stats(abd, ABDSTAT_DECR); |
9f0a21e6 MM |
725 | } |
726 | ||
9f0a21e6 MM |
727 | |
728 | /* | |
fc551d7e BA |
729 | * Give this ABD ownership of the buffer that it's storing. Can only be used on |
730 | * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated | |
731 | * with abd_alloc_linear() which subsequently released ownership of their buf | |
732 | * with abd_release_ownership_of_buf(). | |
9f0a21e6 | 733 | */ |
fc551d7e BA |
734 | void |
735 | abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata) | |
9f0a21e6 | 736 | { |
fc551d7e BA |
737 | ASSERT(abd_is_linear(abd)); |
738 | ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER)); | |
9f0a21e6 | 739 | abd_verify(abd); |
9f0a21e6 | 740 | |
fc551d7e BA |
741 | abd->abd_flags |= ABD_FLAG_OWNER; |
742 | if (is_metadata) { | |
743 | abd->abd_flags |= ABD_FLAG_META; | |
9f0a21e6 | 744 | } |
9f0a21e6 | 745 | |
fc551d7e | 746 | abd_update_linear_stats(abd, ABDSTAT_INCR); |
9f0a21e6 MM |
747 | } |
748 | ||
fb822260 BA |
749 | /* |
750 | * Initializes an abd_iter based on whether the abd is a gang ABD | |
751 | * or just a single ABD. | |
752 | */ | |
753 | static inline abd_t * | |
754 | abd_init_abd_iter(abd_t *abd, struct abd_iter *aiter, size_t off) | |
755 | { | |
756 | abd_t *cabd = NULL; | |
757 | ||
758 | if (abd_is_gang(abd)) { | |
759 | cabd = abd_gang_get_offset(abd, &off); | |
760 | if (cabd) { | |
761 | abd_iter_init(aiter, cabd); | |
762 | abd_iter_advance(aiter, off); | |
763 | } | |
764 | } else { | |
765 | abd_iter_init(aiter, abd); | |
766 | abd_iter_advance(aiter, off); | |
767 | } | |
768 | return (cabd); | |
769 | } | |
770 | ||
771 | /* | |
772 | * Advances an abd_iter. We have to be careful with gang ABD as | |
773 | * advancing could mean that we are at the end of a particular ABD and | |
774 | * must grab the next ABD in the gang ABD's list. | |
775 | */ | |
776 | static inline abd_t * | |
777 | abd_advance_abd_iter(abd_t *abd, abd_t *cabd, struct abd_iter *aiter, | |
778 | size_t len) | |
779 | { | |
780 | abd_iter_advance(aiter, len); | |
781 | if (abd_is_gang(abd) && abd_iter_at_end(aiter)) { | |
782 | ASSERT3P(cabd, !=, NULL); | |
783 | cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd); | |
784 | if (cabd) { | |
785 | abd_iter_init(aiter, cabd); | |
786 | abd_iter_advance(aiter, 0); | |
787 | } | |
788 | } | |
789 | return (cabd); | |
790 | } | |
791 | ||
9f0a21e6 MM |
792 | int |
793 | abd_iterate_func(abd_t *abd, size_t off, size_t size, | |
794 | abd_iter_func_t *func, void *private) | |
795 | { | |
9f0a21e6 | 796 | struct abd_iter aiter; |
b2255edc BB |
797 | int ret = 0; |
798 | ||
799 | if (size == 0) | |
800 | return (0); | |
9f0a21e6 MM |
801 | |
802 | abd_verify(abd); | |
803 | ASSERT3U(off + size, <=, abd->abd_size); | |
804 | ||
b2255edc | 805 | abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off); |
9f0a21e6 MM |
806 | |
807 | while (size > 0) { | |
e007908a | 808 | IMPLY(abd_is_gang(abd), c_abd != NULL); |
fb822260 | 809 | |
9f0a21e6 MM |
810 | abd_iter_map(&aiter); |
811 | ||
812 | size_t len = MIN(aiter.iter_mapsize, size); | |
813 | ASSERT3U(len, >, 0); | |
814 | ||
815 | ret = func(aiter.iter_mapaddr, len, private); | |
816 | ||
817 | abd_iter_unmap(&aiter); | |
818 | ||
819 | if (ret != 0) | |
820 | break; | |
821 | ||
822 | size -= len; | |
fb822260 | 823 | c_abd = abd_advance_abd_iter(abd, c_abd, &aiter, len); |
9f0a21e6 MM |
824 | } |
825 | ||
826 | return (ret); | |
827 | } | |
828 | ||
390b4487 RN |
#if defined(__linux__) && defined(_KERNEL)
/*
 * Page-oriented counterpart of abd_iterate_func(): walk size bytes of abd
 * starting at byte offset off and call func once per step with the page
 * reported by abd_iter_page(), the data offset within it, the number of
 * bytes to process and private.
 *
 * Iteration stops as soon as func returns non-zero and that value is
 * returned; otherwise 0 is returned. A size of 0 is a no-op.
 */
int
abd_iterate_page_func(abd_t *abd, size_t off, size_t size,
    abd_iter_page_func_t *func, void *private)
{
	struct abd_iter aiter;

	if (size == 0)
		return (0);

	abd_verify(abd);
	ASSERT3U(off + size, <=, abd->abd_size);

	abd_t *cur = abd_init_abd_iter(abd, &aiter, off);

	/* size is known to be non-zero here, so the body runs at least once. */
	do {
		IMPLY(abd_is_gang(abd), cur != NULL);

		abd_iter_page(&aiter);

		size_t chunk = MIN(aiter.iter_page_dsize, size);
		ASSERT3U(chunk, >, 0);

		int err = func(aiter.iter_page, aiter.iter_page_doff,
		    chunk, private);

		/* Reset the iterator's page fields once func is done. */
		aiter.iter_page = NULL;
		aiter.iter_page_doff = 0;
		aiter.iter_page_dsize = 0;

		if (err != 0)
			return (err);

		size -= chunk;
		cur = abd_advance_abd_iter(abd, cur, &aiter, chunk);
	} while (size > 0);

	return (0);
}
#endif
870 | ||
9f0a21e6 MM |
/*
 * Cursor into a caller-supplied flat buffer. It is passed as the private
 * argument of the abd_iterate_func() callbacks that copy to, copy from or
 * compare against such a buffer.
 */
struct buf_arg {
	void *arg_buf;
};

/*
 * abd_iterate_func() callback: copy size bytes from the ABD segment buf to
 * the flat buffer at the cursor position, then move the cursor forward by
 * size. Always returns 0.
 */
static int
abd_copy_to_buf_off_cb(void *buf, size_t size, void *private)
{
	struct buf_arg *cursor = private;
	char *dst = cursor->arg_buf;

	(void) memcpy(dst, buf, size);
	cursor->arg_buf = dst + size;

	return (0);
}
885 | ||
886 | /* | |
887 | * Copy abd to buf. (off is the offset in abd.) | |
888 | */ | |
889 | void | |
890 | abd_copy_to_buf_off(void *buf, abd_t *abd, size_t off, size_t size) | |
891 | { | |
892 | struct buf_arg ba_ptr = { buf }; | |
893 | ||
894 | (void) abd_iterate_func(abd, off, size, abd_copy_to_buf_off_cb, | |
895 | &ba_ptr); | |
896 | } | |
897 | ||
898 | static int | |
899 | abd_cmp_buf_off_cb(void *buf, size_t size, void *private) | |
900 | { | |
901 | int ret; | |
902 | struct buf_arg *ba_ptr = private; | |
903 | ||
904 | ret = memcmp(buf, ba_ptr->arg_buf, size); | |
905 | ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; | |
906 | ||
907 | return (ret); | |
908 | } | |
909 | ||
910 | /* | |
911 | * Compare the contents of abd to buf. (off is the offset in abd.) | |
912 | */ | |
913 | int | |
914 | abd_cmp_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) | |
915 | { | |
916 | struct buf_arg ba_ptr = { (void *) buf }; | |
917 | ||
918 | return (abd_iterate_func(abd, off, size, abd_cmp_buf_off_cb, &ba_ptr)); | |
919 | } | |
920 | ||
921 | static int | |
922 | abd_copy_from_buf_off_cb(void *buf, size_t size, void *private) | |
923 | { | |
924 | struct buf_arg *ba_ptr = private; | |
925 | ||
926 | (void) memcpy(buf, ba_ptr->arg_buf, size); | |
927 | ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; | |
928 | ||
929 | return (0); | |
930 | } | |
931 | ||
932 | /* | |
933 | * Copy from buf to abd. (off is the offset in abd.) | |
934 | */ | |
935 | void | |
936 | abd_copy_from_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) | |
937 | { | |
938 | struct buf_arg ba_ptr = { (void *) buf }; | |
939 | ||
940 | (void) abd_iterate_func(abd, off, size, abd_copy_from_buf_off_cb, | |
941 | &ba_ptr); | |
942 | } | |
943 | ||
9f0a21e6 MM |
/*
 * abd_iterate_func() callback: fill the size-byte segment at buf with
 * zero bytes. private is not used. Always returns 0.
 */
static int
abd_zero_off_cb(void *buf, size_t size, void *private)
{
	const int fill = 0;

	(void) private;
	(void) memset(buf, fill, size);

	return (0);
}
951 | ||
952 | /* | |
953 | * Zero out the abd from a particular offset to the end. | |
954 | */ | |
955 | void | |
956 | abd_zero_off(abd_t *abd, size_t off, size_t size) | |
957 | { | |
958 | (void) abd_iterate_func(abd, off, size, abd_zero_off_cb, NULL); | |
959 | } | |
960 | ||
961 | /* | |
962 | * Iterate over two ABDs and call func incrementally on the two ABDs' data in | |
963 | * equal-sized chunks (passed to func as raw buffers). func could be called many | |
964 | * times during this iteration. | |
965 | */ | |
966 | int | |
967 | abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, | |
968 | size_t size, abd_iter_func2_t *func, void *private) | |
969 | { | |
970 | int ret = 0; | |
971 | struct abd_iter daiter, saiter; | |
fb822260 | 972 | abd_t *c_dabd, *c_sabd; |
9f0a21e6 | 973 | |
b2255edc BB |
974 | if (size == 0) |
975 | return (0); | |
976 | ||
9f0a21e6 MM |
977 | abd_verify(dabd); |
978 | abd_verify(sabd); | |
979 | ||
980 | ASSERT3U(doff + size, <=, dabd->abd_size); | |
981 | ASSERT3U(soff + size, <=, sabd->abd_size); | |
982 | ||
fb822260 BA |
983 | c_dabd = abd_init_abd_iter(dabd, &daiter, doff); |
984 | c_sabd = abd_init_abd_iter(sabd, &saiter, soff); | |
9f0a21e6 MM |
985 | |
986 | while (size > 0) { | |
e007908a AM |
987 | IMPLY(abd_is_gang(dabd), c_dabd != NULL); |
988 | IMPLY(abd_is_gang(sabd), c_sabd != NULL); | |
fb822260 | 989 | |
9f0a21e6 MM |
990 | abd_iter_map(&daiter); |
991 | abd_iter_map(&saiter); | |
992 | ||
993 | size_t dlen = MIN(daiter.iter_mapsize, size); | |
994 | size_t slen = MIN(saiter.iter_mapsize, size); | |
995 | size_t len = MIN(dlen, slen); | |
996 | ASSERT(dlen > 0 || slen > 0); | |
997 | ||
998 | ret = func(daiter.iter_mapaddr, saiter.iter_mapaddr, len, | |
999 | private); | |
1000 | ||
1001 | abd_iter_unmap(&saiter); | |
1002 | abd_iter_unmap(&daiter); | |
1003 | ||
1004 | if (ret != 0) | |
1005 | break; | |
1006 | ||
1007 | size -= len; | |
fb822260 BA |
1008 | c_dabd = |
1009 | abd_advance_abd_iter(dabd, c_dabd, &daiter, len); | |
1010 | c_sabd = | |
1011 | abd_advance_abd_iter(sabd, c_sabd, &saiter, len); | |
9f0a21e6 MM |
1012 | } |
1013 | ||
1014 | return (ret); | |
1015 | } | |
1016 | ||
9f0a21e6 MM |
/*
 * abd_iterate_func2() callback: copy size bytes from the source segment
 * sbuf to the destination segment dbuf. private is not used.
 * Always returns 0.
 */
static int
abd_copy_off_cb(void *dbuf, void *sbuf, size_t size, void *private)
{
	const void *src = sbuf;

	(void) private;
	(void) memcpy(dbuf, src, size);

	return (0);
}
1024 | ||
1025 | /* | |
1026 | * Copy from sabd to dabd starting from soff and doff. | |
1027 | */ | |
1028 | void | |
1029 | abd_copy_off(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, size_t size) | |
1030 | { | |
1031 | (void) abd_iterate_func2(dabd, sabd, doff, soff, size, | |
1032 | abd_copy_off_cb, NULL); | |
1033 | } | |
1034 | ||
9f0a21e6 MM |
/*
 * abd_iterate_func2() callback: memcmp() size bytes of bufa against bufb
 * and return the result. private is not used.
 */
static int
abd_cmp_cb(void *bufa, void *bufb, size_t size, void *private)
{
	int diff;

	(void) private;
	diff = memcmp(bufa, bufb, size);

	return (diff);
}
1041 | ||
1042 | /* | |
1043 | * Compares the contents of two ABDs. | |
1044 | */ | |
1045 | int | |
1046 | abd_cmp(abd_t *dabd, abd_t *sabd) | |
1047 | { | |
1048 | ASSERT3U(dabd->abd_size, ==, sabd->abd_size); | |
1049 | return (abd_iterate_func2(dabd, sabd, 0, 0, dabd->abd_size, | |
1050 | abd_cmp_cb, NULL)); | |
1051 | } | |
1052 | ||
1053 | /* | |
1054 | * Iterate over code ABDs and a data ABD and call @func_raidz_gen. | |
1055 | * | |
1056 | * @cabds parity ABDs, must have equal size | |
1057 | * @dabd data ABD. Can be NULL (in this case @dsize = 0) | |
1058 | * @func_raidz_gen should be implemented so that its behaviour | |
1059 | * is the same when taking linear and when taking scatter | |
1060 | */ | |
1061 | void | |
05a7348a AM |
1062 | abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off, |
1063 | size_t csize, size_t dsize, const unsigned parity, | |
9f0a21e6 MM |
1064 | void (*func_raidz_gen)(void **, const void *, size_t, size_t)) |
1065 | { | |
1066 | int i; | |
05a7348a | 1067 | size_t len, dlen; |
9f0a21e6 | 1068 | struct abd_iter caiters[3]; |
e007908a | 1069 | struct abd_iter daiter; |
f4cd1bac | 1070 | void *caddrs[3], *daddr; |
2ade659e | 1071 | unsigned long flags __maybe_unused = 0; |
fb822260 BA |
1072 | abd_t *c_cabds[3]; |
1073 | abd_t *c_dabd = NULL; | |
9f0a21e6 MM |
1074 | |
1075 | ASSERT3U(parity, <=, 3); | |
fb822260 | 1076 | for (i = 0; i < parity; i++) { |
e007908a | 1077 | abd_verify(cabds[i]); |
05a7348a AM |
1078 | ASSERT3U(off + csize, <=, cabds[i]->abd_size); |
1079 | c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], off); | |
fb822260 | 1080 | } |
9f0a21e6 | 1081 | |
e007908a AM |
1082 | if (dsize > 0) { |
1083 | ASSERT(dabd); | |
1084 | abd_verify(dabd); | |
05a7348a AM |
1085 | ASSERT3U(off + dsize, <=, dabd->abd_size); |
1086 | c_dabd = abd_init_abd_iter(dabd, &daiter, off); | |
fb822260 | 1087 | } |
9f0a21e6 | 1088 | |
fc551d7e | 1089 | abd_enter_critical(flags); |
9f0a21e6 | 1090 | while (csize > 0) { |
e007908a | 1091 | len = csize; |
9f0a21e6 | 1092 | for (i = 0; i < parity; i++) { |
e007908a | 1093 | IMPLY(abd_is_gang(cabds[i]), c_cabds[i] != NULL); |
9f0a21e6 MM |
1094 | abd_iter_map(&caiters[i]); |
1095 | caddrs[i] = caiters[i].iter_mapaddr; | |
e007908a | 1096 | len = MIN(caiters[i].iter_mapsize, len); |
9f0a21e6 MM |
1097 | } |
1098 | ||
e007908a AM |
1099 | if (dsize > 0) { |
1100 | IMPLY(abd_is_gang(dabd), c_dabd != NULL); | |
fb822260 | 1101 | abd_iter_map(&daiter); |
f4cd1bac | 1102 | daddr = daiter.iter_mapaddr; |
9f0a21e6 MM |
1103 | len = MIN(daiter.iter_mapsize, len); |
1104 | dlen = len; | |
f4cd1bac MJ |
1105 | } else { |
1106 | daddr = NULL; | |
9f0a21e6 | 1107 | dlen = 0; |
f4cd1bac | 1108 | } |
9f0a21e6 MM |
1109 | |
1110 | /* must be progressive */ | |
05a7348a | 1111 | ASSERT3U(len, >, 0); |
9f0a21e6 MM |
1112 | /* |
1113 | * The iterated function likely will not do well if each | |
1114 | * segment except the last one is not multiple of 512 (raidz). | |
1115 | */ | |
1116 | ASSERT3U(((uint64_t)len & 511ULL), ==, 0); | |
1117 | ||
f4cd1bac | 1118 | func_raidz_gen(caddrs, daddr, len, dlen); |
9f0a21e6 MM |
1119 | |
1120 | for (i = parity-1; i >= 0; i--) { | |
1121 | abd_iter_unmap(&caiters[i]); | |
fb822260 BA |
1122 | c_cabds[i] = |
1123 | abd_advance_abd_iter(cabds[i], c_cabds[i], | |
1124 | &caiters[i], len); | |
9f0a21e6 MM |
1125 | } |
1126 | ||
e007908a | 1127 | if (dsize > 0) { |
9f0a21e6 | 1128 | abd_iter_unmap(&daiter); |
fb822260 BA |
1129 | c_dabd = |
1130 | abd_advance_abd_iter(dabd, c_dabd, &daiter, | |
1131 | dlen); | |
9f0a21e6 MM |
1132 | dsize -= dlen; |
1133 | } | |
1134 | ||
1135 | csize -= len; | |
9f0a21e6 | 1136 | } |
fc551d7e | 1137 | abd_exit_critical(flags); |
9f0a21e6 MM |
1138 | } |
1139 | ||
1140 | /* | |
1141 | * Iterate over code ABDs and data reconstruction target ABDs and call | |
1142 | * @func_raidz_rec. Function maps at most 6 pages atomically. | |
1143 | * | |
1144 | * @cabds parity ABDs, must have equal size | |
1145 | * @tabds rec target ABDs, at most 3 | |
1146 | * @tsize size of data target columns | |
1147 | * @func_raidz_rec expects syndrome data in target columns. Function | |
1148 | * reconstructs data and overwrites target columns. | |
1149 | */ | |
1150 | void | |
1151 | abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds, | |
05a7348a | 1152 | size_t tsize, const unsigned parity, |
9f0a21e6 MM |
1153 | void (*func_raidz_rec)(void **t, const size_t tsize, void **c, |
1154 | const unsigned *mul), | |
1155 | const unsigned *mul) | |
1156 | { | |
1157 | int i; | |
05a7348a | 1158 | size_t len; |
9f0a21e6 MM |
1159 | struct abd_iter citers[3]; |
1160 | struct abd_iter xiters[3]; | |
1161 | void *caddrs[3], *xaddrs[3]; | |
2ade659e | 1162 | unsigned long flags __maybe_unused = 0; |
fb822260 BA |
1163 | abd_t *c_cabds[3]; |
1164 | abd_t *c_tabds[3]; | |
9f0a21e6 MM |
1165 | |
1166 | ASSERT3U(parity, <=, 3); | |
1167 | ||
1168 | for (i = 0; i < parity; i++) { | |
e007908a AM |
1169 | abd_verify(cabds[i]); |
1170 | abd_verify(tabds[i]); | |
1171 | ASSERT3U(tsize, <=, cabds[i]->abd_size); | |
1172 | ASSERT3U(tsize, <=, tabds[i]->abd_size); | |
fb822260 BA |
1173 | c_cabds[i] = |
1174 | abd_init_abd_iter(cabds[i], &citers[i], 0); | |
1175 | c_tabds[i] = | |
1176 | abd_init_abd_iter(tabds[i], &xiters[i], 0); | |
9f0a21e6 MM |
1177 | } |
1178 | ||
fc551d7e | 1179 | abd_enter_critical(flags); |
9f0a21e6 | 1180 | while (tsize > 0) { |
e007908a | 1181 | len = tsize; |
9f0a21e6 | 1182 | for (i = 0; i < parity; i++) { |
e007908a AM |
1183 | IMPLY(abd_is_gang(cabds[i]), c_cabds[i] != NULL); |
1184 | IMPLY(abd_is_gang(tabds[i]), c_tabds[i] != NULL); | |
9f0a21e6 MM |
1185 | abd_iter_map(&citers[i]); |
1186 | abd_iter_map(&xiters[i]); | |
1187 | caddrs[i] = citers[i].iter_mapaddr; | |
1188 | xaddrs[i] = xiters[i].iter_mapaddr; | |
e007908a AM |
1189 | len = MIN(citers[i].iter_mapsize, len); |
1190 | len = MIN(xiters[i].iter_mapsize, len); | |
9f0a21e6 MM |
1191 | } |
1192 | ||
9f0a21e6 MM |
1193 | /* must be progressive */ |
1194 | ASSERT3S(len, >, 0); | |
1195 | /* | |
1196 | * The iterated function likely will not do well if each | |
1197 | * segment except the last one is not multiple of 512 (raidz). | |
1198 | */ | |
1199 | ASSERT3U(((uint64_t)len & 511ULL), ==, 0); | |
1200 | ||
1201 | func_raidz_rec(xaddrs, len, caddrs, mul); | |
1202 | ||
1203 | for (i = parity-1; i >= 0; i--) { | |
1204 | abd_iter_unmap(&xiters[i]); | |
1205 | abd_iter_unmap(&citers[i]); | |
fb822260 BA |
1206 | c_tabds[i] = |
1207 | abd_advance_abd_iter(tabds[i], c_tabds[i], | |
1208 | &xiters[i], len); | |
1209 | c_cabds[i] = | |
1210 | abd_advance_abd_iter(cabds[i], c_cabds[i], | |
1211 | &citers[i], len); | |
9f0a21e6 MM |
1212 | } |
1213 | ||
1214 | tsize -= len; | |
1215 | ASSERT3S(tsize, >=, 0); | |
1216 | } | |
fc551d7e | 1217 | abd_exit_critical(flags); |
9f0a21e6 | 1218 | } |