]>
Commit | Line | Data |
---|---|---|
9f0a21e6 | 1 | /* |
fc551d7e | 2 | * CDDL HEADER START |
9f0a21e6 | 3 | * |
fc551d7e BA |
4 | * The contents of this file are subject to the terms of the |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
9f0a21e6 | 20 | */ |
9f0a21e6 MM |
21 | /* |
22 | * Copyright (c) 2014 by Chunwei Chen. All rights reserved. | |
fc551d7e | 23 | * Copyright (c) 2019 by Delphix. All rights reserved. |
9f0a21e6 MM |
24 | */ |
25 | ||
26 | /* | |
27 | * ARC buffer data (ABD). | |
28 | * | |
29 | * ABDs are an abstract data structure for the ARC which can use two | |
30 | * different ways of storing the underlying data: | |
31 | * | |
32 | * (a) Linear buffer. In this case, all the data in the ABD is stored in one | |
33 | * contiguous buffer in memory (from a zio_[data_]buf_* kmem cache). | |
34 | * | |
35 | * +-------------------+ | |
36 | * | ABD (linear) | | |
37 | * | abd_flags = ... | | |
38 | * | abd_size = ... | +--------------------------------+ | |
39 | * | abd_buf ------------->| raw buffer of size abd_size | | |
40 | * +-------------------+ +--------------------------------+ | |
41 | * no abd_chunks | |
42 | * | |
43 | * (b) Scattered buffer. In this case, the data in the ABD is split into | |
44 | * equal-sized chunks (from the abd_chunk_cache kmem_cache), with pointers | |
45 | * to the chunks recorded in an array at the end of the ABD structure. | |
46 | * | |
47 | * +-------------------+ | |
48 | * | ABD (scattered) | | |
49 | * | abd_flags = ... | | |
50 | * | abd_size = ... | | |
51 | * | abd_offset = 0 | +-----------+ | |
52 | * | abd_chunks[0] ----------------------------->| chunk 0 | | |
53 | * | abd_chunks[1] ---------------------+ +-----------+ | |
54 | * | ... | | +-----------+ | |
55 | * | abd_chunks[N-1] ---------+ +------->| chunk 1 | | |
56 | * +-------------------+ | +-----------+ | |
57 | * | ... | |
58 | * | +-----------+ | |
59 | * +----------------->| chunk N-1 | | |
60 | * +-----------+ | |
61 | * | |
9f0a21e6 MM |
62 | * In addition to directly allocating a linear or scattered ABD, it is also |
63 | * possible to create an ABD by requesting the "sub-ABD" starting at an offset | |
64 | * within an existing ABD. In linear buffers this is simple (set abd_buf of | |
65 | * the new ABD to the starting point within the original raw buffer), but | |
66 | * scattered ABDs are a little more complex. The new ABD makes a copy of the | |
67 | * relevant abd_chunks pointers (but not the underlying data). However, to | |
68 | * provide arbitrary rather than only chunk-aligned starting offsets, it also | |
69 | * tracks an abd_offset field which represents the starting point of the data | |
70 | * within the first chunk in abd_chunks. For both linear and scattered ABDs, | |
71 | * creating an offset ABD marks the original ABD as the offset's parent, and the | |
72 | * original ABD's abd_children refcount is incremented. This data allows us to | |
73 | * ensure the root ABD isn't deleted before its children. | |
74 | * | |
75 | * Most consumers should never need to know what type of ABD they're using -- | |
76 | * the ABD public API ensures that it's possible to transparently switch from | |
77 | * using a linear ABD to a scattered one when doing so would be beneficial. | |
78 | * | |
79 | * If you need to use the data within an ABD directly, if you know it's linear | |
80 | * (because you allocated it) you can use abd_to_buf() to access the underlying | |
81 | * raw buffer. Otherwise, you should use one of the abd_borrow_buf* functions | |
82 | * which will allocate a raw buffer if necessary. Use the abd_return_buf* | |
83 | * functions to return any raw buffers that are no longer necessary when you're | |
84 | * done using them. | |
85 | * | |
86 | * There are a variety of ABD APIs that implement basic buffer operations: | |
87 | * compare, copy, read, write, and fill with zeroes. If you need a custom | |
88 | * function which progressively accesses the whole ABD, use the abd_iterate_* | |
89 | * functions. | |
fc551d7e BA |
90 | * |
91 | * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to | |
92 | * B_FALSE. | |
9f0a21e6 MM |
93 | */ |
94 | ||
fc551d7e | 95 | #include <sys/abd_impl.h> |
9f0a21e6 MM |
96 | #include <sys/param.h> |
97 | #include <sys/zio.h> | |
98 | #include <sys/zfs_context.h> | |
99 | #include <sys/zfs_znode.h> | |
100 | ||
fc551d7e BA |
/*
 * Tunable: when B_FALSE, all ABDs are allocated linear (see block comment
 * above for the full description). Checked in abd_alloc().
 */
int zfs_abd_scatter_enabled = B_TRUE;
9f0a21e6 | 103 | |
fc551d7e BA |
104 | boolean_t |
105 | abd_is_linear(abd_t *abd) | |
9f0a21e6 | 106 | { |
fc551d7e | 107 | return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0 ? B_TRUE : B_FALSE); |
9f0a21e6 MM |
108 | } |
109 | ||
fc551d7e BA |
110 | boolean_t |
111 | abd_is_linear_page(abd_t *abd) | |
9f0a21e6 | 112 | { |
fc551d7e BA |
113 | return ((abd->abd_flags & ABD_FLAG_LINEAR_PAGE) != 0 ? |
114 | B_TRUE : B_FALSE); | |
9f0a21e6 MM |
115 | } |
116 | ||
/*
 * Sanity-check an ABD's invariants: a non-zero size within SPA limits, only
 * known flag bits set, flag/parent consistency, and a valid backing buffer
 * (linear) or scatter state (delegated to the platform-specific
 * abd_verify_scatter()).
 */
void
abd_verify(abd_t *abd)
{
	ASSERT3U(abd->abd_size, >, 0);
	ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE);
	/* No flag bits outside the defined set may be present. */
	ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR |
	    ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE |
	    ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE));
	/* A child (offset) ABD never owns its buffer. */
	IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER));
	/* Metadata is only tracked on owned buffers. */
	IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER);
	if (abd_is_linear(abd)) {
		ASSERT3P(ABD_LINEAR_BUF(abd), !=, NULL);
	} else {
		abd_verify_scatter(abd);
	}
}
133 | ||
fc551d7e BA |
134 | uint_t |
135 | abd_get_size(abd_t *abd) | |
9f0a21e6 | 136 | { |
fc551d7e BA |
137 | abd_verify(abd); |
138 | return (abd->abd_size); | |
9f0a21e6 MM |
139 | } |
140 | ||
/*
 * Allocate an ABD, along with its own underlying data buffers. Use this if you
 * don't care whether the ABD is linear or not.
 *
 * Falls back to a linear allocation when scatter ABDs are disabled via
 * zfs_abd_scatter_enabled, or when the platform prefers linear for this size
 * (abd_size_alloc_linear()). The returned ABD owns its buffers
 * (ABD_FLAG_OWNER) and must be freed with abd_free().
 */
abd_t *
abd_alloc(size_t size, boolean_t is_metadata)
{
	if (!zfs_abd_scatter_enabled || abd_size_alloc_linear(size))
		return (abd_alloc_linear(size, is_metadata));

	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);

	abd_t *abd = abd_alloc_struct(size);
	abd->abd_flags = ABD_FLAG_OWNER;
	abd->abd_u.abd_scatter.abd_offset = 0;
	/* Platform-specific chunk allocation; may also set flag bits. */
	abd_alloc_chunks(abd, size);

	if (is_metadata) {
		abd->abd_flags |= ABD_FLAG_META;
	}
	abd->abd_size = size;
	abd->abd_parent = NULL;
	zfs_refcount_create(&abd->abd_children);

	abd_update_scatter_stats(abd, ABDSTAT_INCR);

	return (abd);
}
169 | ||
/*
 * Free a scattered ABD allocated by abd_alloc(): release its chunks, tear
 * down the child refcount, update kstats, and free the struct itself.
 */
static void
abd_free_scatter(abd_t *abd)
{
	abd_free_chunks(abd);

	zfs_refcount_destroy(&abd->abd_children);
	abd_update_scatter_stats(abd, ABDSTAT_DECR);
	abd_free_struct(abd);
}
9f0a21e6 | 179 | |
fc551d7e BA |
/*
 * Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not
 * free the underlying scatterlist or buffer.
 *
 * If the ABD has a parent, this also drops the reference taken on the
 * parent's abd_children when the offset ABD was created. NULL is accepted
 * and ignored.
 */
void
abd_put(abd_t *abd)
{
	if (abd == NULL)
		return;

	abd_verify(abd);
	/* Only non-owning ABDs may be released through this path. */
	ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));

	if (abd->abd_parent != NULL) {
		(void) zfs_refcount_remove_many(&abd->abd_parent->abd_children,
		    abd->abd_size, abd);
	}

	zfs_refcount_destroy(&abd->abd_children);
	abd_free_struct(abd);
}
201 | ||
/*
 * Allocate an ABD that must be linear, along with its own underlying data
 * buffer. Only use this when it would be very annoying to write your ABD
 * consumer with a scattered ABD.
 *
 * Metadata buffers come from the zio_buf_* cache, data from zio_data_buf_*.
 * The returned ABD owns its buffer and must be freed with abd_free().
 */
abd_t *
abd_alloc_linear(size_t size, boolean_t is_metadata)
{
	abd_t *abd = abd_alloc_struct(0);

	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);

	abd->abd_flags = ABD_FLAG_LINEAR | ABD_FLAG_OWNER;
	if (is_metadata) {
		abd->abd_flags |= ABD_FLAG_META;
	}
	abd->abd_size = size;
	abd->abd_parent = NULL;
	zfs_refcount_create(&abd->abd_children);

	if (is_metadata) {
		ABD_LINEAR_BUF(abd) = zio_buf_alloc(size);
	} else {
		ABD_LINEAR_BUF(abd) = zio_data_buf_alloc(size);
	}

	abd_update_linear_stats(abd, ABDSTAT_INCR);

	return (abd);
}
232 | ||
/*
 * Free a linear ABD allocated by abd_alloc_linear(). LINEAR_PAGE ABDs are
 * backed by borrowed pages rather than zio buffers, so they take a
 * platform-specific free path and skip the zio cache and stats teardown.
 */
static void
abd_free_linear(abd_t *abd)
{
	if (abd_is_linear_page(abd)) {
		abd_free_linear_page(abd);
		return;
	}
	/* Return the buffer to the cache it was allocated from. */
	if (abd->abd_flags & ABD_FLAG_META) {
		zio_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size);
	} else {
		zio_data_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size);
	}

	zfs_refcount_destroy(&abd->abd_children);
	abd_update_linear_stats(abd, ABDSTAT_DECR);

	abd_free_struct(abd);
}
251 | ||
252 | /* | |
253 | * Free an ABD. Only use this on ABDs allocated with abd_alloc() or | |
254 | * abd_alloc_linear(). | |
255 | */ | |
256 | void | |
257 | abd_free(abd_t *abd) | |
258 | { | |
259 | if (abd == NULL) | |
260 | return; | |
261 | ||
262 | abd_verify(abd); | |
263 | ASSERT3P(abd->abd_parent, ==, NULL); | |
264 | ASSERT(abd->abd_flags & ABD_FLAG_OWNER); | |
265 | if (abd_is_linear(abd)) | |
266 | abd_free_linear(abd); | |
267 | else | |
268 | abd_free_scatter(abd); | |
269 | } | |
270 | ||
271 | /* | |
272 | * Allocate an ABD of the same format (same metadata flag, same scatterize | |
273 | * setting) as another ABD. | |
274 | */ | |
275 | abd_t * | |
276 | abd_alloc_sametype(abd_t *sabd, size_t size) | |
277 | { | |
278 | boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0; | |
fc551d7e BA |
279 | if (abd_is_linear(sabd) && |
280 | !abd_is_linear_page(sabd)) { | |
9f0a21e6 MM |
281 | return (abd_alloc_linear(size, is_metadata)); |
282 | } else { | |
283 | return (abd_alloc(size, is_metadata)); | |
284 | } | |
285 | } | |
286 | ||
9f0a21e6 MM |
/*
 * Allocate a new ABD to point to offset off of sabd. It shares the underlying
 * buffer data with sabd. Use abd_put() to free. sabd must not be freed while
 * any derived ABDs exist.
 *
 * The new ABD records sabd as its parent and takes a reference on the
 * parent's abd_children refcount, which keeps the parent alive until the
 * child is released.
 */
static abd_t *
abd_get_offset_impl(abd_t *sabd, size_t off, size_t size)
{
	abd_t *abd = NULL;

	abd_verify(sabd);
	ASSERT3U(off, <=, sabd->abd_size);

	if (abd_is_linear(sabd)) {
		abd = abd_alloc_struct(0);

		/*
		 * Even if this buf is filesystem metadata, we only track that
		 * if we own the underlying data buffer, which is not true in
		 * this case. Therefore, we don't ever use ABD_FLAG_META here.
		 */
		abd->abd_flags = ABD_FLAG_LINEAR;

		/* A linear view is just a pointer into the parent's buffer. */
		ABD_LINEAR_BUF(abd) = (char *)ABD_LINEAR_BUF(sabd) + off;
	} else {
		/* Scatter views are built by the platform-specific code. */
		abd = abd_get_offset_scatter(sabd, off);
	}

	abd->abd_size = size;
	abd->abd_parent = sabd;
	zfs_refcount_create(&abd->abd_children);
	(void) zfs_refcount_add_many(&sabd->abd_children, abd->abd_size, abd);
	return (abd);
}
321 | ||
322 | abd_t * | |
323 | abd_get_offset(abd_t *sabd, size_t off) | |
324 | { | |
fc551d7e BA |
325 | size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0; |
326 | VERIFY3U(size, >, 0); | |
327 | return (abd_get_offset_impl(sabd, off, size)); | |
9f0a21e6 MM |
328 | } |
329 | ||
330 | abd_t * | |
331 | abd_get_offset_size(abd_t *sabd, size_t off, size_t size) | |
332 | { | |
333 | ASSERT3U(off + size, <=, sabd->abd_size); | |
9f0a21e6 MM |
334 | return (abd_get_offset_impl(sabd, off, size)); |
335 | } | |
336 | ||
9f0a21e6 MM |
/*
 * Allocate a linear ABD structure for buf. You must free this with abd_put()
 * since the resulting ABD doesn't own its own buffer.
 *
 * The caller retains ownership of buf and must keep it valid for the
 * lifetime of the returned ABD.
 */
abd_t *
abd_get_from_buf(void *buf, size_t size)
{
	abd_t *abd = abd_alloc_struct(0);

	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);

	/*
	 * Even if this buf is filesystem metadata, we only track that if we
	 * own the underlying data buffer, which is not true in this case.
	 * Therefore, we don't ever use ABD_FLAG_META here.
	 */
	abd->abd_flags = ABD_FLAG_LINEAR;
	abd->abd_size = size;
	abd->abd_parent = NULL;
	zfs_refcount_create(&abd->abd_children);

	ABD_LINEAR_BUF(abd) = buf;

	return (abd);
}
362 | ||
9f0a21e6 MM |
363 | /* |
364 | * Get the raw buffer associated with a linear ABD. | |
365 | */ | |
366 | void * | |
367 | abd_to_buf(abd_t *abd) | |
368 | { | |
369 | ASSERT(abd_is_linear(abd)); | |
370 | abd_verify(abd); | |
fc551d7e | 371 | return (ABD_LINEAR_BUF(abd)); |
9f0a21e6 MM |
372 | } |
373 | ||
/*
 * Borrow a raw buffer from an ABD without copying the contents of the ABD
 * into the buffer. If the ABD is scattered, this will allocate a raw buffer
 * whose contents are undefined. To copy over the existing data in the ABD, use
 * abd_borrow_buf_copy() instead.
 *
 * Each borrow is tracked in abd_children and must be matched by an
 * abd_return_buf*() call with the same buffer and size.
 */
void *
abd_borrow_buf(abd_t *abd, size_t n)
{
	void *buf;
	abd_verify(abd);
	ASSERT3U(abd->abd_size, >=, n);
	if (abd_is_linear(abd)) {
		/* Linear ABDs hand out their own buffer directly. */
		buf = abd_to_buf(abd);
	} else {
		buf = zio_buf_alloc(n);
	}
	(void) zfs_refcount_add_many(&abd->abd_children, n, buf);
	return (buf);
}
394 | ||
395 | void * | |
396 | abd_borrow_buf_copy(abd_t *abd, size_t n) | |
397 | { | |
398 | void *buf = abd_borrow_buf(abd, n); | |
399 | if (!abd_is_linear(abd)) { | |
400 | abd_copy_to_buf(buf, abd, n); | |
401 | } | |
402 | return (buf); | |
403 | } | |
404 | ||
/*
 * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will
 * not change the contents of the ABD and will ASSERT that you didn't modify
 * the buffer since it was borrowed. If you want any changes you made to buf to
 * be copied back to abd, use abd_return_buf_copy() instead.
 */
void
abd_return_buf(abd_t *abd, void *buf, size_t n)
{
	abd_verify(abd);
	ASSERT3U(abd->abd_size, >=, n);
	if (abd_is_linear(abd)) {
		/* A linear borrow is the ABD's own buffer; nothing to free. */
		ASSERT3P(buf, ==, abd_to_buf(abd));
	} else {
		/* Debug check that the scattered contents were not modified. */
		ASSERT0(abd_cmp_buf(abd, buf, n));
		zio_buf_free(buf, n);
	}
	(void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
}
424 | ||
425 | void | |
426 | abd_return_buf_copy(abd_t *abd, void *buf, size_t n) | |
427 | { | |
428 | if (!abd_is_linear(abd)) { | |
429 | abd_copy_from_buf(abd, buf, n); | |
430 | } | |
431 | abd_return_buf(abd, buf, n); | |
432 | } | |
433 | ||
9f0a21e6 MM |
/*
 * Give up this ABD's ownership of its underlying buffer: the caller becomes
 * responsible for freeing the raw buffer, and the ABD may afterwards only be
 * released with abd_put(). Only valid on linear, owning, non-LINEAR_PAGE
 * ABDs.
 */
void
abd_release_ownership_of_buf(abd_t *abd)
{
	ASSERT(abd_is_linear(abd));
	ASSERT(abd->abd_flags & ABD_FLAG_OWNER);

	/*
	 * abd_free() needs to handle LINEAR_PAGE ABD's specially.
	 * Since that flag does not survive the
	 * abd_release_ownership_of_buf() -> abd_get_from_buf() ->
	 * abd_take_ownership_of_buf() sequence, we don't allow releasing
	 * these "linear but not zio_[data_]buf_alloc()'ed" ABD's.
	 */
	ASSERT(!abd_is_linear_page(abd));

	abd_verify(abd);

	abd->abd_flags &= ~ABD_FLAG_OWNER;
	/* Disable this flag since we no longer own the data buffer */
	abd->abd_flags &= ~ABD_FLAG_META;

	abd_update_linear_stats(abd, ABDSTAT_DECR);
}
457 | ||
9f0a21e6 MM |
458 | |
/*
 * Give this ABD ownership of the buffer that it's storing. Can only be used on
 * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated
 * with abd_alloc_linear() which subsequently released ownership of their buf
 * with abd_release_ownership_of_buf().
 *
 * After this call the ABD must be released with abd_free(), which will free
 * the underlying buffer.
 */
void
abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata)
{
	ASSERT(abd_is_linear(abd));
	ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
	abd_verify(abd);

	abd->abd_flags |= ABD_FLAG_OWNER;
	if (is_metadata) {
		abd->abd_flags |= ABD_FLAG_META;
	}

	abd_update_linear_stats(abd, ABDSTAT_INCR);
}
479 | ||
/*
 * Iterate over [off, off + size) of abd, calling func on each mapped segment
 * in turn. func receives a raw pointer and the segment length; a non-zero
 * return stops the iteration early and is returned to the caller.
 */
int
abd_iterate_func(abd_t *abd, size_t off, size_t size,
    abd_iter_func_t *func, void *private)
{
	int ret = 0;
	struct abd_iter aiter;

	abd_verify(abd);
	ASSERT3U(off + size, <=, abd->abd_size);

	abd_iter_init(&aiter, abd);
	abd_iter_advance(&aiter, off);

	while (size > 0) {
		abd_iter_map(&aiter);

		/* Clamp to what the iterator could map contiguously. */
		size_t len = MIN(aiter.iter_mapsize, size);
		ASSERT3U(len, >, 0);

		ret = func(aiter.iter_mapaddr, len, private);

		abd_iter_unmap(&aiter);

		if (ret != 0)
			break;

		size -= len;
		abd_iter_advance(&aiter, len);
	}

	return (ret);
}
512 | ||
/* Cursor state for the flat-buffer copy/compare iterator callbacks below. */
struct buf_arg {
	void *arg_buf;	/* current position in the flat buffer */
};
516 | ||
517 | static int | |
518 | abd_copy_to_buf_off_cb(void *buf, size_t size, void *private) | |
519 | { | |
520 | struct buf_arg *ba_ptr = private; | |
521 | ||
522 | (void) memcpy(ba_ptr->arg_buf, buf, size); | |
523 | ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; | |
524 | ||
525 | return (0); | |
526 | } | |
527 | ||
528 | /* | |
529 | * Copy abd to buf. (off is the offset in abd.) | |
530 | */ | |
531 | void | |
532 | abd_copy_to_buf_off(void *buf, abd_t *abd, size_t off, size_t size) | |
533 | { | |
534 | struct buf_arg ba_ptr = { buf }; | |
535 | ||
536 | (void) abd_iterate_func(abd, off, size, abd_copy_to_buf_off_cb, | |
537 | &ba_ptr); | |
538 | } | |
539 | ||
540 | static int | |
541 | abd_cmp_buf_off_cb(void *buf, size_t size, void *private) | |
542 | { | |
543 | int ret; | |
544 | struct buf_arg *ba_ptr = private; | |
545 | ||
546 | ret = memcmp(buf, ba_ptr->arg_buf, size); | |
547 | ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; | |
548 | ||
549 | return (ret); | |
550 | } | |
551 | ||
552 | /* | |
553 | * Compare the contents of abd to buf. (off is the offset in abd.) | |
554 | */ | |
555 | int | |
556 | abd_cmp_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) | |
557 | { | |
558 | struct buf_arg ba_ptr = { (void *) buf }; | |
559 | ||
560 | return (abd_iterate_func(abd, off, size, abd_cmp_buf_off_cb, &ba_ptr)); | |
561 | } | |
562 | ||
563 | static int | |
564 | abd_copy_from_buf_off_cb(void *buf, size_t size, void *private) | |
565 | { | |
566 | struct buf_arg *ba_ptr = private; | |
567 | ||
568 | (void) memcpy(buf, ba_ptr->arg_buf, size); | |
569 | ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; | |
570 | ||
571 | return (0); | |
572 | } | |
573 | ||
574 | /* | |
575 | * Copy from buf to abd. (off is the offset in abd.) | |
576 | */ | |
577 | void | |
578 | abd_copy_from_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) | |
579 | { | |
580 | struct buf_arg ba_ptr = { (void *) buf }; | |
581 | ||
582 | (void) abd_iterate_func(abd, off, size, abd_copy_from_buf_off_cb, | |
583 | &ba_ptr); | |
584 | } | |
585 | ||
/*
 * abd_iterate_func() callback: zero out one mapped segment.
 */
/*ARGSUSED*/
static int
abd_zero_off_cb(void *buf, size_t size, void *private)
{
	(void) memset(buf, 0, size);
	return (0);
}
593 | ||
594 | /* | |
595 | * Zero out the abd from a particular offset to the end. | |
596 | */ | |
597 | void | |
598 | abd_zero_off(abd_t *abd, size_t off, size_t size) | |
599 | { | |
600 | (void) abd_iterate_func(abd, off, size, abd_zero_off_cb, NULL); | |
601 | } | |
602 | ||
/*
 * Iterate over two ABDs and call func incrementally on the two ABDs' data in
 * equal-sized chunks (passed to func as raw buffers). func could be called many
 * times during this iteration.
 *
 * A non-zero return from func stops the iteration early and is returned to
 * the caller.
 */
int
abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff,
    size_t size, abd_iter_func2_t *func, void *private)
{
	int ret = 0;
	struct abd_iter daiter, saiter;

	abd_verify(dabd);
	abd_verify(sabd);

	ASSERT3U(doff + size, <=, dabd->abd_size);
	ASSERT3U(soff + size, <=, sabd->abd_size);

	abd_iter_init(&daiter, dabd);
	abd_iter_init(&saiter, sabd);
	abd_iter_advance(&daiter, doff);
	abd_iter_advance(&saiter, soff);

	while (size > 0) {
		abd_iter_map(&daiter);
		abd_iter_map(&saiter);

		/* Step by the largest span both mappings can cover. */
		size_t dlen = MIN(daiter.iter_mapsize, size);
		size_t slen = MIN(saiter.iter_mapsize, size);
		size_t len = MIN(dlen, slen);
		ASSERT(dlen > 0 || slen > 0);

		ret = func(daiter.iter_mapaddr, saiter.iter_mapaddr, len,
		    private);

		/* Unmap in reverse order of mapping. */
		abd_iter_unmap(&saiter);
		abd_iter_unmap(&daiter);

		if (ret != 0)
			break;

		size -= len;
		abd_iter_advance(&daiter, len);
		abd_iter_advance(&saiter, len);
	}

	return (ret);
}
651 | ||
/*
 * abd_iterate_func2() callback: copy one segment from source to destination.
 */
/*ARGSUSED*/
static int
abd_copy_off_cb(void *dbuf, void *sbuf, size_t size, void *private)
{
	(void) memcpy(dbuf, sbuf, size);
	return (0);
}
659 | ||
660 | /* | |
661 | * Copy from sabd to dabd starting from soff and doff. | |
662 | */ | |
663 | void | |
664 | abd_copy_off(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, size_t size) | |
665 | { | |
666 | (void) abd_iterate_func2(dabd, sabd, doff, soff, size, | |
667 | abd_copy_off_cb, NULL); | |
668 | } | |
669 | ||
/*
 * abd_iterate_func2() callback: memcmp one pair of segments; non-zero stops
 * the iteration.
 */
/*ARGSUSED*/
static int
abd_cmp_cb(void *bufa, void *bufb, size_t size, void *private)
{
	return (memcmp(bufa, bufb, size));
}
676 | ||
677 | /* | |
678 | * Compares the contents of two ABDs. | |
679 | */ | |
680 | int | |
681 | abd_cmp(abd_t *dabd, abd_t *sabd) | |
682 | { | |
683 | ASSERT3U(dabd->abd_size, ==, sabd->abd_size); | |
684 | return (abd_iterate_func2(dabd, sabd, 0, 0, dabd->abd_size, | |
685 | abd_cmp_cb, NULL)); | |
686 | } | |
687 | ||
/*
 * Iterate over code ABDs and a data ABD and call @func_raidz_gen.
 *
 * @cabds parity ABDs, must have equal size
 * @dabd data ABD. Can be NULL (in this case @dsize = 0)
 * @func_raidz_gen should be implemented so that its behaviour
 *                 is the same when taking linear and when taking scatter
 *
 * All of the parity iterators (and the data iterator while dsize remains)
 * advance in lockstep by the largest 512-byte-multiple span they can all map
 * at once. The whole loop runs inside a platform critical section
 * (abd_enter_critical/abd_exit_critical).
 */
void
abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
    ssize_t csize, ssize_t dsize, const unsigned parity,
    void (*func_raidz_gen)(void **, const void *, size_t, size_t))
{
	int i;
	ssize_t len, dlen;
	struct abd_iter caiters[3];
	struct abd_iter daiter = {0};
	void *caddrs[3];
	unsigned long flags = 0;

	ASSERT3U(parity, <=, 3);

	for (i = 0; i < parity; i++)
		abd_iter_init(&caiters[i], cabds[i]);

	if (dabd)
		abd_iter_init(&daiter, dabd);

	ASSERT3S(dsize, >=, 0);

	abd_enter_critical(flags);
	while (csize > 0) {
		len = csize;

		if (dabd && dsize > 0)
			abd_iter_map(&daiter);

		for (i = 0; i < parity; i++) {
			abd_iter_map(&caiters[i]);
			caddrs[i] = caiters[i].iter_mapaddr;
		}

		/* Clamp len to the smallest mapped parity segment. */
		switch (parity) {
		case 3:
			len = MIN(caiters[2].iter_mapsize, len);
			/* falls through */
		case 2:
			len = MIN(caiters[1].iter_mapsize, len);
			/* falls through */
		case 1:
			len = MIN(caiters[0].iter_mapsize, len);
		}

		/* must be progressive */
		ASSERT3S(len, >, 0);

		if (dabd && dsize > 0) {
			/* this needs precise iter.length */
			len = MIN(daiter.iter_mapsize, len);
			dlen = len;
		} else
			dlen = 0;

		/* must be progressive */
		ASSERT3S(len, >, 0);
		/*
		 * The iterated function likely will not do well if each
		 * segment except the last one is not multiple of 512 (raidz).
		 */
		ASSERT3U(((uint64_t)len & 511ULL), ==, 0);

		func_raidz_gen(caddrs, daiter.iter_mapaddr, len, dlen);

		/* Unmap in reverse order of mapping, then advance. */
		for (i = parity-1; i >= 0; i--) {
			abd_iter_unmap(&caiters[i]);
			abd_iter_advance(&caiters[i], len);
		}

		if (dabd && dsize > 0) {
			abd_iter_unmap(&daiter);
			abd_iter_advance(&daiter, dlen);
			dsize -= dlen;
		}

		csize -= len;

		ASSERT3S(dsize, >=, 0);
		ASSERT3S(csize, >=, 0);
	}
	abd_exit_critical(flags);
}
780 | ||
/*
 * Iterate over code ABDs and data reconstruction target ABDs and call
 * @func_raidz_rec. Function maps at most 6 pages atomically.
 *
 * @cabds           parity ABDs, must have equal size
 * @tabds           rec target ABDs, at most 3
 * @tsize           size of data target columns
 * @func_raidz_rec  expects syndrome data in target columns. Function
 *                  reconstructs data and overwrites target columns.
 *
 * All parity and target iterators advance in lockstep by the largest
 * 512-byte-multiple span every mapping can cover, inside a platform
 * critical section (abd_enter_critical/abd_exit_critical).
 */
void
abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
    ssize_t tsize, const unsigned parity,
    void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
    const unsigned *mul),
    const unsigned *mul)
{
	int i;
	ssize_t len;
	struct abd_iter citers[3];
	struct abd_iter xiters[3];
	void *caddrs[3], *xaddrs[3];
	unsigned long flags = 0;

	ASSERT3U(parity, <=, 3);

	for (i = 0; i < parity; i++) {
		abd_iter_init(&citers[i], cabds[i]);
		abd_iter_init(&xiters[i], tabds[i]);
	}

	abd_enter_critical(flags);
	while (tsize > 0) {

		for (i = 0; i < parity; i++) {
			abd_iter_map(&citers[i]);
			abd_iter_map(&xiters[i]);
			caddrs[i] = citers[i].iter_mapaddr;
			xaddrs[i] = xiters[i].iter_mapaddr;
		}

		/* Clamp len to the smallest mapped segment of any column. */
		len = tsize;
		switch (parity) {
		case 3:
			len = MIN(xiters[2].iter_mapsize, len);
			len = MIN(citers[2].iter_mapsize, len);
			/* falls through */
		case 2:
			len = MIN(xiters[1].iter_mapsize, len);
			len = MIN(citers[1].iter_mapsize, len);
			/* falls through */
		case 1:
			len = MIN(xiters[0].iter_mapsize, len);
			len = MIN(citers[0].iter_mapsize, len);
		}
		/* must be progressive */
		ASSERT3S(len, >, 0);
		/*
		 * The iterated function likely will not do well if each
		 * segment except the last one is not multiple of 512 (raidz).
		 */
		ASSERT3U(((uint64_t)len & 511ULL), ==, 0);

		func_raidz_rec(xaddrs, len, caddrs, mul);

		/* Unmap in reverse order of mapping, then advance. */
		for (i = parity-1; i >= 0; i--) {
			abd_iter_unmap(&xiters[i]);
			abd_iter_unmap(&citers[i]);
			abd_iter_advance(&xiters[i], len);
			abd_iter_advance(&citers[i], len);
		}

		tsize -= len;
		ASSERT3S(tsize, >=, 0);
	}
	abd_exit_critical(flags);
}