]> git.proxmox.com Git - mirror_zfs.git/blame - module/os/freebsd/zfs/abd_os.c
Remove bcopy(), bzero(), bcmp()
[mirror_zfs.git] / module / os / freebsd / zfs / abd_os.c
CommitLineData
fc551d7e
BA
1/*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12/*
13 * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
14 * Copyright (c) 2016 by Delphix. All rights reserved.
15 */
16
17/*
18 * See abd.c for a general overview of the arc buffered data (ABD).
19 *
20 * Using a large proportion of scattered ABDs decreases ARC fragmentation since
21 * when we are at the limit of allocatable space, using equal-size chunks will
22 * allow us to quickly reclaim enough space for a new large allocation (assuming
23 * it is also scattered).
24 *
25 * ABDs are allocated scattered by default unless the caller uses
26 * abd_alloc_linear() or zfs_abd_scatter_enabled is disabled.
27 */
28
29#include <sys/abd_impl.h>
30#include <sys/param.h>
e64cc495 31#include <sys/types.h>
fc551d7e
BA
32#include <sys/zio.h>
33#include <sys/zfs_context.h>
34#include <sys/zfs_znode.h>
35
36typedef struct abd_stats {
37 kstat_named_t abdstat_struct_size;
38 kstat_named_t abdstat_scatter_cnt;
39 kstat_named_t abdstat_scatter_data_size;
40 kstat_named_t abdstat_scatter_chunk_waste;
41 kstat_named_t abdstat_linear_cnt;
42 kstat_named_t abdstat_linear_data_size;
43} abd_stats_t;
44
45static abd_stats_t abd_stats = {
46 /* Amount of memory occupied by all of the abd_t struct allocations */
47 { "struct_size", KSTAT_DATA_UINT64 },
48 /*
49 * The number of scatter ABDs which are currently allocated, excluding
50 * ABDs which don't own their data (for instance the ones which were
51 * allocated through abd_get_offset()).
52 */
53 { "scatter_cnt", KSTAT_DATA_UINT64 },
54 /* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
55 { "scatter_data_size", KSTAT_DATA_UINT64 },
56 /*
57 * The amount of space wasted at the end of the last chunk across all
58 * scatter ABDs tracked by scatter_cnt.
59 */
60 { "scatter_chunk_waste", KSTAT_DATA_UINT64 },
61 /*
62 * The number of linear ABDs which are currently allocated, excluding
63 * ABDs which don't own their data (for instance the ones which were
64 * allocated through abd_get_offset() and abd_get_from_buf()). If an
65 * ABD takes ownership of its buf then it will become tracked.
66 */
67 { "linear_cnt", KSTAT_DATA_UINT64 },
68 /* Amount of data stored in all linear ABDs tracked by linear_cnt */
69 { "linear_data_size", KSTAT_DATA_UINT64 },
70};
71
c4c162c1
AM
72struct {
73 wmsum_t abdstat_struct_size;
74 wmsum_t abdstat_scatter_cnt;
75 wmsum_t abdstat_scatter_data_size;
76 wmsum_t abdstat_scatter_chunk_waste;
77 wmsum_t abdstat_linear_cnt;
78 wmsum_t abdstat_linear_data_size;
79} abd_sums;
80
fc551d7e 81/*
bdd11cbb
AM
82 * zfs_abd_scatter_min_size is the minimum allocation size to use scatter
83 * ABD's for. Smaller allocations will use linear ABD's which use
84 * zio_[data_]buf_alloc().
85 *
86 * Scatter ABD's use at least one page each, so sub-page allocations waste
87 * some space when allocated as scatter (e.g. 2KB scatter allocation wastes
88 * half of each page). Using linear ABD's for small allocations means that
89 * they will be put on slabs which contain many allocations.
90 *
91 * Linear ABDs for multi-page allocations are easier to use, and in some cases
92 * it allows to avoid buffer copying. But allocation and especially free
93 * of multi-page linear ABDs are expensive operations due to KVA mapping and
94 * unmapping, and with time they cause KVA fragmentations.
fc551d7e 95 */
18168da7 96static size_t zfs_abd_scatter_min_size = PAGE_SIZE + 1;
fc551d7e
BA
97
#ifdef _KERNEL
/* Tunables exposed under the vfs.zfs sysctl node (kernel builds only). */
SYSCTL_DECL(_vfs_zfs);

SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN,
    &zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers");
SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_scatter_min_size, CTLFLAG_RWTUN,
    &zfs_abd_scatter_min_size, 0, "Minimum size of scatter allocations.");
#endif /* _KERNEL */
106
107kmem_cache_t *abd_chunk_cache;
108static kstat_t *abd_ksp;
109
fb822260
BA
110/*
111 * We use a scattered SPA_MAXBLOCKSIZE sized ABD whose chunks are
bdd11cbb
AM
112 * just a single zero'd page-sized buffer. This allows us to conserve
113 * memory by only using a single zero buffer for the scatter chunks.
fb822260
BA
114 */
115abd_t *abd_zero_scatter = NULL;
116static char *abd_zero_buf = NULL;
117
6366ef22 118static uint_t
fc551d7e
BA
119abd_chunkcnt_for_bytes(size_t size)
120{
bdd11cbb 121 return ((size + PAGE_MASK) >> PAGE_SHIFT);
fc551d7e
BA
122}
123
6366ef22 124static inline uint_t
fc551d7e
BA
125abd_scatter_chunkcnt(abd_t *abd)
126{
127 ASSERT(!abd_is_linear(abd));
128 return (abd_chunkcnt_for_bytes(
129 ABD_SCATTER(abd).abd_offset + abd->abd_size));
130}
131
132boolean_t
133abd_size_alloc_linear(size_t size)
134{
7eebcd2b 135 return (!zfs_abd_scatter_enabled || size < zfs_abd_scatter_min_size);
fc551d7e
BA
136}
137
138void
139abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
140{
6366ef22 141 uint_t n = abd_scatter_chunkcnt(abd);
fc551d7e 142 ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
bdd11cbb 143 int waste = (n << PAGE_SHIFT) - abd->abd_size;
fc551d7e
BA
144 if (op == ABDSTAT_INCR) {
145 ABDSTAT_BUMP(abdstat_scatter_cnt);
146 ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
85ec5cba
MA
147 ABDSTAT_INCR(abdstat_scatter_chunk_waste, waste);
148 arc_space_consume(waste, ARC_SPACE_ABD_CHUNK_WASTE);
fc551d7e
BA
149 } else {
150 ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
151 ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
85ec5cba
MA
152 ABDSTAT_INCR(abdstat_scatter_chunk_waste, -waste);
153 arc_space_return(waste, ARC_SPACE_ABD_CHUNK_WASTE);
fc551d7e
BA
154 }
155}
156
157void
158abd_update_linear_stats(abd_t *abd, abd_stats_op_t op)
159{
160 ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
161 if (op == ABDSTAT_INCR) {
162 ABDSTAT_BUMP(abdstat_linear_cnt);
163 ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
164 } else {
165 ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
166 ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
167 }
168}
169
170void
171abd_verify_scatter(abd_t *abd)
172{
6366ef22
AM
173 uint_t i, n;
174
fc551d7e 175 /*
bdd11cbb
AM
176 * There is no scatter linear pages in FreeBSD so there is
177 * an error if the ABD has been marked as a linear page.
fc551d7e 178 */
6366ef22 179 ASSERT(!abd_is_linear_page(abd));
bdd11cbb 180 ASSERT3U(ABD_SCATTER(abd).abd_offset, <, PAGE_SIZE);
6366ef22
AM
181 n = abd_scatter_chunkcnt(abd);
182 for (i = 0; i < n; i++) {
183 ASSERT3P(ABD_SCATTER(abd).abd_chunks[i], !=, NULL);
fc551d7e
BA
184 }
185}
186
187void
188abd_alloc_chunks(abd_t *abd, size_t size)
189{
6366ef22
AM
190 uint_t i, n;
191
192 n = abd_chunkcnt_for_bytes(size);
193 for (i = 0; i < n; i++) {
bdd11cbb
AM
194 ABD_SCATTER(abd).abd_chunks[i] =
195 kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
fc551d7e 196 }
fc551d7e
BA
197}
198
199void
200abd_free_chunks(abd_t *abd)
201{
6366ef22
AM
202 uint_t i, n;
203
204 n = abd_scatter_chunkcnt(abd);
205 for (i = 0; i < n; i++) {
bdd11cbb
AM
206 kmem_cache_free(abd_chunk_cache,
207 ABD_SCATTER(abd).abd_chunks[i]);
fc551d7e
BA
208 }
209}
210
211abd_t *
e2af2acc 212abd_alloc_struct_impl(size_t size)
fc551d7e 213{
6366ef22 214 uint_t chunkcnt = abd_chunkcnt_for_bytes(size);
0a03495e
BA
215 /*
216 * In the event we are allocating a gang ABD, the size passed in
217 * will be 0. We must make sure to set abd_size to the size of an
218 * ABD struct as opposed to an ABD scatter with 0 chunks. The gang
219 * ABD struct allocation accounts for an additional 24 bytes over
220 * a scatter ABD with 0 chunks.
221 */
222 size_t abd_size = MAX(sizeof (abd_t),
223 offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]));
fc551d7e
BA
224 abd_t *abd = kmem_alloc(abd_size, KM_PUSHPAGE);
225 ASSERT3P(abd, !=, NULL);
226 ABDSTAT_INCR(abdstat_struct_size, abd_size);
227
228 return (abd);
229}
230
231void
e2af2acc 232abd_free_struct_impl(abd_t *abd)
fc551d7e 233{
6366ef22 234 uint_t chunkcnt = abd_is_linear(abd) || abd_is_gang(abd) ? 0 :
0a03495e 235 abd_scatter_chunkcnt(abd);
6366ef22 236 ssize_t size = MAX(sizeof (abd_t),
0a03495e 237 offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]));
fc551d7e
BA
238 kmem_free(abd, size);
239 ABDSTAT_INCR(abdstat_struct_size, -size);
240}
241
fb822260
BA
242/*
243 * Allocate scatter ABD of size SPA_MAXBLOCKSIZE, where
244 * each chunk in the scatterlist will be set to abd_zero_buf.
245 */
246static void
247abd_alloc_zero_scatter(void)
248{
6366ef22
AM
249 uint_t i, n;
250
251 n = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
bdd11cbb 252 abd_zero_buf = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
861166b0 253 memset(abd_zero_buf, 0, PAGE_SIZE);
fb822260
BA
254 abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
255
e2af2acc 256 abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_ZEROS;
fb822260 257 abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
fb822260
BA
258
259 ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
fb822260 260
6366ef22 261 for (i = 0; i < n; i++) {
fb822260
BA
262 ABD_SCATTER(abd_zero_scatter).abd_chunks[i] =
263 abd_zero_buf;
264 }
265
266 ABDSTAT_BUMP(abdstat_scatter_cnt);
bdd11cbb 267 ABDSTAT_INCR(abdstat_scatter_data_size, PAGE_SIZE);
fb822260
BA
268}
269
270static void
271abd_free_zero_scatter(void)
272{
fb822260 273 ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
bdd11cbb 274 ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGE_SIZE);
fb822260
BA
275
276 abd_free_struct(abd_zero_scatter);
277 abd_zero_scatter = NULL;
bdd11cbb 278 kmem_cache_free(abd_chunk_cache, abd_zero_buf);
fb822260
BA
279}
280
c4c162c1
AM
281static int
282abd_kstats_update(kstat_t *ksp, int rw)
283{
284 abd_stats_t *as = ksp->ks_data;
285
286 if (rw == KSTAT_WRITE)
287 return (EACCES);
288 as->abdstat_struct_size.value.ui64 =
289 wmsum_value(&abd_sums.abdstat_struct_size);
290 as->abdstat_scatter_cnt.value.ui64 =
291 wmsum_value(&abd_sums.abdstat_scatter_cnt);
292 as->abdstat_scatter_data_size.value.ui64 =
293 wmsum_value(&abd_sums.abdstat_scatter_data_size);
294 as->abdstat_scatter_chunk_waste.value.ui64 =
295 wmsum_value(&abd_sums.abdstat_scatter_chunk_waste);
296 as->abdstat_linear_cnt.value.ui64 =
297 wmsum_value(&abd_sums.abdstat_linear_cnt);
298 as->abdstat_linear_data_size.value.ui64 =
299 wmsum_value(&abd_sums.abdstat_linear_data_size);
300 return (0);
301}
302
fc551d7e
BA
303void
304abd_init(void)
305{
bdd11cbb 306 abd_chunk_cache = kmem_cache_create("abd_chunk", PAGE_SIZE, 0,
f68af67a 307 NULL, NULL, NULL, NULL, 0, KMC_NODEBUG);
fc551d7e 308
c4c162c1
AM
309 wmsum_init(&abd_sums.abdstat_struct_size, 0);
310 wmsum_init(&abd_sums.abdstat_scatter_cnt, 0);
311 wmsum_init(&abd_sums.abdstat_scatter_data_size, 0);
312 wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0);
313 wmsum_init(&abd_sums.abdstat_linear_cnt, 0);
314 wmsum_init(&abd_sums.abdstat_linear_data_size, 0);
315
fc551d7e
BA
316 abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
317 sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
318 if (abd_ksp != NULL) {
319 abd_ksp->ks_data = &abd_stats;
c4c162c1 320 abd_ksp->ks_update = abd_kstats_update;
fc551d7e
BA
321 kstat_install(abd_ksp);
322 }
fb822260
BA
323
324 abd_alloc_zero_scatter();
fc551d7e
BA
325}
326
327void
328abd_fini(void)
329{
fb822260
BA
330 abd_free_zero_scatter();
331
fc551d7e
BA
332 if (abd_ksp != NULL) {
333 kstat_delete(abd_ksp);
334 abd_ksp = NULL;
335 }
336
c4c162c1
AM
337 wmsum_fini(&abd_sums.abdstat_struct_size);
338 wmsum_fini(&abd_sums.abdstat_scatter_cnt);
339 wmsum_fini(&abd_sums.abdstat_scatter_data_size);
340 wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste);
341 wmsum_fini(&abd_sums.abdstat_linear_cnt);
342 wmsum_fini(&abd_sums.abdstat_linear_data_size);
343
fc551d7e
BA
344 kmem_cache_destroy(abd_chunk_cache);
345 abd_chunk_cache = NULL;
346}
347
348void
349abd_free_linear_page(abd_t *abd)
350{
351 /*
bf169e9f 352 * FreeBSD does not have scatter linear pages
fc551d7e
BA
353 * so there is an error.
354 */
355 VERIFY(0);
356}
357
358/*
359 * If we're going to use this ABD for doing I/O using the block layer, the
360 * consumer of the ABD data doesn't care if it's scattered or not, and we don't
361 * plan to store this ABD in memory for a long period of time, we should
362 * allocate the ABD type that requires the least data copying to do the I/O.
363 *
364 * Currently this is linear ABDs, however if ldi_strategy() can ever issue I/Os
365 * using a scatter/gather list we should switch to that and replace this call
366 * with vanilla abd_alloc().
367 */
368abd_t *
369abd_alloc_for_io(size_t size, boolean_t is_metadata)
370{
371 return (abd_alloc_linear(size, is_metadata));
372}
373
fc551d7e 374abd_t *
c6d1112b
JL
375abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
376 size_t size)
fc551d7e 377{
fc551d7e
BA
378 abd_verify(sabd);
379 ASSERT3U(off, <=, sabd->abd_size);
380
381 size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
c6d1112b 382 size_t chunkcnt = abd_chunkcnt_for_bytes(
bdd11cbb 383 (new_offset & PAGE_MASK) + size);
c6d1112b
JL
384
385 ASSERT3U(chunkcnt, <=, abd_scatter_chunkcnt(sabd));
fc551d7e 386
e2af2acc
MA
387 /*
388 * If an abd struct is provided, it is only the minimum size. If we
389 * need additional chunks, we need to allocate a new struct.
390 */
391 if (abd != NULL &&
392 offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]) >
393 sizeof (abd_t)) {
394 abd = NULL;
395 }
396
397 if (abd == NULL)
bdd11cbb 398 abd = abd_alloc_struct(chunkcnt << PAGE_SHIFT);
fc551d7e
BA
399
400 /*
401 * Even if this buf is filesystem metadata, we only track that
402 * if we own the underlying data buffer, which is not true in
403 * this case. Therefore, we don't ever use ABD_FLAG_META here.
404 */
fc551d7e 405
bdd11cbb 406 ABD_SCATTER(abd).abd_offset = new_offset & PAGE_MASK;
fc551d7e
BA
407
408 /* Copy the scatterlist starting at the correct offset */
409 (void) memcpy(&ABD_SCATTER(abd).abd_chunks,
bdd11cbb 410 &ABD_SCATTER(sabd).abd_chunks[new_offset >> PAGE_SHIFT],
fc551d7e
BA
411 chunkcnt * sizeof (void *));
412
413 return (abd);
414}
415
fc551d7e
BA
416/*
417 * Initialize the abd_iter.
418 */
419void
420abd_iter_init(struct abd_iter *aiter, abd_t *abd)
421{
fb822260 422 ASSERT(!abd_is_gang(abd));
fc551d7e
BA
423 abd_verify(abd);
424 aiter->iter_abd = abd;
425 aiter->iter_pos = 0;
426 aiter->iter_mapaddr = NULL;
427 aiter->iter_mapsize = 0;
428}
429
430/*
431 * This is just a helper function to see if we have exhausted the
432 * abd_iter and reached the end.
433 */
434boolean_t
435abd_iter_at_end(struct abd_iter *aiter)
436{
437 return (aiter->iter_pos == aiter->iter_abd->abd_size);
438}
439
440/*
441 * Advance the iterator by a certain amount. Cannot be called when a chunk is
442 * in use. This can be safely called when the aiter has already exhausted, in
443 * which case this does nothing.
444 */
445void
446abd_iter_advance(struct abd_iter *aiter, size_t amount)
447{
448 ASSERT3P(aiter->iter_mapaddr, ==, NULL);
449 ASSERT0(aiter->iter_mapsize);
450
451 /* There's nothing left to advance to, so do nothing */
452 if (abd_iter_at_end(aiter))
453 return;
454
455 aiter->iter_pos += amount;
456}
457
458/*
459 * Map the current chunk into aiter. This can be safely called when the aiter
460 * has already exhausted, in which case this does nothing.
461 */
462void
463abd_iter_map(struct abd_iter *aiter)
464{
465 void *paddr;
fc551d7e
BA
466
467 ASSERT3P(aiter->iter_mapaddr, ==, NULL);
468 ASSERT0(aiter->iter_mapsize);
469
fc551d7e
BA
470 /* There's nothing left to iterate over, so do nothing */
471 if (abd_iter_at_end(aiter))
472 return;
473
bdd11cbb
AM
474 abd_t *abd = aiter->iter_abd;
475 size_t offset = aiter->iter_pos;
476 if (abd_is_linear(abd)) {
477 aiter->iter_mapsize = abd->abd_size - offset;
478 paddr = ABD_LINEAR_BUF(abd);
fc551d7e 479 } else {
bdd11cbb
AM
480 offset += ABD_SCATTER(abd).abd_offset;
481 paddr = ABD_SCATTER(abd).abd_chunks[offset >> PAGE_SHIFT];
482 offset &= PAGE_MASK;
483 aiter->iter_mapsize = MIN(PAGE_SIZE - offset,
484 abd->abd_size - aiter->iter_pos);
fc551d7e
BA
485 }
486 aiter->iter_mapaddr = (char *)paddr + offset;
487}
488
489/*
490 * Unmap the current chunk from aiter. This can be safely called when the aiter
491 * has already exhausted, in which case this does nothing.
492 */
493void
494abd_iter_unmap(struct abd_iter *aiter)
495{
bdd11cbb
AM
496 if (!abd_iter_at_end(aiter)) {
497 ASSERT3P(aiter->iter_mapaddr, !=, NULL);
498 ASSERT3U(aiter->iter_mapsize, >, 0);
499 }
fc551d7e
BA
500
501 aiter->iter_mapaddr = NULL;
502 aiter->iter_mapsize = 0;
503}
7564073e
MM
504
505void
506abd_cache_reap_now(void)
507{
508 kmem_cache_reap_soon(abd_chunk_cache);
509}