]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
34dc7c2f BB |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
9babb374 | 22 | * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
34dc7c2f | 23 | * Use is subject to license terms. |
fc897b24 | 24 | * Copyright (C) 2016 Gvozden Nešković. All rights reserved. |
34dc7c2f | 25 | */ |
3c67d83a TH |
26 | /* |
27 | * Copyright 2013 Saso Kiselkov. All rights reserved. | |
28 | */ | |
34dc7c2f | 29 | |
a6255b7f DQ |
30 | /* |
31 | * Copyright (c) 2016 by Delphix. All rights reserved. | |
32 | */ | |
33 | ||
9babb374 BB |
34 | /* |
35 | * Fletcher Checksums | |
36 | * ------------------ | |
37 | * | |
38 | * ZFS's 2nd and 4th order Fletcher checksums are defined by the following | |
39 | * recurrence relations: | |
40 | * | |
41 | * a_i = a_(i-1) + f_(i-1) | |
43 | * | |
44 | * b_i = b_(i-1) + a_i | |
46 | * | |
47 | * c_i = c_(i-1) + b_i (fletcher-4 only) | |
49 | * | |
50 | * d_i = d_(i-1) + c_i (fletcher-4 only) | |
52 | * | |
53 | * Where | |
54 | * a_0 = b_0 = c_0 = d_0 = 0 | |
55 | * and | |
56 | * f_0 .. f_(n-1) are the input data. | |
57 | * | |
58 | * Using standard techniques, these translate into the following series: | |
59 | * | |
62 | * a_n = SUM(i = 1..n) f_(n-i) | |
63 | * b_n = SUM(i = 1..n) i * f_(n-i) | |
65 | * | |
69 | * c_n = SUM(i = 1..n) (i*(i+1) / 2) * f_(n-i) | |
70 | * d_n = SUM(i = 1..n) (i*(i+1)*(i+2) / 6) * f_(n-i) | |
72 | * | |
73 | * For fletcher-2, the f_is are 64-bit, and [ab]_i are 64-bit accumulators. | |
74 | * Since the additions are done mod (2^64), errors in the high bits may not | |
75 | * be noticed. For this reason, fletcher-2 is deprecated. | |
76 | * | |
77 | * For fletcher-4, the f_is are 32-bit, and [abcd]_i are 64-bit accumulators. | |
78 | * A conservative estimate of how big the buffer can get before we overflow | |
79 | * can be estimated using f_i = 0xffffffff for all i: | |
80 | * | |
81 | * % bc | |
82 | * f=2^32-1;d=0; for (i = 1; d<2^64; i++) { d += f*i*(i+1)*(i+2)/6 }; (i-1)*4 | |
83 | * 2264 | |
84 | * quit | |
85 | * % | |
86 | * | |
87 | * So blocks of up to 2k will not overflow. Our largest block size is | |
88 | * 128k, which has 32k 4-byte words, so we can compute the largest possible | |
89 | * accumulators, then divide by 2^64 to figure the max amount of overflow: | |
90 | * | |
91 | * % bc | |
92 | * a=b=c=d=0; f=2^32-1; for (i=1; i<=32*1024; i++) { a+=f; b+=a; c+=b; d+=c } | |
93 | * a/2^64;b/2^64;c/2^64;d/2^64 | |
94 | * 0 | |
95 | * 0 | |
96 | * 1365 | |
97 | * 11186858 | |
98 | * quit | |
99 | * % | |
100 | * | |
101 | * So a and b cannot overflow. To make sure each bit of input has some | |
102 | * effect on the contents of c and d, we can look at what the factors of | |
103 | * the coefficients in the equations for c_n and d_n are. The number of 2s | |
104 | * in the factors determines the lowest set bit in the multiplier. Running | |
105 | * through the cases for n*(n+1)/2 reveals that the highest power of 2 is | |
106 | * 2^14, and for n*(n+1)*(n+2)/6 it is 2^15. So while some data may overflow | |
107 | * the 64-bit accumulators, every bit of every f_i affects every accumulator, | |
108 | * even for 128k blocks. | |
109 | * | |
110 | * If we wanted to make a stronger version of fletcher4 (fletcher4c?), | |
111 | * we could do our calculations mod (2^32 - 1) by adding in the carries | |
112 | * periodically, and store the number of carries in the top 32-bits. | |
113 | * | |
114 | * -------------------- | |
115 | * Checksum Performance | |
116 | * -------------------- | |
117 | * | |
118 | * There are two interesting components to checksum performance: cached and | |
119 | * uncached performance. With cached data, fletcher-2 is about four times | |
120 | * faster than fletcher-4. With uncached data, the performance difference is | |
121 | * negligible, since the cost of a cache fill dominates the processing time. | |
122 | * Even though fletcher-4 is slower than fletcher-2, it is still a pretty | |
123 | * efficient pass over the data. | |
124 | * | |
125 | * In normal operation, the data which is being checksummed is in a buffer | |
126 | * which has been filled either by: | |
127 | * | |
128 | * 1. a compression step, which will be mostly cached, or | |
861166b0 AZ |
129 | * 2. a memcpy() or copyin(), which will be uncached |
130 | * (because the copy is cache-bypassing). | |
9babb374 BB |
131 | * |
132 | * For both cached and uncached data, both fletcher checksums are much faster | |
133 | * than sha-256, and slower than 'off', which doesn't touch the data at all. | |
134 | */ | |
34dc7c2f BB |
135 | |
136 | #include <sys/types.h> | |
137 | #include <sys/sysmacros.h> | |
138 | #include <sys/byteorder.h> | |
006e9a40 | 139 | #include <sys/simd.h> |
a3f82aec | 140 | #include <sys/spa.h> |
fc897b24 | 141 | #include <sys/zio_checksum.h> |
1eeb4562 JX |
142 | #include <sys/zfs_context.h> |
143 | #include <zfs_fletcher.h> | |
144 | ||
2fe36b0b | 145 | #define FLETCHER_MIN_SIMD_SIZE 64 |
fc897b24 | 146 | |
5bf703b8 GN |
147 | static void fletcher_4_scalar_init(fletcher_4_ctx_t *ctx); |
148 | static void fletcher_4_scalar_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp); | |
149 | static void fletcher_4_scalar_native(fletcher_4_ctx_t *ctx, | |
150 | const void *buf, uint64_t size); | |
151 | static void fletcher_4_scalar_byteswap(fletcher_4_ctx_t *ctx, | |
152 | const void *buf, uint64_t size); | |
1eeb4562 JX |
153 | static boolean_t fletcher_4_scalar_valid(void); |
154 | ||
155 | static const fletcher_4_ops_t fletcher_4_scalar_ops = { | |
fc897b24 | 156 | .init_native = fletcher_4_scalar_init, |
5bf703b8 | 157 | .fini_native = fletcher_4_scalar_fini, |
fc897b24 GN |
158 | .compute_native = fletcher_4_scalar_native, |
159 | .init_byteswap = fletcher_4_scalar_init, | |
5bf703b8 | 160 | .fini_byteswap = fletcher_4_scalar_fini, |
1eeb4562 JX |
161 | .compute_byteswap = fletcher_4_scalar_byteswap, |
162 | .valid = fletcher_4_scalar_valid, | |
78289b84 | 163 | .uses_fpu = B_FALSE, |
1eeb4562 JX |
164 | .name = "scalar" |
165 | }; | |
166 | ||
fc897b24 GN |
167 | static fletcher_4_ops_t fletcher_4_fastest_impl = { |
168 | .name = "fastest", | |
169 | .valid = fletcher_4_scalar_valid | |
170 | }; | |
171 | ||
172 | static const fletcher_4_ops_t *fletcher_4_impls[] = { | |
1eeb4562 | 173 | &fletcher_4_scalar_ops, |
7f319493 RD |
174 | &fletcher_4_superscalar_ops, |
175 | &fletcher_4_superscalar4_ops, | |
35a76a03 TS |
176 | #if defined(HAVE_SSE2) |
177 | &fletcher_4_sse2_ops, | |
178 | #endif | |
179 | #if defined(HAVE_SSE2) && defined(HAVE_SSSE3) | |
180 | &fletcher_4_ssse3_ops, | |
181 | #endif | |
1eeb4562 JX |
182 | #if defined(HAVE_AVX) && defined(HAVE_AVX2) |
183 | &fletcher_4_avx2_ops, | |
184 | #endif | |
70b258fc GN |
185 | #if defined(__x86_64) && defined(HAVE_AVX512F) |
186 | &fletcher_4_avx512f_ops, | |
187 | #endif | |
0b2a6423 RD |
188 | #if defined(__x86_64) && defined(HAVE_AVX512BW) |
189 | &fletcher_4_avx512bw_ops, | |
190 | #endif | |
2dbad447 | 191 | #if defined(__aarch64__) && !defined(__FreeBSD__) |
24cdeaf1 RD |
192 | &fletcher_4_aarch64_neon_ops, |
193 | #endif | |
1eeb4562 JX |
194 | }; |
195 | ||
fc897b24 GN |
196 | /* Hold all supported implementations */ |
197 | static uint32_t fletcher_4_supp_impls_cnt = 0; | |
198 | static fletcher_4_ops_t *fletcher_4_supp_impls[ARRAY_SIZE(fletcher_4_impls)]; | |
199 | ||
200 | /* Select fletcher4 implementation */ | |
201 | #define IMPL_FASTEST (UINT32_MAX) | |
202 | #define IMPL_CYCLE (UINT32_MAX - 1) | |
203 | #define IMPL_SCALAR (0) | |
204 | ||
205 | static uint32_t fletcher_4_impl_chosen = IMPL_FASTEST; | |
206 | ||
207 | #define IMPL_READ(i) (*(volatile uint32_t *) &(i)) | |
1eeb4562 JX |
208 | |
209 | static struct fletcher_4_impl_selector { | |
fc897b24 GN |
210 | const char *fis_name; |
211 | uint32_t fis_sel; | |
1eeb4562 | 212 | } fletcher_4_impl_selectors[] = { |
fc897b24 | 213 | { "cycle", IMPL_CYCLE }, |
fc897b24 GN |
214 | { "fastest", IMPL_FASTEST }, |
215 | { "scalar", IMPL_SCALAR } | |
1eeb4562 JX |
216 | }; |
217 | ||
46364cb2 | 218 | #if defined(_KERNEL) |
1eeb4562 JX |
219 | static kstat_t *fletcher_4_kstat; |
220 | ||
fc897b24 GN |
221 | static struct fletcher_4_kstat { |
222 | uint64_t native; | |
223 | uint64_t byteswap; | |
224 | } fletcher_4_stat_data[ARRAY_SIZE(fletcher_4_impls) + 1]; | |
e5db3134 | 225 | #endif |
fc897b24 GN |
226 | |
227 | /* Indicate that benchmark has been completed */ | |
228 | static boolean_t fletcher_4_initialized = B_FALSE; | |
34dc7c2f BB |
229 | |
230 | void | |
a6255b7f DQ |
231 | fletcher_init(zio_cksum_t *zcp) |
232 | { | |
233 | ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); | |
234 | } | |
235 | ||
236 | int | |
237 | fletcher_2_incremental_native(void *buf, size_t size, void *data) | |
34dc7c2f | 238 | { |
a6255b7f DQ |
239 | zio_cksum_t *zcp = data; |
240 | ||
34dc7c2f BB |
241 | const uint64_t *ip = buf; |
242 | const uint64_t *ipend = ip + (size / sizeof (uint64_t)); | |
243 | uint64_t a0, b0, a1, b1; | |
244 | ||
a6255b7f DQ |
245 | a0 = zcp->zc_word[0]; |
246 | a1 = zcp->zc_word[1]; | |
247 | b0 = zcp->zc_word[2]; | |
248 | b1 = zcp->zc_word[3]; | |
249 | ||
250 | for (; ip < ipend; ip += 2) { | |
34dc7c2f BB |
251 | a0 += ip[0]; |
252 | a1 += ip[1]; | |
253 | b0 += a0; | |
254 | b1 += a1; | |
255 | } | |
256 | ||
257 | ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); | |
a6255b7f | 258 | return (0); |
34dc7c2f BB |
259 | } |
260 | ||
261 | void | |
a6255b7f | 262 | fletcher_2_native(const void *buf, uint64_t size, |
3c67d83a | 263 | const void *ctx_template, zio_cksum_t *zcp) |
34dc7c2f | 264 | { |
1f182103 | 265 | (void) ctx_template; |
a6255b7f DQ |
266 | fletcher_init(zcp); |
267 | (void) fletcher_2_incremental_native((void *) buf, size, zcp); | |
268 | } | |
269 | ||
270 | int | |
271 | fletcher_2_incremental_byteswap(void *buf, size_t size, void *data) | |
272 | { | |
273 | zio_cksum_t *zcp = data; | |
274 | ||
34dc7c2f BB |
275 | const uint64_t *ip = buf; |
276 | const uint64_t *ipend = ip + (size / sizeof (uint64_t)); | |
277 | uint64_t a0, b0, a1, b1; | |
278 | ||
a6255b7f DQ |
279 | a0 = zcp->zc_word[0]; |
280 | a1 = zcp->zc_word[1]; | |
281 | b0 = zcp->zc_word[2]; | |
282 | b1 = zcp->zc_word[3]; | |
283 | ||
284 | for (; ip < ipend; ip += 2) { | |
34dc7c2f BB |
285 | a0 += BSWAP_64(ip[0]); |
286 | a1 += BSWAP_64(ip[1]); | |
287 | b0 += a0; | |
288 | b1 += a1; | |
289 | } | |
290 | ||
291 | ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); | |
a6255b7f DQ |
292 | return (0); |
293 | } | |
294 | ||
a6255b7f DQ |
295 | void |
296 | fletcher_2_byteswap(const void *buf, uint64_t size, | |
297 | const void *ctx_template, zio_cksum_t *zcp) | |
298 | { | |
1f182103 | 299 | (void) ctx_template; |
a6255b7f DQ |
300 | fletcher_init(zcp); |
301 | (void) fletcher_2_incremental_byteswap((void *) buf, size, zcp); | |
34dc7c2f BB |
302 | } |
303 | ||
fc897b24 | 304 | static void |
5bf703b8 | 305 | fletcher_4_scalar_init(fletcher_4_ctx_t *ctx) |
34dc7c2f | 306 | { |
5bf703b8 GN |
307 | ZIO_SET_CHECKSUM(&ctx->scalar, 0, 0, 0, 0); |
308 | } | |
309 | ||
310 | static void | |
311 | fletcher_4_scalar_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp) | |
312 | { | |
313 | memcpy(zcp, &ctx->scalar, sizeof (zio_cksum_t)); | |
34dc7c2f BB |
314 | } |
315 | ||
1eeb4562 | 316 | static void |
5bf703b8 GN |
317 | fletcher_4_scalar_native(fletcher_4_ctx_t *ctx, const void *buf, |
318 | uint64_t size) | |
34dc7c2f BB |
319 | { |
320 | const uint32_t *ip = buf; | |
321 | const uint32_t *ipend = ip + (size / sizeof (uint32_t)); | |
322 | uint64_t a, b, c, d; | |
323 | ||
5bf703b8 GN |
324 | a = ctx->scalar.zc_word[0]; |
325 | b = ctx->scalar.zc_word[1]; | |
326 | c = ctx->scalar.zc_word[2]; | |
327 | d = ctx->scalar.zc_word[3]; | |
1eeb4562 JX |
328 | |
329 | for (; ip < ipend; ip++) { | |
330 | a += ip[0]; | |
34dc7c2f BB |
331 | b += a; |
332 | c += b; | |
333 | d += c; | |
334 | } | |
335 | ||
5bf703b8 | 336 | ZIO_SET_CHECKSUM(&ctx->scalar, a, b, c, d); |
34dc7c2f BB |
337 | } |
338 | ||
1eeb4562 | 339 | static void |
5bf703b8 GN |
340 | fletcher_4_scalar_byteswap(fletcher_4_ctx_t *ctx, const void *buf, |
341 | uint64_t size) | |
34dc7c2f BB |
342 | { |
343 | const uint32_t *ip = buf; | |
344 | const uint32_t *ipend = ip + (size / sizeof (uint32_t)); | |
345 | uint64_t a, b, c, d; | |
346 | ||
5bf703b8 GN |
347 | a = ctx->scalar.zc_word[0]; |
348 | b = ctx->scalar.zc_word[1]; | |
349 | c = ctx->scalar.zc_word[2]; | |
350 | d = ctx->scalar.zc_word[3]; | |
34dc7c2f BB |
351 | |
352 | for (; ip < ipend; ip++) { | |
1eeb4562 | 353 | a += BSWAP_32(ip[0]); |
34dc7c2f BB |
354 | b += a; |
355 | c += b; | |
356 | d += c; | |
357 | } | |
358 | ||
5bf703b8 | 359 | ZIO_SET_CHECKSUM(&ctx->scalar, a, b, c, d); |
34dc7c2f BB |
360 | } |
361 | ||
1eeb4562 JX |
362 | static boolean_t |
363 | fletcher_4_scalar_valid(void) | |
364 | { | |
365 | return (B_TRUE); | |
366 | } | |
367 | ||
368 | int | |
369 | fletcher_4_impl_set(const char *val) | |
370 | { | |
fc897b24 GN |
371 | int err = -EINVAL; |
372 | uint32_t impl = IMPL_READ(fletcher_4_impl_chosen); | |
373 | size_t i, val_len; | |
1eeb4562 JX |
374 | |
375 | val_len = strlen(val); | |
376 | while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */ | |
377 | val_len--; | |
378 | ||
fc897b24 | 379 | /* check mandatory implementations */ |
1eeb4562 JX |
380 | for (i = 0; i < ARRAY_SIZE(fletcher_4_impl_selectors); i++) { |
381 | const char *name = fletcher_4_impl_selectors[i].fis_name; | |
382 | ||
383 | if (val_len == strlen(name) && | |
384 | strncmp(val, name, val_len) == 0) { | |
fc897b24 GN |
385 | impl = fletcher_4_impl_selectors[i].fis_sel; |
386 | err = 0; | |
1eeb4562 JX |
387 | break; |
388 | } | |
389 | } | |
1eeb4562 | 390 | |
fc897b24 GN |
391 | if (err != 0 && fletcher_4_initialized) { |
392 | /* check all supported implementations */ | |
393 | for (i = 0; i < fletcher_4_supp_impls_cnt; i++) { | |
394 | const char *name = fletcher_4_supp_impls[i]->name; | |
1eeb4562 | 395 | |
fc897b24 GN |
396 | if (val_len == strlen(name) && |
397 | strncmp(val, name, val_len) == 0) { | |
398 | impl = i; | |
399 | err = 0; | |
400 | break; | |
401 | } | |
402 | } | |
403 | } | |
1eeb4562 | 404 | |
fc897b24 GN |
405 | if (err == 0) { |
406 | atomic_swap_32(&fletcher_4_impl_chosen, impl); | |
407 | membar_producer(); | |
408 | } | |
409 | ||
410 | return (err); | |
1eeb4562 JX |
411 | } |
412 | ||
e5db3134 BB |
413 | /* |
414 | * Returns the Fletcher 4 operations for checksums. When a SIMD | |
415 | * implementation is not allowed in the current context, then fallback | |
416 | * to the fastest generic implementation. | |
417 | */ | |
1eeb4562 JX |
418 | static inline const fletcher_4_ops_t * |
419 | fletcher_4_impl_get(void) | |
420 | { | |
e5db3134 BB |
421 | if (!kfpu_allowed()) |
422 | return (&fletcher_4_superscalar4_ops); | |
423 | ||
424 | const fletcher_4_ops_t *ops = NULL; | |
425 | uint32_t impl = IMPL_READ(fletcher_4_impl_chosen); | |
fc897b24 GN |
426 | |
427 | switch (impl) { | |
428 | case IMPL_FASTEST: | |
429 | ASSERT(fletcher_4_initialized); | |
430 | ops = &fletcher_4_fastest_impl; | |
431 | break; | |
e5db3134 BB |
432 | case IMPL_CYCLE: |
433 | /* Cycle through supported implementations */ | |
fc897b24 GN |
434 | ASSERT(fletcher_4_initialized); |
435 | ASSERT3U(fletcher_4_supp_impls_cnt, >, 0); | |
fc897b24 GN |
436 | static uint32_t cycle_count = 0; |
437 | uint32_t idx = (++cycle_count) % fletcher_4_supp_impls_cnt; | |
438 | ops = fletcher_4_supp_impls[idx]; | |
e5db3134 | 439 | break; |
fc897b24 GN |
440 | default: |
441 | ASSERT3U(fletcher_4_supp_impls_cnt, >, 0); | |
442 | ASSERT3U(impl, <, fletcher_4_supp_impls_cnt); | |
fc897b24 GN |
443 | ops = fletcher_4_supp_impls[impl]; |
444 | break; | |
445 | } | |
446 | ||
447 | ASSERT3P(ops, !=, NULL); | |
448 | ||
449 | return (ops); | |
450 | } | |
451 | ||
fc897b24 | 452 | static inline void |
5bf703b8 | 453 | fletcher_4_native_impl(const void *buf, uint64_t size, zio_cksum_t *zcp) |
fc897b24 | 454 | { |
5bf703b8 GN |
455 | fletcher_4_ctx_t ctx; |
456 | const fletcher_4_ops_t *ops = fletcher_4_impl_get(); | |
457 | ||
78289b84 AF |
458 | if (ops->uses_fpu == B_TRUE) { |
459 | kfpu_begin(); | |
460 | } | |
5bf703b8 GN |
461 | ops->init_native(&ctx); |
462 | ops->compute_native(&ctx, buf, size); | |
463 | ops->fini_native(&ctx, zcp); | |
78289b84 AF |
464 | if (ops->uses_fpu == B_TRUE) { |
465 | kfpu_end(); | |
466 | } | |
1eeb4562 JX |
467 | } |
468 | ||
469 | void | |
3c67d83a TH |
470 | fletcher_4_native(const void *buf, uint64_t size, |
471 | const void *ctx_template, zio_cksum_t *zcp) | |
1eeb4562 | 472 | { |
1f182103 | 473 | (void) ctx_template; |
41ae864b | 474 | const uint64_t p2size = P2ALIGN_TYPED(size, FLETCHER_MIN_SIMD_SIZE, |
475 | uint64_t); | |
0dab2e84 | 476 | |
fc897b24 GN |
477 | ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t))); |
478 | ||
5bf703b8 | 479 | if (size == 0 || p2size == 0) { |
fc897b24 | 480 | ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); |
5bf703b8 GN |
481 | |
482 | if (size > 0) | |
483 | fletcher_4_scalar_native((fletcher_4_ctx_t *)zcp, | |
484 | buf, size); | |
fc897b24 | 485 | } else { |
5bf703b8 | 486 | fletcher_4_native_impl(buf, p2size, zcp); |
1eeb4562 | 487 | |
fc897b24 | 488 | if (p2size < size) |
5bf703b8 GN |
489 | fletcher_4_scalar_native((fletcher_4_ctx_t *)zcp, |
490 | (char *)buf + p2size, size - p2size); | |
fc897b24 GN |
491 | } |
492 | } | |
493 | ||
494 | void | |
495 | fletcher_4_native_varsize(const void *buf, uint64_t size, zio_cksum_t *zcp) | |
496 | { | |
5bf703b8 GN |
497 | ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); |
498 | fletcher_4_scalar_native((fletcher_4_ctx_t *)zcp, buf, size); | |
fc897b24 GN |
499 | } |
500 | ||
501 | static inline void | |
5bf703b8 | 502 | fletcher_4_byteswap_impl(const void *buf, uint64_t size, zio_cksum_t *zcp) |
fc897b24 | 503 | { |
5bf703b8 GN |
504 | fletcher_4_ctx_t ctx; |
505 | const fletcher_4_ops_t *ops = fletcher_4_impl_get(); | |
506 | ||
78289b84 AF |
507 | if (ops->uses_fpu == B_TRUE) { |
508 | kfpu_begin(); | |
509 | } | |
5bf703b8 GN |
510 | ops->init_byteswap(&ctx); |
511 | ops->compute_byteswap(&ctx, buf, size); | |
512 | ops->fini_byteswap(&ctx, zcp); | |
78289b84 AF |
513 | if (ops->uses_fpu == B_TRUE) { |
514 | kfpu_end(); | |
515 | } | |
1eeb4562 JX |
516 | } |
517 | ||
518 | void | |
3c67d83a TH |
519 | fletcher_4_byteswap(const void *buf, uint64_t size, |
520 | const void *ctx_template, zio_cksum_t *zcp) | |
1eeb4562 | 521 | { |
1f182103 | 522 | (void) ctx_template; |
41ae864b | 523 | const uint64_t p2size = P2ALIGN_TYPED(size, FLETCHER_MIN_SIMD_SIZE, |
524 | uint64_t); | |
0dab2e84 | 525 | |
fc897b24 GN |
526 | ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t))); |
527 | ||
5bf703b8 | 528 | if (size == 0 || p2size == 0) { |
fc897b24 | 529 | ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); |
5bf703b8 GN |
530 | |
531 | if (size > 0) | |
532 | fletcher_4_scalar_byteswap((fletcher_4_ctx_t *)zcp, | |
533 | buf, size); | |
fc897b24 | 534 | } else { |
5bf703b8 | 535 | fletcher_4_byteswap_impl(buf, p2size, zcp); |
1eeb4562 | 536 | |
fc897b24 | 537 | if (p2size < size) |
5bf703b8 GN |
538 | fletcher_4_scalar_byteswap((fletcher_4_ctx_t *)zcp, |
539 | (char *)buf + p2size, size - p2size); | |
fc897b24 | 540 | } |
1eeb4562 JX |
541 | } |
542 | ||
37f520db GN |
543 | /* Incremental Fletcher 4 */ |
544 | ||
5bf703b8 GN |
545 | #define ZFS_FLETCHER_4_INC_MAX_SIZE (8ULL << 20) |
546 | ||
37f520db GN |
547 | static inline void |
548 | fletcher_4_incremental_combine(zio_cksum_t *zcp, const uint64_t size, | |
549 | const zio_cksum_t *nzcp) | |
550 | { | |
551 | const uint64_t c1 = size / sizeof (uint32_t); | |
552 | const uint64_t c2 = c1 * (c1 + 1) / 2; | |
553 | const uint64_t c3 = c2 * (c1 + 2) / 3; | |
554 | ||
5bf703b8 GN |
555 | /* |
556 | * Value of 'c3' overflows on buffer sizes close to 16MiB. For that | |
557 | * reason we split incremental fletcher4 computation of large buffers | |
558 | * to steps of (ZFS_FLETCHER_4_INC_MAX_SIZE) size. | |
559 | */ | |
560 | ASSERT3U(size, <=, ZFS_FLETCHER_4_INC_MAX_SIZE); | |
561 | ||
37f520db GN |
562 | zcp->zc_word[3] += nzcp->zc_word[3] + c1 * zcp->zc_word[2] + |
563 | c2 * zcp->zc_word[1] + c3 * zcp->zc_word[0]; | |
564 | zcp->zc_word[2] += nzcp->zc_word[2] + c1 * zcp->zc_word[1] + | |
565 | c2 * zcp->zc_word[0]; | |
566 | zcp->zc_word[1] += nzcp->zc_word[1] + c1 * zcp->zc_word[0]; | |
567 | zcp->zc_word[0] += nzcp->zc_word[0]; | |
568 | } | |
569 | ||
570 | static inline void | |
571 | fletcher_4_incremental_impl(boolean_t native, const void *buf, uint64_t size, | |
572 | zio_cksum_t *zcp) | |
573 | { | |
37f520db GN |
574 | while (size > 0) { |
575 | zio_cksum_t nzc; | |
5bf703b8 | 576 | uint64_t len = MIN(size, ZFS_FLETCHER_4_INC_MAX_SIZE); |
37f520db GN |
577 | |
578 | if (native) | |
579 | fletcher_4_native(buf, len, NULL, &nzc); | |
580 | else | |
581 | fletcher_4_byteswap(buf, len, NULL, &nzc); | |
582 | ||
583 | fletcher_4_incremental_combine(zcp, len, &nzc); | |
584 | ||
585 | size -= len; | |
586 | buf += len; | |
587 | } | |
588 | } | |
589 | ||
a6255b7f DQ |
590 | int |
591 | fletcher_4_incremental_native(void *buf, size_t size, void *data) | |
37f520db | 592 | { |
a6255b7f | 593 | zio_cksum_t *zcp = data; |
5bf703b8 GN |
594 | /* Use scalar impl to directly update cksum of small blocks */ |
595 | if (size < SPA_MINBLOCKSIZE) | |
596 | fletcher_4_scalar_native((fletcher_4_ctx_t *)zcp, buf, size); | |
597 | else | |
598 | fletcher_4_incremental_impl(B_TRUE, buf, size, zcp); | |
a6255b7f | 599 | return (0); |
37f520db GN |
600 | } |
601 | ||
a6255b7f DQ |
602 | int |
603 | fletcher_4_incremental_byteswap(void *buf, size_t size, void *data) | |
37f520db | 604 | { |
a6255b7f | 605 | zio_cksum_t *zcp = data; |
5bf703b8 GN |
606 | /* Use scalar impl to directly update cksum of small blocks */ |
607 | if (size < SPA_MINBLOCKSIZE) | |
608 | fletcher_4_scalar_byteswap((fletcher_4_ctx_t *)zcp, buf, size); | |
609 | else | |
610 | fletcher_4_incremental_impl(B_FALSE, buf, size, zcp); | |
a6255b7f | 611 | return (0); |
37f520db GN |
612 | } |
613 | ||
46364cb2 | 614 | #if defined(_KERNEL) |
8062b768 BB |
615 | /* |
616 | * Fletcher 4 kstats | |
617 | */ | |
fc897b24 GN |
/*
 * kstat raw-ops callback: write the column header line into buf.
 *
 *	buf	destination buffer
 *	size	capacity of buf in bytes
 *
 * All three columns are formatted by a single bounded snprintf() call, so
 * a short buffer truncates the line instead of being overrun.  Always
 * returns 0.
 */
static int
fletcher_4_kstat_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size, "%-17s%-15s%-15s\n",
	    "implementation", "native", "byteswap");

	return (0);
}
629 | ||
fc897b24 GN |
630 | static int |
631 | fletcher_4_kstat_data(char *buf, size_t size, void *data) | |
34dc7c2f | 632 | { |
fc897b24 GN |
633 | struct fletcher_4_kstat *fastest_stat = |
634 | &fletcher_4_stat_data[fletcher_4_supp_impls_cnt]; | |
02730c33 | 635 | struct fletcher_4_kstat *curr_stat = (struct fletcher_4_kstat *)data; |
fc897b24 GN |
636 | ssize_t off = 0; |
637 | ||
638 | if (curr_stat == fastest_stat) { | |
639 | off += snprintf(buf + off, size - off, "%-17s", "fastest"); | |
640 | off += snprintf(buf + off, size - off, "%-15s", | |
641 | fletcher_4_supp_impls[fastest_stat->native]->name); | |
2e7f664f | 642 | (void) snprintf(buf + off, size - off, "%-15s\n", |
fc897b24 GN |
643 | fletcher_4_supp_impls[fastest_stat->byteswap]->name); |
644 | } else { | |
645 | ptrdiff_t id = curr_stat - fletcher_4_stat_data; | |
646 | ||
647 | off += snprintf(buf + off, size - off, "%-17s", | |
648 | fletcher_4_supp_impls[id]->name); | |
649 | off += snprintf(buf + off, size - off, "%-15llu", | |
02730c33 | 650 | (u_longlong_t)curr_stat->native); |
2e7f664f | 651 | (void) snprintf(buf + off, size - off, "%-15llu\n", |
02730c33 | 652 | (u_longlong_t)curr_stat->byteswap); |
fc897b24 GN |
653 | } |
654 | ||
655 | return (0); | |
1eeb4562 | 656 | } |
34dc7c2f | 657 | |
fc897b24 GN |
658 | static void * |
659 | fletcher_4_kstat_addr(kstat_t *ksp, loff_t n) | |
1eeb4562 | 660 | { |
fc897b24 GN |
661 | if (n <= fletcher_4_supp_impls_cnt) |
662 | ksp->ks_private = (void *) (fletcher_4_stat_data + n); | |
663 | else | |
664 | ksp->ks_private = NULL; | |
665 | ||
666 | return (ksp->ks_private); | |
667 | } | |
46364cb2 | 668 | #endif |
fc897b24 GN |
669 | |
/*
 * Copy the init/fini/compute callbacks of one mode from src into
 * fletcher_4_fastest_impl.
 *
 *	type	callback suffix, either native or byteswap
 *	src	pointer to the operations table to copy from (evaluated
 *		more than once)
 *
 * fletcher_4_fastest_impl has a single uses_fpu flag shared by both
 * modes, whose fastest callbacks may come from different implementations.
 * The flag is therefore only ever raised here: plainly overwriting it
 * would let the mode copied last clear a flag the other mode still needs.
 *
 * The body is wrapped in do { } while (0) so the macro behaves like a
 * single statement, including in an unbraced if/else.
 */
#define	FLETCHER_4_FASTEST_FN_COPY(type, src)				  \
do {									  \
	fletcher_4_fastest_impl.init_ ## type = (src)->init_ ## type;	  \
	fletcher_4_fastest_impl.fini_ ## type = (src)->fini_ ## type;	  \
	fletcher_4_fastest_impl.compute_ ## type = (src)->compute_ ## type; \
	if ((src)->uses_fpu == B_TRUE)					  \
		fletcher_4_fastest_impl.uses_fpu = B_TRUE;		  \
} while (0)
677 | ||
83b698dc | 678 | #define FLETCHER_4_BENCH_NS (MSEC2NSEC(1)) /* 1ms */ |
34dc7c2f | 679 | |
a6255b7f DQ |
680 | typedef void fletcher_checksum_func_t(const void *, uint64_t, const void *, |
681 | zio_cksum_t *); | |
682 | ||
e5db3134 | 683 | #if defined(_KERNEL) |
fc897b24 GN |
684 | static void |
685 | fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size) | |
686 | { | |
687 | ||
688 | struct fletcher_4_kstat *fastest_stat = | |
689 | &fletcher_4_stat_data[fletcher_4_supp_impls_cnt]; | |
690 | hrtime_t start; | |
691 | uint64_t run_bw, run_time_ns, best_run = 0; | |
692 | zio_cksum_t zc; | |
693 | uint32_t i, l, sel_save = IMPL_READ(fletcher_4_impl_chosen); | |
694 | ||
a6255b7f DQ |
695 | fletcher_checksum_func_t *fletcher_4_test = native ? |
696 | fletcher_4_native : fletcher_4_byteswap; | |
1eeb4562 | 697 | |
fc897b24 GN |
698 | for (i = 0; i < fletcher_4_supp_impls_cnt; i++) { |
699 | struct fletcher_4_kstat *stat = &fletcher_4_stat_data[i]; | |
700 | uint64_t run_count = 0; | |
1eeb4562 | 701 | |
fc897b24 GN |
702 | /* temporary set an implementation */ |
703 | fletcher_4_impl_chosen = i; | |
1eeb4562 JX |
704 | |
705 | kpreempt_disable(); | |
706 | start = gethrtime(); | |
1eeb4562 | 707 | do { |
fc897b24 | 708 | for (l = 0; l < 32; l++, run_count++) |
3c67d83a | 709 | fletcher_4_test(data, data_size, NULL, &zc); |
fc897b24 GN |
710 | |
711 | run_time_ns = gethrtime() - start; | |
712 | } while (run_time_ns < FLETCHER_4_BENCH_NS); | |
1eeb4562 JX |
713 | kpreempt_enable(); |
714 | ||
fc897b24 GN |
715 | run_bw = data_size * run_count * NANOSEC; |
716 | run_bw /= run_time_ns; /* B/s */ | |
717 | ||
718 | if (native) | |
719 | stat->native = run_bw; | |
720 | else | |
721 | stat->byteswap = run_bw; | |
722 | ||
723 | if (run_bw > best_run) { | |
724 | best_run = run_bw; | |
725 | ||
726 | if (native) { | |
727 | fastest_stat->native = i; | |
728 | FLETCHER_4_FASTEST_FN_COPY(native, | |
729 | fletcher_4_supp_impls[i]); | |
730 | } else { | |
731 | fastest_stat->byteswap = i; | |
732 | FLETCHER_4_FASTEST_FN_COPY(byteswap, | |
733 | fletcher_4_supp_impls[i]); | |
734 | } | |
1eeb4562 | 735 | } |
fc897b24 GN |
736 | } |
737 | ||
738 | /* restore original selection */ | |
739 | atomic_swap_32(&fletcher_4_impl_chosen, sel_save); | |
740 | } | |
e5db3134 | 741 | #endif /* _KERNEL */ |
1eeb4562 | 742 | |
e5db3134 BB |
743 | /* |
744 | * Initialize and benchmark all supported implementations. | |
745 | */ | |
746 | static void | |
10fa2545 | 747 | fletcher_4_benchmark(void) |
fc897b24 | 748 | { |
fc897b24 | 749 | fletcher_4_ops_t *curr_impl; |
fc897b24 GN |
750 | int i, c; |
751 | ||
e5db3134 | 752 | /* Move supported implementations into fletcher_4_supp_impls */ |
fc897b24 | 753 | for (i = 0, c = 0; i < ARRAY_SIZE(fletcher_4_impls); i++) { |
02730c33 | 754 | curr_impl = (fletcher_4_ops_t *)fletcher_4_impls[i]; |
fc897b24 GN |
755 | |
756 | if (curr_impl->valid && curr_impl->valid()) | |
757 | fletcher_4_supp_impls[c++] = curr_impl; | |
34dc7c2f | 758 | } |
fc897b24 GN |
759 | membar_producer(); /* complete fletcher_4_supp_impls[] init */ |
760 | fletcher_4_supp_impls_cnt = c; /* number of supported impl */ | |
34dc7c2f | 761 | |
e5db3134 BB |
762 | #if defined(_KERNEL) |
763 | static const size_t data_size = 1 << SPA_OLD_MAXBLOCKSHIFT; /* 128kiB */ | |
764 | char *databuf = vmem_alloc(data_size, KM_SLEEP); | |
1eeb4562 | 765 | |
fc897b24 GN |
766 | for (i = 0; i < data_size / sizeof (uint64_t); i++) |
767 | ((uint64_t *)databuf)[i] = (uintptr_t)(databuf+i); /* warm-up */ | |
768 | ||
769 | fletcher_4_benchmark_impl(B_FALSE, databuf, data_size); | |
770 | fletcher_4_benchmark_impl(B_TRUE, databuf, data_size); | |
771 | ||
772 | vmem_free(databuf, data_size); | |
e5db3134 BB |
773 | #else |
774 | /* | |
775 | * Skip the benchmark in user space to avoid impacting libzpool | |
776 | * consumers (zdb, zhack, zinject, ztest). The last implementation | |
777 | * is assumed to be the fastest and used by default. | |
778 | */ | |
779 | memcpy(&fletcher_4_fastest_impl, | |
780 | fletcher_4_supp_impls[fletcher_4_supp_impls_cnt - 1], | |
781 | sizeof (fletcher_4_fastest_impl)); | |
782 | fletcher_4_fastest_impl.name = "fastest"; | |
783 | membar_producer(); | |
784 | #endif /* _KERNEL */ | |
785 | } | |
fc897b24 | 786 | |
e5db3134 BB |
787 | void |
788 | fletcher_4_init(void) | |
789 | { | |
10fa2545 BB |
790 | /* Determine the fastest available implementation. */ |
791 | fletcher_4_benchmark(); | |
e5db3134 | 792 | |
10fa2545 | 793 | #if defined(_KERNEL) |
e5db3134 | 794 | /* Install kstats for all implementations */ |
fc897b24 | 795 | fletcher_4_kstat = kstat_create("zfs", 0, "fletcher_4_bench", "misc", |
02730c33 | 796 | KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); |
1eeb4562 | 797 | if (fletcher_4_kstat != NULL) { |
fc897b24 GN |
798 | fletcher_4_kstat->ks_data = NULL; |
799 | fletcher_4_kstat->ks_ndata = UINT32_MAX; | |
800 | kstat_set_raw_ops(fletcher_4_kstat, | |
801 | fletcher_4_kstat_headers, | |
802 | fletcher_4_kstat_data, | |
803 | fletcher_4_kstat_addr); | |
1eeb4562 JX |
804 | kstat_install(fletcher_4_kstat); |
805 | } | |
46364cb2 | 806 | #endif |
fc897b24 GN |
807 | |
808 | /* Finish initialization */ | |
809 | fletcher_4_initialized = B_TRUE; | |
1eeb4562 JX |
810 | } |
811 | ||
/*
 * Module teardown: in the kernel, delete the benchmark kstat if one was
 * created.  Nothing is done in user space.
 */
void
fletcher_4_fini(void)
{
#if defined(_KERNEL)
	if (fletcher_4_kstat == NULL)
		return;

	kstat_delete(fletcher_4_kstat);
	fletcher_4_kstat = NULL;
#endif
}
c28b2279 | 822 | |
2fe36b0b DQ |
823 | /* ABD adapters */ |
824 | ||
825 | static void | |
826 | abd_fletcher_4_init(zio_abd_checksum_data_t *cdp) | |
827 | { | |
828 | const fletcher_4_ops_t *ops = fletcher_4_impl_get(); | |
829 | cdp->acd_private = (void *) ops; | |
830 | ||
78289b84 AF |
831 | if (ops->uses_fpu == B_TRUE) { |
832 | kfpu_begin(); | |
833 | } | |
2fe36b0b DQ |
834 | if (cdp->acd_byteorder == ZIO_CHECKSUM_NATIVE) |
835 | ops->init_native(cdp->acd_ctx); | |
836 | else | |
837 | ops->init_byteswap(cdp->acd_ctx); | |
78289b84 | 838 | |
2fe36b0b DQ |
839 | } |
840 | ||
841 | static void | |
842 | abd_fletcher_4_fini(zio_abd_checksum_data_t *cdp) | |
843 | { | |
844 | fletcher_4_ops_t *ops = (fletcher_4_ops_t *)cdp->acd_private; | |
845 | ||
846 | ASSERT(ops); | |
847 | ||
848 | if (cdp->acd_byteorder == ZIO_CHECKSUM_NATIVE) | |
849 | ops->fini_native(cdp->acd_ctx, cdp->acd_zcp); | |
850 | else | |
851 | ops->fini_byteswap(cdp->acd_ctx, cdp->acd_zcp); | |
78289b84 AF |
852 | |
853 | if (ops->uses_fpu == B_TRUE) { | |
854 | kfpu_end(); | |
855 | } | |
2fe36b0b DQ |
856 | } |
857 | ||
78289b84 | 858 | |
2fe36b0b DQ |
859 | static void |
860 | abd_fletcher_4_simd2scalar(boolean_t native, void *data, size_t size, | |
861 | zio_abd_checksum_data_t *cdp) | |
862 | { | |
863 | zio_cksum_t *zcp = cdp->acd_zcp; | |
864 | ||
865 | ASSERT3U(size, <, FLETCHER_MIN_SIMD_SIZE); | |
866 | ||
867 | abd_fletcher_4_fini(cdp); | |
868 | cdp->acd_private = (void *)&fletcher_4_scalar_ops; | |
869 | ||
870 | if (native) | |
871 | fletcher_4_incremental_native(data, size, zcp); | |
872 | else | |
873 | fletcher_4_incremental_byteswap(data, size, zcp); | |
874 | } | |
875 | ||
876 | static int | |
877 | abd_fletcher_4_iter(void *data, size_t size, void *private) | |
878 | { | |
879 | zio_abd_checksum_data_t *cdp = (zio_abd_checksum_data_t *)private; | |
880 | fletcher_4_ctx_t *ctx = cdp->acd_ctx; | |
881 | fletcher_4_ops_t *ops = (fletcher_4_ops_t *)cdp->acd_private; | |
882 | boolean_t native = cdp->acd_byteorder == ZIO_CHECKSUM_NATIVE; | |
41ae864b | 883 | uint64_t asize = P2ALIGN_TYPED(size, FLETCHER_MIN_SIMD_SIZE, uint64_t); |
2fe36b0b DQ |
884 | |
885 | ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t))); | |
886 | ||
887 | if (asize > 0) { | |
888 | if (native) | |
889 | ops->compute_native(ctx, data, asize); | |
890 | else | |
891 | ops->compute_byteswap(ctx, data, asize); | |
892 | ||
893 | size -= asize; | |
894 | data = (char *)data + asize; | |
895 | } | |
896 | ||
897 | if (size > 0) { | |
898 | ASSERT3U(size, <, FLETCHER_MIN_SIMD_SIZE); | |
899 | /* At this point we have to switch to scalar impl */ | |
900 | abd_fletcher_4_simd2scalar(native, data, size, cdp); | |
901 | } | |
902 | ||
903 | return (0); | |
904 | } | |
905 | ||
906 | zio_abd_checksum_func_t fletcher_4_abd_ops = { | |
907 | .acf_init = abd_fletcher_4_init, | |
908 | .acf_fini = abd_fletcher_4_fini, | |
909 | .acf_iter = abd_fletcher_4_iter | |
910 | }; | |
911 | ||
439dc034 RM |
912 | #if defined(_KERNEL) |
913 | ||
/*
 * printf-style format for one implementation name: "[%s] " when index `i`
 * equals the selected `impl`, plain "%s " otherwise.  Both forms end with
 * a separating space.
 */
#define	IMPL_FMT(impl, i)	(((impl) == (i)) ? "[%s] " : "%s ")
2fe36b0b | 915 | |
439dc034 | 916 | #if defined(__linux__) |
1eeb4562 JX |
917 | |
918 | static int | |
9cc1844a | 919 | fletcher_4_param_get(char *buffer, zfs_kernel_param_t *unused) |
1eeb4562 | 920 | { |
fc897b24 GN |
921 | const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen); |
922 | char *fmt; | |
439dc034 | 923 | int cnt = 0; |
1eeb4562 | 924 | |
fc897b24 | 925 | /* list fastest */ |
439dc034 | 926 | fmt = IMPL_FMT(impl, IMPL_FASTEST); |
30367835 | 927 | cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "fastest"); |
1eeb4562 | 928 | |
fc897b24 | 929 | /* list all supported implementations */ |
439dc034 RM |
930 | for (uint32_t i = 0; i < fletcher_4_supp_impls_cnt; ++i) { |
931 | fmt = IMPL_FMT(impl, i); | |
30367835 | 932 | cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, |
fc897b24 | 933 | fletcher_4_supp_impls[i]->name); |
1eeb4562 JX |
934 | } |
935 | ||
936 | return (cnt); | |
937 | } | |
938 | ||
939 | static int | |
9cc1844a | 940 | fletcher_4_param_set(const char *val, zfs_kernel_param_t *unused) |
1eeb4562 JX |
941 | { |
942 | return (fletcher_4_impl_set(val)); | |
943 | } | |
944 | ||
439dc034 RM |
945 | #else |
946 | ||
947 | #include <sys/sbuf.h> | |
948 | ||
949 | static int | |
950 | fletcher_4_param(ZFS_MODULE_PARAM_ARGS) | |
951 | { | |
952 | int err; | |
953 | ||
954 | if (req->newptr == NULL) { | |
955 | const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen); | |
956 | const int init_buflen = 64; | |
957 | const char *fmt; | |
958 | struct sbuf *s; | |
959 | ||
960 | s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req); | |
961 | ||
962 | /* list fastest */ | |
963 | fmt = IMPL_FMT(impl, IMPL_FASTEST); | |
964 | (void) sbuf_printf(s, fmt, "fastest"); | |
965 | ||
966 | /* list all supported implementations */ | |
967 | for (uint32_t i = 0; i < fletcher_4_supp_impls_cnt; ++i) { | |
968 | fmt = IMPL_FMT(impl, i); | |
969 | (void) sbuf_printf(s, fmt, | |
970 | fletcher_4_supp_impls[i]->name); | |
971 | } | |
972 | ||
973 | err = sbuf_finish(s); | |
974 | sbuf_delete(s); | |
975 | ||
976 | return (err); | |
977 | } | |
978 | ||
979 | char buf[16]; | |
980 | ||
981 | err = sysctl_handle_string(oidp, buf, sizeof (buf), req); | |
982 | if (err) | |
983 | return (err); | |
984 | return (-fletcher_4_impl_set(buf)); | |
985 | } | |
986 | ||
987 | #endif | |
988 | ||
989 | #undef IMPL_FMT | |
990 | ||
1eeb4562 JX |
/*
 * Choose a fletcher 4 implementation in ZFS.
 * Users can choose "cycle" to exercise all implementations, but this is
 * for testing purposes only and can therefore only be set in user space.
 */
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, fletcher_4_impl,
    fletcher_4_param_set, fletcher_4_param_get, ZMOD_RW,
	"Select fletcher 4 implementation.");
1eeb4562 | 999 | |
a6255b7f DQ |
/* Fletcher 2 / fletcher 4 entry points exported to other kernel modules. */
EXPORT_SYMBOL(fletcher_init);
EXPORT_SYMBOL(fletcher_2_incremental_native);
EXPORT_SYMBOL(fletcher_2_incremental_byteswap);
EXPORT_SYMBOL(fletcher_4_init);
EXPORT_SYMBOL(fletcher_4_fini);
EXPORT_SYMBOL(fletcher_2_native);
EXPORT_SYMBOL(fletcher_2_byteswap);
EXPORT_SYMBOL(fletcher_4_native);
EXPORT_SYMBOL(fletcher_4_native_varsize);
EXPORT_SYMBOL(fletcher_4_byteswap);
EXPORT_SYMBOL(fletcher_4_incremental_native);
EXPORT_SYMBOL(fletcher_4_incremental_byteswap);
EXPORT_SYMBOL(fletcher_4_abd_ops);
c28b2279 | 1013 | #endif |