]> git.proxmox.com Git - mirror_zfs.git/blame - module/zcommon/zfs_fletcher.c
OpenZFS 4185 - add new cryptographic checksums to ZFS: SHA-512, Skein, Edon-R
[mirror_zfs.git] / module / zcommon / zfs_fletcher.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
9babb374 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
34dc7c2f 23 * Use is subject to license terms.
fc897b24 24 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
34dc7c2f 25 */
3c67d83a
TH
26/*
27 * Copyright 2013 Saso Kiselkov. All rights reserved.
28 */
34dc7c2f 29
9babb374
BB
30/*
31 * Fletcher Checksums
32 * ------------------
33 *
34 * ZFS's 2nd and 4th order Fletcher checksums are defined by the following
35 * recurrence relations:
36 *
37 *    a  = a    + f
38 *     i    i-1    i-1
39 *
40 *    b  = b    + a
41 *     i    i-1    i
42 *
43 *    c  = c    + b            (fletcher-4 only)
44 *     i    i-1    i
45 *
46 *    d  = d    + c            (fletcher-4 only)
47 *     i    i-1    i
48 *
49 * Where
50 * a_0 = b_0 = c_0 = d_0 = 0
51 * and
52 * f_0 .. f_(n-1) are the input data.
53 *
54 * Using standard techniques, these translate into the following series:
55 *
56 *         __n_                            __n_
57 *         \   |                           \   |
58 *    a  =  >     f                   b  =  >     i * f
59 *     n   /___|   n - i               n   /___|       n - i
60 *         i = 1                           i = 1
61 *
62 *
63 *         __n_                            __n_
64 *         \   |  i*(i+1)                  \   |  i*(i+1)*(i+2)
65 *    c  =  >     ------- f           d  =  >     ------------- f
66 *     n   /___|     2     n - i       n   /___|        6        n - i
67 *         i = 1                           i = 1
68 *
69 * For fletcher-2, the f_is are 64-bit, and [ab]_i are 64-bit accumulators.
70 * Since the additions are done mod (2^64), errors in the high bits may not
71 * be noticed. For this reason, fletcher-2 is deprecated.
72 *
73 * For fletcher-4, the f_is are 32-bit, and [abcd]_i are 64-bit accumulators.
74 * A conservative estimate of how big the buffer can get before we overflow
75 * can be estimated using f_i = 0xffffffff for all i:
76 *
77 * % bc
78 * f=2^32-1;d=0; for (i = 1; d<2^64; i++) { d += f*i*(i+1)*(i+2)/6 }; (i-1)*4
79 * 2264
80 * quit
81 * %
82 *
83 * So blocks of up to 2k will not overflow. Our largest block size is
84 * 128k, which has 32k 4-byte words, so we can compute the largest possible
85 * accumulators, then divide by 2^64 to figure the max amount of overflow:
86 *
87 * % bc
88 * a=b=c=d=0; f=2^32-1; for (i=1; i<=32*1024; i++) { a+=f; b+=a; c+=b; d+=c }
89 * a/2^64;b/2^64;c/2^64;d/2^64
90 * 0
91 * 0
92 * 1365
93 * 11186858
94 * quit
95 * %
96 *
97 * So a and b cannot overflow. To make sure each bit of input has some
98 * effect on the contents of c and d, we can look at what the factors of
99 * the coefficients in the equations for c_n and d_n are. The number of 2s
100 * in the factors determines the lowest set bit in the multiplier. Running
101 * through the cases for n*(n+1)/2 reveals that the highest power of 2 is
102 * 2^14, and for n*(n+1)*(n+2)/6 it is 2^15. So while some data may overflow
103 * the 64-bit accumulators, every bit of every f_i affects every accumulator,
104 * even for 128k blocks.
105 *
106 * If we wanted to make a stronger version of fletcher4 (fletcher4c?),
107 * we could do our calculations mod (2^32 - 1) by adding in the carries
108 * periodically, and store the number of carries in the top 32-bits.
109 *
110 * --------------------
111 * Checksum Performance
112 * --------------------
113 *
114 * There are two interesting components to checksum performance: cached and
115 * uncached performance. With cached data, fletcher-2 is about four times
116 * faster than fletcher-4. With uncached data, the performance difference is
117 * negligible, since the cost of a cache fill dominates the processing time.
118 * Even though fletcher-4 is slower than fletcher-2, it is still a pretty
119 * efficient pass over the data.
120 *
121 * In normal operation, the data which is being checksummed is in a buffer
122 * which has been filled either by:
123 *
124 * 1. a compression step, which will be mostly cached, or
125 * 2. a bcopy() or copyin(), which will be uncached (because the
126 * copy is cache-bypassing).
127 *
128 * For both cached and uncached data, both fletcher checksums are much faster
129 * than sha-256, and slower than 'off', which doesn't touch the data at all.
130 */
34dc7c2f
BB
131
132#include <sys/types.h>
133#include <sys/sysmacros.h>
134#include <sys/byteorder.h>
135#include <sys/spa.h>
fc897b24 136#include <sys/zio_checksum.h>
1eeb4562
JX
137#include <sys/zfs_context.h>
138#include <zfs_fletcher.h>
139
fc897b24 140
1eeb4562 141static void fletcher_4_scalar_init(zio_cksum_t *zcp);
fc897b24 142static void fletcher_4_scalar_native(const void *buf, uint64_t size,
1eeb4562
JX
143 zio_cksum_t *zcp);
144static void fletcher_4_scalar_byteswap(const void *buf, uint64_t size,
145 zio_cksum_t *zcp);
146static boolean_t fletcher_4_scalar_valid(void);
147
148static const fletcher_4_ops_t fletcher_4_scalar_ops = {
fc897b24
GN
149 .init_native = fletcher_4_scalar_init,
150 .compute_native = fletcher_4_scalar_native,
151 .init_byteswap = fletcher_4_scalar_init,
1eeb4562
JX
152 .compute_byteswap = fletcher_4_scalar_byteswap,
153 .valid = fletcher_4_scalar_valid,
154 .name = "scalar"
155};
156
fc897b24
GN
157static fletcher_4_ops_t fletcher_4_fastest_impl = {
158 .name = "fastest",
159 .valid = fletcher_4_scalar_valid
160};
161
162static const fletcher_4_ops_t *fletcher_4_impls[] = {
1eeb4562 163 &fletcher_4_scalar_ops,
35a76a03
TS
164#if defined(HAVE_SSE2)
165 &fletcher_4_sse2_ops,
166#endif
167#if defined(HAVE_SSE2) && defined(HAVE_SSSE3)
168 &fletcher_4_ssse3_ops,
169#endif
1eeb4562
JX
170#if defined(HAVE_AVX) && defined(HAVE_AVX2)
171 &fletcher_4_avx2_ops,
172#endif
70b258fc
GN
173#if defined(__x86_64) && defined(HAVE_AVX512F)
174 &fletcher_4_avx512f_ops,
175#endif
1eeb4562
JX
176};
177
fc897b24
GN
178/* Hold all supported implementations */
179static uint32_t fletcher_4_supp_impls_cnt = 0;
180static fletcher_4_ops_t *fletcher_4_supp_impls[ARRAY_SIZE(fletcher_4_impls)];
181
182/* Select fletcher4 implementation */
183#define IMPL_FASTEST (UINT32_MAX)
184#define IMPL_CYCLE (UINT32_MAX - 1)
185#define IMPL_SCALAR (0)
186
187static uint32_t fletcher_4_impl_chosen = IMPL_FASTEST;
188
189#define IMPL_READ(i) (*(volatile uint32_t *) &(i))
1eeb4562
JX
190
191static struct fletcher_4_impl_selector {
fc897b24
GN
192 const char *fis_name;
193 uint32_t fis_sel;
1eeb4562 194} fletcher_4_impl_selectors[] = {
1eeb4562 195#if !defined(_KERNEL)
fc897b24 196 { "cycle", IMPL_CYCLE },
1eeb4562 197#endif
fc897b24
GN
198 { "fastest", IMPL_FASTEST },
199 { "scalar", IMPL_SCALAR }
1eeb4562
JX
200};
201
1eeb4562
JX
202static kstat_t *fletcher_4_kstat;
203
fc897b24
GN
204static struct fletcher_4_kstat {
205 uint64_t native;
206 uint64_t byteswap;
207} fletcher_4_stat_data[ARRAY_SIZE(fletcher_4_impls) + 1];
208
209/* Indicate that benchmark has been completed */
210static boolean_t fletcher_4_initialized = B_FALSE;
34dc7c2f 211
3c67d83a 212/*ARGSUSED*/
34dc7c2f 213void
3c67d83a
TH
214fletcher_2_native(const void *buf, uint64_t size,
215 const void *ctx_template, zio_cksum_t *zcp)
34dc7c2f
BB
216{
217 const uint64_t *ip = buf;
218 const uint64_t *ipend = ip + (size / sizeof (uint64_t));
219 uint64_t a0, b0, a1, b1;
220
221 for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) {
222 a0 += ip[0];
223 a1 += ip[1];
224 b0 += a0;
225 b1 += a1;
226 }
227
228 ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
229}
230
3c67d83a 231/*ARGSUSED*/
34dc7c2f 232void
3c67d83a
TH
233fletcher_2_byteswap(const void *buf, uint64_t size,
234 const void *ctx_template, zio_cksum_t *zcp)
34dc7c2f
BB
235{
236 const uint64_t *ip = buf;
237 const uint64_t *ipend = ip + (size / sizeof (uint64_t));
238 uint64_t a0, b0, a1, b1;
239
240 for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) {
241 a0 += BSWAP_64(ip[0]);
242 a1 += BSWAP_64(ip[1]);
243 b0 += a0;
244 b1 += a1;
245 }
246
247 ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
248}
249
fc897b24
GN
250static void
251fletcher_4_scalar_init(zio_cksum_t *zcp)
34dc7c2f 252{
1eeb4562 253 ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
34dc7c2f
BB
254}
255
1eeb4562 256static void
fc897b24 257fletcher_4_scalar_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
34dc7c2f
BB
258{
259 const uint32_t *ip = buf;
260 const uint32_t *ipend = ip + (size / sizeof (uint32_t));
261 uint64_t a, b, c, d;
262
1eeb4562
JX
263 a = zcp->zc_word[0];
264 b = zcp->zc_word[1];
265 c = zcp->zc_word[2];
266 d = zcp->zc_word[3];
267
268 for (; ip < ipend; ip++) {
269 a += ip[0];
34dc7c2f
BB
270 b += a;
271 c += b;
272 d += c;
273 }
274
275 ZIO_SET_CHECKSUM(zcp, a, b, c, d);
276}
277
1eeb4562
JX
278static void
279fletcher_4_scalar_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
34dc7c2f
BB
280{
281 const uint32_t *ip = buf;
282 const uint32_t *ipend = ip + (size / sizeof (uint32_t));
283 uint64_t a, b, c, d;
284
285 a = zcp->zc_word[0];
286 b = zcp->zc_word[1];
287 c = zcp->zc_word[2];
288 d = zcp->zc_word[3];
289
290 for (; ip < ipend; ip++) {
1eeb4562 291 a += BSWAP_32(ip[0]);
34dc7c2f
BB
292 b += a;
293 c += b;
294 d += c;
295 }
296
297 ZIO_SET_CHECKSUM(zcp, a, b, c, d);
298}
299
1eeb4562
JX
300static boolean_t
301fletcher_4_scalar_valid(void)
302{
303 return (B_TRUE);
304}
305
306int
307fletcher_4_impl_set(const char *val)
308{
fc897b24
GN
309 int err = -EINVAL;
310 uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
311 size_t i, val_len;
1eeb4562
JX
312
313 val_len = strlen(val);
314 while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */
315 val_len--;
316
fc897b24 317 /* check mandatory implementations */
1eeb4562
JX
318 for (i = 0; i < ARRAY_SIZE(fletcher_4_impl_selectors); i++) {
319 const char *name = fletcher_4_impl_selectors[i].fis_name;
320
321 if (val_len == strlen(name) &&
322 strncmp(val, name, val_len) == 0) {
fc897b24
GN
323 impl = fletcher_4_impl_selectors[i].fis_sel;
324 err = 0;
1eeb4562
JX
325 break;
326 }
327 }
1eeb4562 328
fc897b24
GN
329 if (err != 0 && fletcher_4_initialized) {
330 /* check all supported implementations */
331 for (i = 0; i < fletcher_4_supp_impls_cnt; i++) {
332 const char *name = fletcher_4_supp_impls[i]->name;
1eeb4562 333
fc897b24
GN
334 if (val_len == strlen(name) &&
335 strncmp(val, name, val_len) == 0) {
336 impl = i;
337 err = 0;
338 break;
339 }
340 }
341 }
1eeb4562 342
fc897b24
GN
343 if (err == 0) {
344 atomic_swap_32(&fletcher_4_impl_chosen, impl);
345 membar_producer();
346 }
347
348 return (err);
1eeb4562
JX
349}
350
351static inline const fletcher_4_ops_t *
352fletcher_4_impl_get(void)
353{
fc897b24
GN
354 fletcher_4_ops_t *ops = NULL;
355 const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
356
357 switch (impl) {
358 case IMPL_FASTEST:
359 ASSERT(fletcher_4_initialized);
360 ops = &fletcher_4_fastest_impl;
361 break;
1eeb4562 362#if !defined(_KERNEL)
fc897b24
GN
363 case IMPL_CYCLE: {
364 ASSERT(fletcher_4_initialized);
365 ASSERT3U(fletcher_4_supp_impls_cnt, >, 0);
366
367 static uint32_t cycle_count = 0;
368 uint32_t idx = (++cycle_count) % fletcher_4_supp_impls_cnt;
369 ops = fletcher_4_supp_impls[idx];
1eeb4562 370 }
fc897b24 371 break;
1eeb4562 372#endif
fc897b24
GN
373 default:
374 ASSERT3U(fletcher_4_supp_impls_cnt, >, 0);
375 ASSERT3U(impl, <, fletcher_4_supp_impls_cnt);
376
377 ops = fletcher_4_supp_impls[impl];
378 break;
379 }
380
381 ASSERT3P(ops, !=, NULL);
382
383 return (ops);
384}
385
386void
387fletcher_4_incremental_native(const void *buf, uint64_t size,
388 zio_cksum_t *zcp)
389{
390 ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t)));
391
392 fletcher_4_scalar_native(buf, size, zcp);
393}
394
395void
396fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
397 zio_cksum_t *zcp)
398{
399 ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t)));
400
401 fletcher_4_scalar_byteswap(buf, size, zcp);
402}
403
404static inline void
405fletcher_4_native_impl(const fletcher_4_ops_t *ops, const void *buf,
406 uint64_t size, zio_cksum_t *zcp)
407{
408 ops->init_native(zcp);
409 ops->compute_native(buf, size, zcp);
410 if (ops->fini_native != NULL)
411 ops->fini_native(zcp);
1eeb4562
JX
412}
413
3c67d83a 414/*ARGSUSED*/
1eeb4562 415void
3c67d83a
TH
416fletcher_4_native(const void *buf, uint64_t size,
417 const void *ctx_template, zio_cksum_t *zcp)
1eeb4562 418{
0dab2e84 419 const fletcher_4_ops_t *ops;
fc897b24 420 uint64_t p2size = P2ALIGN(size, 64);
0dab2e84 421
fc897b24
GN
422 ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t)));
423
424 if (size == 0) {
425 ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
426 } else if (p2size == 0) {
0dab2e84 427 ops = &fletcher_4_scalar_ops;
fc897b24
GN
428 fletcher_4_native_impl(ops, buf, size, zcp);
429 } else {
430 ops = fletcher_4_impl_get();
431 fletcher_4_native_impl(ops, buf, p2size, zcp);
1eeb4562 432
fc897b24
GN
433 if (p2size < size)
434 fletcher_4_incremental_native((char *)buf + p2size,
435 size - p2size, zcp);
436 }
437}
438
439void
440fletcher_4_native_varsize(const void *buf, uint64_t size, zio_cksum_t *zcp)
441{
442 fletcher_4_native_impl(&fletcher_4_scalar_ops, buf, size, zcp);
443}
444
445static inline void
446fletcher_4_byteswap_impl(const fletcher_4_ops_t *ops, const void *buf,
447 uint64_t size, zio_cksum_t *zcp)
448{
449 ops->init_byteswap(zcp);
450 ops->compute_byteswap(buf, size, zcp);
451 if (ops->fini_byteswap != NULL)
452 ops->fini_byteswap(zcp);
1eeb4562
JX
453}
454
3c67d83a 455/*ARGSUSED*/
1eeb4562 456void
3c67d83a
TH
457fletcher_4_byteswap(const void *buf, uint64_t size,
458 const void *ctx_template, zio_cksum_t *zcp)
1eeb4562 459{
0dab2e84 460 const fletcher_4_ops_t *ops;
fc897b24 461 uint64_t p2size = P2ALIGN(size, 64);
0dab2e84 462
fc897b24
GN
463 ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t)));
464
465 if (size == 0) {
466 ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
467 } else if (p2size == 0) {
0dab2e84 468 ops = &fletcher_4_scalar_ops;
fc897b24
GN
469 fletcher_4_byteswap_impl(ops, buf, size, zcp);
470 } else {
471 ops = fletcher_4_impl_get();
472 fletcher_4_byteswap_impl(ops, buf, p2size, zcp);
1eeb4562 473
fc897b24
GN
474 if (p2size < size)
475 fletcher_4_incremental_byteswap((char *)buf + p2size,
476 size - p2size, zcp);
477 }
1eeb4562
JX
478}
479
fc897b24
GN
/*
 * Raw kstat "headers" callback: write the column header line
 * ("implementation", "native", "byteswap") into buf.
 *
 * The line is produced by a single bounded snprintf() so that at most
 * size bytes, including the terminating NUL, are ever written; when buf is
 * too small the header is truncated instead of writing past the buffer.
 *
 * Always returns 0.
 */
static int
fletcher_4_kstat_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size, "%-17s%-15s%-15s\n",
	    "implementation", "native", "byteswap");

	return (0);
}
491
fc897b24
GN
492static int
493fletcher_4_kstat_data(char *buf, size_t size, void *data)
34dc7c2f 494{
fc897b24
GN
495 struct fletcher_4_kstat *fastest_stat =
496 &fletcher_4_stat_data[fletcher_4_supp_impls_cnt];
497 struct fletcher_4_kstat *curr_stat = (struct fletcher_4_kstat *) data;
498 ssize_t off = 0;
499
500 if (curr_stat == fastest_stat) {
501 off += snprintf(buf + off, size - off, "%-17s", "fastest");
502 off += snprintf(buf + off, size - off, "%-15s",
503 fletcher_4_supp_impls[fastest_stat->native]->name);
504 off += snprintf(buf + off, size - off, "%-15s\n",
505 fletcher_4_supp_impls[fastest_stat->byteswap]->name);
506 } else {
507 ptrdiff_t id = curr_stat - fletcher_4_stat_data;
508
509 off += snprintf(buf + off, size - off, "%-17s",
510 fletcher_4_supp_impls[id]->name);
511 off += snprintf(buf + off, size - off, "%-15llu",
512 (u_longlong_t) curr_stat->native);
513 off += snprintf(buf + off, size - off, "%-15llu\n",
514 (u_longlong_t) curr_stat->byteswap);
515 }
516
517 return (0);
1eeb4562 518}
34dc7c2f 519
fc897b24
GN
520static void *
521fletcher_4_kstat_addr(kstat_t *ksp, loff_t n)
1eeb4562 522{
fc897b24
GN
523 if (n <= fletcher_4_supp_impls_cnt)
524 ksp->ks_private = (void *) (fletcher_4_stat_data + n);
525 else
526 ksp->ks_private = NULL;
527
528 return (ksp->ks_private);
529}
530
/*
 * Copy the init/fini/compute hooks of the given kind ("native" or
 * "byteswap") from the ops pointed to by src into fletcher_4_fastest_impl.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * and must be terminated with a semicolon by the caller.  Note that src is
 * evaluated three times.
 */
#define	FLETCHER_4_FASTEST_FN_COPY(type, src)				  \
do {									  \
	fletcher_4_fastest_impl.init_ ## type = (src)->init_ ## type;	  \
	fletcher_4_fastest_impl.fini_ ## type = (src)->fini_ ## type;	  \
	fletcher_4_fastest_impl.compute_ ## type = (src)->compute_ ## type; \
} while (0)

/* Minimum time each implementation is benchmarked for */
#define	FLETCHER_4_BENCH_NS	(MSEC2NSEC(50))		/* 50ms */
34dc7c2f 539
fc897b24
GN
540static void
541fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size)
542{
543
544 struct fletcher_4_kstat *fastest_stat =
545 &fletcher_4_stat_data[fletcher_4_supp_impls_cnt];
546 hrtime_t start;
547 uint64_t run_bw, run_time_ns, best_run = 0;
548 zio_cksum_t zc;
549 uint32_t i, l, sel_save = IMPL_READ(fletcher_4_impl_chosen);
550
551 zio_checksum_func_t *fletcher_4_test = native ? fletcher_4_native :
552 fletcher_4_byteswap;
1eeb4562 553
fc897b24
GN
554 for (i = 0; i < fletcher_4_supp_impls_cnt; i++) {
555 struct fletcher_4_kstat *stat = &fletcher_4_stat_data[i];
556 uint64_t run_count = 0;
1eeb4562 557
fc897b24
GN
558 /* temporary set an implementation */
559 fletcher_4_impl_chosen = i;
1eeb4562
JX
560
561 kpreempt_disable();
562 start = gethrtime();
1eeb4562 563 do {
fc897b24 564 for (l = 0; l < 32; l++, run_count++)
3c67d83a 565 fletcher_4_test(data, data_size, NULL, &zc);
fc897b24
GN
566
567 run_time_ns = gethrtime() - start;
568 } while (run_time_ns < FLETCHER_4_BENCH_NS);
1eeb4562
JX
569 kpreempt_enable();
570
fc897b24
GN
571 run_bw = data_size * run_count * NANOSEC;
572 run_bw /= run_time_ns; /* B/s */
573
574 if (native)
575 stat->native = run_bw;
576 else
577 stat->byteswap = run_bw;
578
579 if (run_bw > best_run) {
580 best_run = run_bw;
581
582 if (native) {
583 fastest_stat->native = i;
584 FLETCHER_4_FASTEST_FN_COPY(native,
585 fletcher_4_supp_impls[i]);
586 } else {
587 fastest_stat->byteswap = i;
588 FLETCHER_4_FASTEST_FN_COPY(byteswap,
589 fletcher_4_supp_impls[i]);
590 }
1eeb4562 591 }
fc897b24
GN
592 }
593
594 /* restore original selection */
595 atomic_swap_32(&fletcher_4_impl_chosen, sel_save);
596}
1eeb4562 597
fc897b24
GN
598void
599fletcher_4_init(void)
600{
601 static const size_t data_size = 1 << SPA_OLD_MAXBLOCKSHIFT; /* 128kiB */
602 fletcher_4_ops_t *curr_impl;
603 char *databuf;
604 int i, c;
605
606 /* move supported impl into fletcher_4_supp_impls */
607 for (i = 0, c = 0; i < ARRAY_SIZE(fletcher_4_impls); i++) {
608 curr_impl = (fletcher_4_ops_t *) fletcher_4_impls[i];
609
610 if (curr_impl->valid && curr_impl->valid())
611 fletcher_4_supp_impls[c++] = curr_impl;
34dc7c2f 612 }
fc897b24
GN
613 membar_producer(); /* complete fletcher_4_supp_impls[] init */
614 fletcher_4_supp_impls_cnt = c; /* number of supported impl */
34dc7c2f 615
fc897b24
GN
616#if !defined(_KERNEL)
617 /* Skip benchmarking and use last implementation as fastest */
618 memcpy(&fletcher_4_fastest_impl,
619 fletcher_4_supp_impls[fletcher_4_supp_impls_cnt-1],
620 sizeof (fletcher_4_fastest_impl));
621 fletcher_4_fastest_impl.name = "fastest";
622 membar_producer();
1eeb4562 623
fc897b24 624 fletcher_4_initialized = B_TRUE;
1eeb4562 625
fc897b24
GN
626 /* Use 'cycle' math selection method for userspace */
627 VERIFY0(fletcher_4_impl_set("cycle"));
628 return;
629#endif
630 /* Benchmark all supported implementations */
631 databuf = vmem_alloc(data_size, KM_SLEEP);
632 for (i = 0; i < data_size / sizeof (uint64_t); i++)
633 ((uint64_t *)databuf)[i] = (uintptr_t)(databuf+i); /* warm-up */
634
635 fletcher_4_benchmark_impl(B_FALSE, databuf, data_size);
636 fletcher_4_benchmark_impl(B_TRUE, databuf, data_size);
637
638 vmem_free(databuf, data_size);
639
640 /* install kstats for all implementations */
641 fletcher_4_kstat = kstat_create("zfs", 0, "fletcher_4_bench", "misc",
642 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
1eeb4562 643 if (fletcher_4_kstat != NULL) {
fc897b24
GN
644 fletcher_4_kstat->ks_data = NULL;
645 fletcher_4_kstat->ks_ndata = UINT32_MAX;
646 kstat_set_raw_ops(fletcher_4_kstat,
647 fletcher_4_kstat_headers,
648 fletcher_4_kstat_data,
649 fletcher_4_kstat_addr);
1eeb4562
JX
650 kstat_install(fletcher_4_kstat);
651 }
fc897b24
GN
652
653 /* Finish initialization */
654 fletcher_4_initialized = B_TRUE;
1eeb4562
JX
655}
656
657void
658fletcher_4_fini(void)
659{
1eeb4562
JX
660 if (fletcher_4_kstat != NULL) {
661 kstat_delete(fletcher_4_kstat);
662 fletcher_4_kstat = NULL;
663 }
34dc7c2f 664}
c28b2279
BB
665
666#if defined(_KERNEL) && defined(HAVE_SPL)
9cc1844a 667#include <linux/mod_compat.h>
1eeb4562
JX
668
669static int
9cc1844a 670fletcher_4_param_get(char *buffer, zfs_kernel_param_t *unused)
1eeb4562 671{
fc897b24
GN
672 const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
673 char *fmt;
1eeb4562
JX
674 int i, cnt = 0;
675
fc897b24
GN
676 /* list fastest */
677 fmt = (impl == IMPL_FASTEST) ? "[%s] " : "%s ";
678 cnt += sprintf(buffer + cnt, fmt, "fastest");
1eeb4562 679
fc897b24
GN
680 /* list all supported implementations */
681 for (i = 0; i < fletcher_4_supp_impls_cnt; i++) {
682 fmt = (i == impl) ? "[%s] " : "%s ";
683 cnt += sprintf(buffer + cnt, fmt,
684 fletcher_4_supp_impls[i]->name);
1eeb4562
JX
685 }
686
687 return (cnt);
688}
689
690static int
9cc1844a 691fletcher_4_param_set(const char *val, zfs_kernel_param_t *unused)
1eeb4562
JX
692{
693 return (fletcher_4_impl_set(val));
694}
695
696/*
697 * Choose a fletcher 4 implementation in ZFS.
fc897b24 698 * Users can choose "cycle" to exercise all implementations, but this is
1eeb4562
JX
699 * for testing purpose therefore it can only be set in user space.
700 */
701module_param_call(zfs_fletcher_4_impl,
702 fletcher_4_param_set, fletcher_4_param_get, NULL, 0644);
fc897b24 703MODULE_PARM_DESC(zfs_fletcher_4_impl, "Select fletcher 4 implementation.");
1eeb4562
JX
704
705EXPORT_SYMBOL(fletcher_4_init);
706EXPORT_SYMBOL(fletcher_4_fini);
c28b2279
BB
707EXPORT_SYMBOL(fletcher_2_native);
708EXPORT_SYMBOL(fletcher_2_byteswap);
709EXPORT_SYMBOL(fletcher_4_native);
fc897b24 710EXPORT_SYMBOL(fletcher_4_native_varsize);
c28b2279
BB
711EXPORT_SYMBOL(fletcher_4_byteswap);
712EXPORT_SYMBOL(fletcher_4_incremental_native);
713EXPORT_SYMBOL(fletcher_4_incremental_byteswap);
714#endif