]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/zfs_chksum.c
ddt: modernise assertions
[mirror_zfs.git] / module / zfs / zfs_chksum.c
CommitLineData
985c33b1
TR
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
1d3ba0bf 9 * or https://opensource.org/licenses/CDDL-1.0.
985c33b1
TR
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
97fd1ea4 23 * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
985c33b1
TR
24 */
25
985c33b1
TR
26#include <sys/zio_checksum.h>
27#include <sys/zfs_context.h>
28#include <sys/zfs_chksum.h>
4c5fec01 29#include <sys/zfs_impl.h>
985c33b1
TR
30
31#include <sys/blake3.h>
4c5fec01 32#include <sys/sha2.h>
985c33b1 33
68aa3379
TR
34/* limit benchmarking to at most 256KiB blocks when EdonR is slower than this (MiB/s): */
35#define LIMIT_PERF_MBS 300
985c33b1
TR
36
37typedef struct {
38 const char *name;
39 const char *impl;
40 uint64_t bs1k;
41 uint64_t bs4k;
42 uint64_t bs16k;
43 uint64_t bs64k;
44 uint64_t bs256k;
45 uint64_t bs1m;
46 uint64_t bs4m;
97fd1ea4 47 uint64_t bs16m;
985c33b1
TR
48 zio_cksum_salt_t salt;
49 zio_checksum_t *(func);
50 zio_checksum_tmpl_init_t *(init);
51 zio_checksum_tmpl_free_t *(free);
52} chksum_stat_t;
53
985c33b1 54static chksum_stat_t *chksum_stat_data = 0;
68aa3379
TR
55static int chksum_stat_cnt = 0;
56static kstat_t *chksum_kstat = NULL;
985c33b1
TR
57
58/*
4c5fec01 59 * Sample output on i3-1005G1 System:
985c33b1 60 *
4c5fec01
TR
61 * implementation 1k 4k 16k 64k 256k 1m 4m 16m
62 * edonr-generic 1278 1625 1769 1776 1783 1778 1771 1767
63 * skein-generic 548 594 613 623 621 623 621 486
64 * sha256-generic 255 270 281 278 279 281 283 283
65 * sha256-x64 288 310 316 317 318 317 317 316
66 * sha256-ssse3 304 342 351 355 356 357 356 356
67 * sha256-avx 311 348 359 362 362 363 363 362
68 * sha256-avx2 330 378 389 395 395 395 395 395
69 * sha256-shani 908 1127 1212 1230 1233 1234 1223 1230
70 * sha512-generic 359 409 431 427 429 430 428 423
71 * sha512-x64 420 473 490 496 497 497 496 495
72 * sha512-avx 406 522 546 560 560 560 556 560
73 * sha512-avx2 464 568 601 606 609 610 607 608
74 * blake3-generic 330 327 324 323 324 320 323 322
75 * blake3-sse2 424 1366 1449 1468 1458 1453 1395 1408
76 * blake3-sse41 453 1554 1658 1703 1689 1669 1622 1630
77 * blake3-avx2 452 2013 3225 3351 3356 3261 3076 3101
78 * blake3-avx512 498 2869 5269 5926 5872 5643 5014 5005
985c33b1
TR
79 */
/*
 * Write the header line of the benchmark table into buf (at most size
 * bytes): a 23 character wide "implementation" column followed by one
 * 8 character wide column per block size. Always returns 0.
 */
static int
chksum_kstat_headers(char *buf, size_t size)
{
	static const char *const cols[] = {
		"1k", "4k", "16k", "64k", "256k", "1m", "4m"
	};
	ssize_t off = 0;
	size_t i;

	off += kmem_scnprintf(buf, size, "%-23s", "implementation");
	for (i = 0; i < sizeof (cols) / sizeof (cols[0]); i++) {
		off += kmem_scnprintf(buf + off, size - off, "%8s",
		    cols[i]);
	}
	/* the last column also terminates the line */
	(void) kmem_scnprintf(buf + off, size - off, "%8s\n", "16m");

	return (0);
}
97
98static int
68aa3379 99chksum_kstat_data(char *buf, size_t size, void *data)
985c33b1
TR
100{
101 chksum_stat_t *cs;
102 ssize_t off = 0;
103 char b[24];
104
105 cs = (chksum_stat_t *)data;
97143b9d
RY
106 kmem_scnprintf(b, 23, "%s-%s", cs->name, cs->impl);
107 off += kmem_scnprintf(buf + off, size - off, "%-23s", b);
108 off += kmem_scnprintf(buf + off, size - off, "%8llu",
985c33b1 109 (u_longlong_t)cs->bs1k);
97143b9d 110 off += kmem_scnprintf(buf + off, size - off, "%8llu",
985c33b1 111 (u_longlong_t)cs->bs4k);
97143b9d 112 off += kmem_scnprintf(buf + off, size - off, "%8llu",
985c33b1 113 (u_longlong_t)cs->bs16k);
97143b9d 114 off += kmem_scnprintf(buf + off, size - off, "%8llu",
985c33b1 115 (u_longlong_t)cs->bs64k);
97143b9d 116 off += kmem_scnprintf(buf + off, size - off, "%8llu",
985c33b1 117 (u_longlong_t)cs->bs256k);
97143b9d 118 off += kmem_scnprintf(buf + off, size - off, "%8llu",
985c33b1 119 (u_longlong_t)cs->bs1m);
97143b9d 120 off += kmem_scnprintf(buf + off, size - off, "%8llu",
985c33b1 121 (u_longlong_t)cs->bs4m);
97143b9d 122 (void) kmem_scnprintf(buf + off, size - off, "%8llu\n",
97fd1ea4 123 (u_longlong_t)cs->bs16m);
985c33b1
TR
124
125 return (0);
126}
127
128static void *
68aa3379 129chksum_kstat_addr(kstat_t *ksp, loff_t n)
985c33b1
TR
130{
131 if (n < chksum_stat_cnt)
132 ksp->ks_private = (void *)(chksum_stat_data + n);
133 else
134 ksp->ks_private = NULL;
135
136 return (ksp->ks_private);
137}
138
139static void
140chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
141 uint64_t *result)
142{
143 hrtime_t start;
144 uint64_t run_bw, run_time_ns, run_count = 0, size = 0;
145 uint32_t l, loops = 0;
146 zio_cksum_t zcp;
147
148 switch (round) {
149 case 1: /* 1k */
150 size = 1<<10; loops = 128; break;
151 case 2: /* 2k */
152 size = 1<<12; loops = 64; break;
153 case 3: /* 4k */
154 size = 1<<14; loops = 32; break;
155 case 4: /* 16k */
156 size = 1<<16; loops = 16; break;
157 case 5: /* 256k */
158 size = 1<<18; loops = 8; break;
159 case 6: /* 1m */
160 size = 1<<20; loops = 4; break;
161 case 7: /* 4m */
162 size = 1<<22; loops = 1; break;
97fd1ea4
TR
163 case 8: /* 16m */
164 size = 1<<24; loops = 1; break;
985c33b1
TR
165 }
166
167 kpreempt_disable();
168 start = gethrtime();
169 do {
170 for (l = 0; l < loops; l++, run_count++)
171 cs->func(abd, size, ctx, &zcp);
172
173 run_time_ns = gethrtime() - start;
174 } while (run_time_ns < MSEC2NSEC(1));
175 kpreempt_enable();
176
177 run_bw = size * run_count * NANOSEC;
178 run_bw /= run_time_ns; /* B/s */
179 *result = run_bw/1024/1024; /* MiB/s */
180}
181
68aa3379
TR
182#define LIMIT_INIT 0
183#define LIMIT_NEEDED 1
184#define LIMIT_NOLIMIT 2
185
985c33b1
TR
186static void
187chksum_benchit(chksum_stat_t *cs)
188{
189 abd_t *abd;
190 void *ctx = 0;
191 void *salt = &cs->salt.zcs_bytes;
68aa3379 192 static int chksum_stat_limit = LIMIT_INIT;
985c33b1 193
985c33b1 194 memset(salt, 0, sizeof (cs->salt.zcs_bytes));
68aa3379 195 if (cs->init)
985c33b1 196 ctx = cs->init(&cs->salt);
985c33b1 197
97fd1ea4
TR
198 /* allocate test memory via abd linear interface */
199 abd = abd_alloc_linear(1<<20, B_FALSE);
985c33b1
TR
200 chksum_run(cs, abd, ctx, 1, &cs->bs1k);
201 chksum_run(cs, abd, ctx, 2, &cs->bs4k);
202 chksum_run(cs, abd, ctx, 3, &cs->bs16k);
203 chksum_run(cs, abd, ctx, 4, &cs->bs64k);
204 chksum_run(cs, abd, ctx, 5, &cs->bs256k);
68aa3379
TR
205
206 /* check if we ran on a slow cpu */
207 if (chksum_stat_limit == LIMIT_INIT) {
208 if (cs->bs1k < LIMIT_PERF_MBS) {
209 chksum_stat_limit = LIMIT_NEEDED;
210 } else {
211 chksum_stat_limit = LIMIT_NOLIMIT;
212 }
213 }
214
215 /* skip benchmarks >= 1MiB when the CPU is to slow */
216 if (chksum_stat_limit == LIMIT_NEEDED)
217 goto abort;
218
985c33b1 219 chksum_run(cs, abd, ctx, 6, &cs->bs1m);
97fd1ea4
TR
220 abd_free(abd);
221
222 /* allocate test memory via abd non linear interface */
223 abd = abd_alloc(1<<24, B_FALSE);
985c33b1 224 chksum_run(cs, abd, ctx, 7, &cs->bs4m);
97fd1ea4 225 chksum_run(cs, abd, ctx, 8, &cs->bs16m);
68aa3379
TR
226
227abort:
97fd1ea4 228 abd_free(abd);
985c33b1
TR
229
230 /* free up temp memory */
68aa3379 231 if (cs->free)
985c33b1 232 cs->free(ctx);
985c33b1
TR
233}
234
235/*
236 * Initialize and benchmark all supported implementations.
237 */
238static void
239chksum_benchmark(void)
240{
985c33b1
TR
241#ifndef _KERNEL
242 /* we need the benchmark only for the kernel module */
243 return;
244#endif
245
246 chksum_stat_t *cs;
4c5fec01
TR
247 uint64_t max;
248 uint32_t id, cbid = 0, id_save;
249 const zfs_impl_t *blake3 = zfs_impl_get_ops("blake3");
250 const zfs_impl_t *sha256 = zfs_impl_get_ops("sha256");
251 const zfs_impl_t *sha512 = zfs_impl_get_ops("sha512");
252
253 /* count implementations */
254 chksum_stat_cnt = 2;
255 chksum_stat_cnt += sha256->getcnt();
256 chksum_stat_cnt += sha512->getcnt();
257 chksum_stat_cnt += blake3->getcnt();
7384ec65 258 chksum_stat_data = kmem_zalloc(
985c33b1
TR
259 sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
260
68aa3379 261 /* edonr - needs to be the first one here (slow CPU check) */
985c33b1 262 cs = &chksum_stat_data[cbid++];
4c5fec01
TR
263
264 /* edonr */
985c33b1
TR
265 cs->init = abd_checksum_edonr_tmpl_init;
266 cs->func = abd_checksum_edonr_native;
267 cs->free = abd_checksum_edonr_tmpl_free;
268 cs->name = "edonr";
269 cs->impl = "generic";
270 chksum_benchit(cs);
271
272 /* skein */
273 cs = &chksum_stat_data[cbid++];
274 cs->init = abd_checksum_skein_tmpl_init;
275 cs->func = abd_checksum_skein_native;
276 cs->free = abd_checksum_skein_tmpl_free;
277 cs->name = "skein";
278 cs->impl = "generic";
279 chksum_benchit(cs);
280
281 /* sha256 */
4c5fec01
TR
282 id_save = sha256->getid();
283 for (max = 0, id = 0; id < sha256->getcnt(); id++) {
284 sha256->setid(id);
285 cs = &chksum_stat_data[cbid++];
286 cs->init = 0;
287 cs->func = abd_checksum_sha256;
288 cs->free = 0;
289 cs->name = sha256->name;
290 cs->impl = sha256->getname();
291 chksum_benchit(cs);
292 if (cs->bs256k > max) {
293 max = cs->bs256k;
294 sha256->set_fastest(id);
295 }
296 }
297 sha256->setid(id_save);
985c33b1
TR
298
299 /* sha512 */
4c5fec01
TR
300 id_save = sha512->getid();
301 for (max = 0, id = 0; id < sha512->getcnt(); id++) {
302 sha512->setid(id);
303 cs = &chksum_stat_data[cbid++];
304 cs->init = 0;
305 cs->func = abd_checksum_sha512_native;
306 cs->free = 0;
307 cs->name = sha512->name;
308 cs->impl = sha512->getname();
309 chksum_benchit(cs);
310 if (cs->bs256k > max) {
311 max = cs->bs256k;
312 sha512->set_fastest(id);
313 }
314 }
315 sha512->setid(id_save);
985c33b1
TR
316
317 /* blake3 */
4c5fec01
TR
318 id_save = blake3->getid();
319 for (max = 0, id = 0; id < blake3->getcnt(); id++) {
320 blake3->setid(id);
985c33b1
TR
321 cs = &chksum_stat_data[cbid++];
322 cs->init = abd_checksum_blake3_tmpl_init;
323 cs->func = abd_checksum_blake3_native;
324 cs->free = abd_checksum_blake3_tmpl_free;
4c5fec01
TR
325 cs->name = blake3->name;
326 cs->impl = blake3->getname();
985c33b1
TR
327 chksum_benchit(cs);
328 if (cs->bs256k > max) {
329 max = cs->bs256k;
4c5fec01 330 blake3->set_fastest(id);
985c33b1
TR
331 }
332 }
4c5fec01 333 blake3->setid(id_save);
985c33b1
TR
334}
335
336void
337chksum_init(void)
338{
deb12130
TR
339#ifdef _KERNEL
340 blake3_per_cpu_ctx_init();
341#endif
985c33b1
TR
342
343 /* Benchmark supported implementations */
344 chksum_benchmark();
345
346 /* Install kstats for all implementations */
347 chksum_kstat = kstat_create("zfs", 0, "chksum_bench", "misc",
348 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
349
350 if (chksum_kstat != NULL) {
351 chksum_kstat->ks_data = NULL;
352 chksum_kstat->ks_ndata = UINT32_MAX;
353 kstat_set_raw_ops(chksum_kstat,
68aa3379
TR
354 chksum_kstat_headers,
355 chksum_kstat_data,
356 chksum_kstat_addr);
985c33b1
TR
357 kstat_install(chksum_kstat);
358 }
985c33b1
TR
359}
360
361void
362chksum_fini(void)
363{
364 if (chksum_kstat != NULL) {
365 kstat_delete(chksum_kstat);
366 chksum_kstat = NULL;
367 }
368
369 if (chksum_stat_cnt) {
370 kmem_free(chksum_stat_data,
371 sizeof (chksum_stat_t) * chksum_stat_cnt);
372 chksum_stat_cnt = 0;
373 chksum_stat_data = 0;
374 }
deb12130
TR
375
376#ifdef _KERNEL
377 blake3_per_cpu_ctx_fini();
378#endif
985c33b1 379}