]>
Commit | Line | Data |
---|---|---|
985c33b1 TR |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
985c33b1 TR |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | ||
22 | /* | |
97fd1ea4 | 23 | * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de> |
985c33b1 TR |
24 | */ |
25 | ||
985c33b1 TR |
26 | #include <sys/zio_checksum.h> |
27 | #include <sys/zfs_context.h> | |
28 | #include <sys/zfs_chksum.h> | |
4c5fec01 | 29 | #include <sys/zfs_impl.h> |
985c33b1 TR |
30 | |
31 | #include <sys/blake3.h> | |
4c5fec01 | 32 | #include <sys/sha2.h> |
985c33b1 | 33 | |
68aa3379 TR |
34 | /* limit benchmarking to max 256KiB, when EdonR is slower than this (in MiB/s): */ |
35 | #define LIMIT_PERF_MBS 300 | |
985c33b1 TR |
36 | |
37 | typedef struct { | |
38 | const char *name; | |
39 | const char *impl; | |
40 | uint64_t bs1k; | |
41 | uint64_t bs4k; | |
42 | uint64_t bs16k; | |
43 | uint64_t bs64k; | |
44 | uint64_t bs256k; | |
45 | uint64_t bs1m; | |
46 | uint64_t bs4m; | |
97fd1ea4 | 47 | uint64_t bs16m; |
985c33b1 TR |
48 | zio_cksum_salt_t salt; |
49 | zio_checksum_t *(func); | |
50 | zio_checksum_tmpl_init_t *(init); | |
51 | zio_checksum_tmpl_free_t *(free); | |
52 | } chksum_stat_t; | |
53 | ||
985c33b1 | 54 | static chksum_stat_t *chksum_stat_data = 0; |
68aa3379 TR |
55 | static int chksum_stat_cnt = 0; |
56 | static kstat_t *chksum_kstat = NULL; | |
985c33b1 TR |
57 | |
58 | /* | |
4c5fec01 | 59 | * Sample output on i3-1005G1 System: |
985c33b1 | 60 | * |
4c5fec01 TR |
61 | * implementation 1k 4k 16k 64k 256k 1m 4m 16m |
62 | * edonr-generic 1278 1625 1769 1776 1783 1778 1771 1767 | |
63 | * skein-generic 548 594 613 623 621 623 621 486 | |
64 | * sha256-generic 255 270 281 278 279 281 283 283 | |
65 | * sha256-x64 288 310 316 317 318 317 317 316 | |
66 | * sha256-ssse3 304 342 351 355 356 357 356 356 | |
67 | * sha256-avx 311 348 359 362 362 363 363 362 | |
68 | * sha256-avx2 330 378 389 395 395 395 395 395 | |
69 | * sha256-shani 908 1127 1212 1230 1233 1234 1223 1230 | |
70 | * sha512-generic 359 409 431 427 429 430 428 423 | |
71 | * sha512-x64 420 473 490 496 497 497 496 495 | |
72 | * sha512-avx 406 522 546 560 560 560 556 560 | |
73 | * sha512-avx2 464 568 601 606 609 610 607 608 | |
74 | * blake3-generic 330 327 324 323 324 320 323 322 | |
75 | * blake3-sse2 424 1366 1449 1468 1458 1453 1395 1408 | |
76 | * blake3-sse41 453 1554 1658 1703 1689 1669 1622 1630 | |
77 | * blake3-avx2 452 2013 3225 3351 3356 3261 3076 3101 | |
78 | * blake3-avx512 498 2869 5269 5926 5872 5643 5014 5005 | |
985c33b1 TR |
79 | */ |
/*
 * kstat raw-ops callback: write the column header line into buf.
 *
 *	buf	destination buffer
 *	size	capacity of buf in bytes
 *
 * Always returns 0.
 */
static int
chksum_kstat_headers(char *buf, size_t size)
{
	/* every block size column except the last, which ends the line */
	static const char *const columns[] = {
		"1k", "4k", "16k", "64k", "256k", "1m", "4m"
	};
	ssize_t pos;
	size_t i;

	pos = kmem_scnprintf(buf, size, "%-23s", "implementation");
	for (i = 0; i < sizeof (columns) / sizeof (columns[0]); i++) {
		pos += kmem_scnprintf(buf + pos, size - pos, "%8s",
		    columns[i]);
	}
	(void) kmem_scnprintf(buf + pos, size - pos, "%8s\n", "16m");

	return (0);
}
97 | ||
98 | static int | |
68aa3379 | 99 | chksum_kstat_data(char *buf, size_t size, void *data) |
985c33b1 TR |
100 | { |
101 | chksum_stat_t *cs; | |
102 | ssize_t off = 0; | |
103 | char b[24]; | |
104 | ||
105 | cs = (chksum_stat_t *)data; | |
97143b9d RY |
106 | kmem_scnprintf(b, 23, "%s-%s", cs->name, cs->impl); |
107 | off += kmem_scnprintf(buf + off, size - off, "%-23s", b); | |
108 | off += kmem_scnprintf(buf + off, size - off, "%8llu", | |
985c33b1 | 109 | (u_longlong_t)cs->bs1k); |
97143b9d | 110 | off += kmem_scnprintf(buf + off, size - off, "%8llu", |
985c33b1 | 111 | (u_longlong_t)cs->bs4k); |
97143b9d | 112 | off += kmem_scnprintf(buf + off, size - off, "%8llu", |
985c33b1 | 113 | (u_longlong_t)cs->bs16k); |
97143b9d | 114 | off += kmem_scnprintf(buf + off, size - off, "%8llu", |
985c33b1 | 115 | (u_longlong_t)cs->bs64k); |
97143b9d | 116 | off += kmem_scnprintf(buf + off, size - off, "%8llu", |
985c33b1 | 117 | (u_longlong_t)cs->bs256k); |
97143b9d | 118 | off += kmem_scnprintf(buf + off, size - off, "%8llu", |
985c33b1 | 119 | (u_longlong_t)cs->bs1m); |
97143b9d | 120 | off += kmem_scnprintf(buf + off, size - off, "%8llu", |
985c33b1 | 121 | (u_longlong_t)cs->bs4m); |
97143b9d | 122 | (void) kmem_scnprintf(buf + off, size - off, "%8llu\n", |
97fd1ea4 | 123 | (u_longlong_t)cs->bs16m); |
985c33b1 TR |
124 | |
125 | return (0); | |
126 | } | |
127 | ||
128 | static void * | |
68aa3379 | 129 | chksum_kstat_addr(kstat_t *ksp, loff_t n) |
985c33b1 TR |
130 | { |
131 | if (n < chksum_stat_cnt) | |
132 | ksp->ks_private = (void *)(chksum_stat_data + n); | |
133 | else | |
134 | ksp->ks_private = NULL; | |
135 | ||
136 | return (ksp->ks_private); | |
137 | } | |
138 | ||
139 | static void | |
140 | chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round, | |
141 | uint64_t *result) | |
142 | { | |
143 | hrtime_t start; | |
144 | uint64_t run_bw, run_time_ns, run_count = 0, size = 0; | |
145 | uint32_t l, loops = 0; | |
146 | zio_cksum_t zcp; | |
147 | ||
148 | switch (round) { | |
149 | case 1: /* 1k */ | |
150 | size = 1<<10; loops = 128; break; | |
151 | case 2: /* 2k */ | |
152 | size = 1<<12; loops = 64; break; | |
153 | case 3: /* 4k */ | |
154 | size = 1<<14; loops = 32; break; | |
155 | case 4: /* 16k */ | |
156 | size = 1<<16; loops = 16; break; | |
157 | case 5: /* 256k */ | |
158 | size = 1<<18; loops = 8; break; | |
159 | case 6: /* 1m */ | |
160 | size = 1<<20; loops = 4; break; | |
161 | case 7: /* 4m */ | |
162 | size = 1<<22; loops = 1; break; | |
97fd1ea4 TR |
163 | case 8: /* 16m */ |
164 | size = 1<<24; loops = 1; break; | |
985c33b1 TR |
165 | } |
166 | ||
167 | kpreempt_disable(); | |
168 | start = gethrtime(); | |
169 | do { | |
170 | for (l = 0; l < loops; l++, run_count++) | |
171 | cs->func(abd, size, ctx, &zcp); | |
172 | ||
173 | run_time_ns = gethrtime() - start; | |
174 | } while (run_time_ns < MSEC2NSEC(1)); | |
175 | kpreempt_enable(); | |
176 | ||
177 | run_bw = size * run_count * NANOSEC; | |
178 | run_bw /= run_time_ns; /* B/s */ | |
179 | *result = run_bw/1024/1024; /* MiB/s */ | |
180 | } | |
181 | ||
68aa3379 TR |
182 | #define LIMIT_INIT 0 |
183 | #define LIMIT_NEEDED 1 | |
184 | #define LIMIT_NOLIMIT 2 | |
185 | ||
985c33b1 TR |
186 | static void |
187 | chksum_benchit(chksum_stat_t *cs) | |
188 | { | |
189 | abd_t *abd; | |
190 | void *ctx = 0; | |
191 | void *salt = &cs->salt.zcs_bytes; | |
68aa3379 | 192 | static int chksum_stat_limit = LIMIT_INIT; |
985c33b1 | 193 | |
985c33b1 | 194 | memset(salt, 0, sizeof (cs->salt.zcs_bytes)); |
68aa3379 | 195 | if (cs->init) |
985c33b1 | 196 | ctx = cs->init(&cs->salt); |
985c33b1 | 197 | |
97fd1ea4 TR |
198 | /* allocate test memory via abd linear interface */ |
199 | abd = abd_alloc_linear(1<<20, B_FALSE); | |
985c33b1 TR |
200 | chksum_run(cs, abd, ctx, 1, &cs->bs1k); |
201 | chksum_run(cs, abd, ctx, 2, &cs->bs4k); | |
202 | chksum_run(cs, abd, ctx, 3, &cs->bs16k); | |
203 | chksum_run(cs, abd, ctx, 4, &cs->bs64k); | |
204 | chksum_run(cs, abd, ctx, 5, &cs->bs256k); | |
68aa3379 TR |
205 | |
206 | /* check if we ran on a slow cpu */ | |
207 | if (chksum_stat_limit == LIMIT_INIT) { | |
208 | if (cs->bs1k < LIMIT_PERF_MBS) { | |
209 | chksum_stat_limit = LIMIT_NEEDED; | |
210 | } else { | |
211 | chksum_stat_limit = LIMIT_NOLIMIT; | |
212 | } | |
213 | } | |
214 | ||
215 | /* skip benchmarks >= 1MiB when the CPU is to slow */ | |
216 | if (chksum_stat_limit == LIMIT_NEEDED) | |
217 | goto abort; | |
218 | ||
985c33b1 | 219 | chksum_run(cs, abd, ctx, 6, &cs->bs1m); |
97fd1ea4 TR |
220 | abd_free(abd); |
221 | ||
222 | /* allocate test memory via abd non linear interface */ | |
223 | abd = abd_alloc(1<<24, B_FALSE); | |
985c33b1 | 224 | chksum_run(cs, abd, ctx, 7, &cs->bs4m); |
97fd1ea4 | 225 | chksum_run(cs, abd, ctx, 8, &cs->bs16m); |
68aa3379 TR |
226 | |
227 | abort: | |
97fd1ea4 | 228 | abd_free(abd); |
985c33b1 TR |
229 | |
230 | /* free up temp memory */ | |
68aa3379 | 231 | if (cs->free) |
985c33b1 | 232 | cs->free(ctx); |
985c33b1 TR |
233 | } |
234 | ||
235 | /* | |
236 | * Initialize and benchmark all supported implementations. | |
237 | */ | |
238 | static void | |
239 | chksum_benchmark(void) | |
240 | { | |
985c33b1 TR |
241 | #ifndef _KERNEL |
242 | /* we need the benchmark only for the kernel module */ | |
243 | return; | |
244 | #endif | |
245 | ||
246 | chksum_stat_t *cs; | |
4c5fec01 TR |
247 | uint64_t max; |
248 | uint32_t id, cbid = 0, id_save; | |
249 | const zfs_impl_t *blake3 = zfs_impl_get_ops("blake3"); | |
250 | const zfs_impl_t *sha256 = zfs_impl_get_ops("sha256"); | |
251 | const zfs_impl_t *sha512 = zfs_impl_get_ops("sha512"); | |
252 | ||
253 | /* count implementations */ | |
254 | chksum_stat_cnt = 2; | |
255 | chksum_stat_cnt += sha256->getcnt(); | |
256 | chksum_stat_cnt += sha512->getcnt(); | |
257 | chksum_stat_cnt += blake3->getcnt(); | |
7384ec65 | 258 | chksum_stat_data = kmem_zalloc( |
985c33b1 TR |
259 | sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP); |
260 | ||
68aa3379 | 261 | /* edonr - needs to be the first one here (slow CPU check) */ |
985c33b1 | 262 | cs = &chksum_stat_data[cbid++]; |
4c5fec01 TR |
263 | |
264 | /* edonr */ | |
985c33b1 TR |
265 | cs->init = abd_checksum_edonr_tmpl_init; |
266 | cs->func = abd_checksum_edonr_native; | |
267 | cs->free = abd_checksum_edonr_tmpl_free; | |
268 | cs->name = "edonr"; | |
269 | cs->impl = "generic"; | |
270 | chksum_benchit(cs); | |
271 | ||
272 | /* skein */ | |
273 | cs = &chksum_stat_data[cbid++]; | |
274 | cs->init = abd_checksum_skein_tmpl_init; | |
275 | cs->func = abd_checksum_skein_native; | |
276 | cs->free = abd_checksum_skein_tmpl_free; | |
277 | cs->name = "skein"; | |
278 | cs->impl = "generic"; | |
279 | chksum_benchit(cs); | |
280 | ||
281 | /* sha256 */ | |
4c5fec01 TR |
282 | id_save = sha256->getid(); |
283 | for (max = 0, id = 0; id < sha256->getcnt(); id++) { | |
284 | sha256->setid(id); | |
285 | cs = &chksum_stat_data[cbid++]; | |
286 | cs->init = 0; | |
287 | cs->func = abd_checksum_sha256; | |
288 | cs->free = 0; | |
289 | cs->name = sha256->name; | |
290 | cs->impl = sha256->getname(); | |
291 | chksum_benchit(cs); | |
292 | if (cs->bs256k > max) { | |
293 | max = cs->bs256k; | |
294 | sha256->set_fastest(id); | |
295 | } | |
296 | } | |
297 | sha256->setid(id_save); | |
985c33b1 TR |
298 | |
299 | /* sha512 */ | |
4c5fec01 TR |
300 | id_save = sha512->getid(); |
301 | for (max = 0, id = 0; id < sha512->getcnt(); id++) { | |
302 | sha512->setid(id); | |
303 | cs = &chksum_stat_data[cbid++]; | |
304 | cs->init = 0; | |
305 | cs->func = abd_checksum_sha512_native; | |
306 | cs->free = 0; | |
307 | cs->name = sha512->name; | |
308 | cs->impl = sha512->getname(); | |
309 | chksum_benchit(cs); | |
310 | if (cs->bs256k > max) { | |
311 | max = cs->bs256k; | |
312 | sha512->set_fastest(id); | |
313 | } | |
314 | } | |
315 | sha512->setid(id_save); | |
985c33b1 TR |
316 | |
317 | /* blake3 */ | |
4c5fec01 TR |
318 | id_save = blake3->getid(); |
319 | for (max = 0, id = 0; id < blake3->getcnt(); id++) { | |
320 | blake3->setid(id); | |
985c33b1 TR |
321 | cs = &chksum_stat_data[cbid++]; |
322 | cs->init = abd_checksum_blake3_tmpl_init; | |
323 | cs->func = abd_checksum_blake3_native; | |
324 | cs->free = abd_checksum_blake3_tmpl_free; | |
4c5fec01 TR |
325 | cs->name = blake3->name; |
326 | cs->impl = blake3->getname(); | |
985c33b1 TR |
327 | chksum_benchit(cs); |
328 | if (cs->bs256k > max) { | |
329 | max = cs->bs256k; | |
4c5fec01 | 330 | blake3->set_fastest(id); |
985c33b1 TR |
331 | } |
332 | } | |
4c5fec01 | 333 | blake3->setid(id_save); |
985c33b1 TR |
334 | } |
335 | ||
336 | void | |
337 | chksum_init(void) | |
338 | { | |
deb12130 TR |
339 | #ifdef _KERNEL |
340 | blake3_per_cpu_ctx_init(); | |
341 | #endif | |
985c33b1 TR |
342 | |
343 | /* Benchmark supported implementations */ | |
344 | chksum_benchmark(); | |
345 | ||
346 | /* Install kstats for all implementations */ | |
347 | chksum_kstat = kstat_create("zfs", 0, "chksum_bench", "misc", | |
348 | KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); | |
349 | ||
350 | if (chksum_kstat != NULL) { | |
351 | chksum_kstat->ks_data = NULL; | |
352 | chksum_kstat->ks_ndata = UINT32_MAX; | |
353 | kstat_set_raw_ops(chksum_kstat, | |
68aa3379 TR |
354 | chksum_kstat_headers, |
355 | chksum_kstat_data, | |
356 | chksum_kstat_addr); | |
985c33b1 TR |
357 | kstat_install(chksum_kstat); |
358 | } | |
985c33b1 TR |
359 | } |
360 | ||
361 | void | |
362 | chksum_fini(void) | |
363 | { | |
364 | if (chksum_kstat != NULL) { | |
365 | kstat_delete(chksum_kstat); | |
366 | chksum_kstat = NULL; | |
367 | } | |
368 | ||
369 | if (chksum_stat_cnt) { | |
370 | kmem_free(chksum_stat_data, | |
371 | sizeof (chksum_stat_t) * chksum_stat_cnt); | |
372 | chksum_stat_cnt = 0; | |
373 | chksum_stat_data = 0; | |
374 | } | |
deb12130 TR |
375 | |
376 | #ifdef _KERNEL | |
377 | blake3_per_cpu_ctx_fini(); | |
378 | #endif | |
985c33b1 | 379 | } |