]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/vdev_raidz_math.c
ddt: document the theory and the key data structures
[mirror_zfs.git] / module / zfs / vdev_raidz_math.c
CommitLineData
ab9f4b0b
GN
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
1d3ba0bf 9 * or https://opensource.org/licenses/CDDL-1.0.
ab9f4b0b
GN
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
23 */
24
a3f82aec 25#include <sys/simd.h>
ab9f4b0b
GN
26#include <sys/zfs_context.h>
27#include <sys/types.h>
28#include <sys/zio.h>
29#include <sys/debug.h>
30#include <sys/zfs_debug.h>
ab9f4b0b
GN
31#include <sys/vdev_raidz.h>
32#include <sys/vdev_raidz_impl.h>
33
c9187d86
GN
34/* Opaque implementation with NULL methods to represent original methods */
35static const raidz_impl_ops_t vdev_raidz_original_impl = {
36 .name = "original",
37 .is_supported = raidz_will_scalar_work,
38};
39
40/* RAIDZ parity op that contain the fastest methods */
41static raidz_impl_ops_t vdev_raidz_fastest_impl = {
42 .name = "fastest"
43};
44
ab9f4b0b 45/* All compiled in implementations */
18168da7 46static const raidz_impl_ops_t *const raidz_all_maths[] = {
c9187d86 47 &vdev_raidz_original_impl,
ab9f4b0b 48 &vdev_raidz_scalar_impl,
ae25d222
GN
49#if defined(__x86_64) && defined(HAVE_SSE2) /* only x86_64 for now */
50 &vdev_raidz_sse2_impl,
51#endif
ab9f4b0b 52#if defined(__x86_64) && defined(HAVE_SSSE3) /* only x86_64 for now */
ae25d222 53 &vdev_raidz_ssse3_impl,
ab9f4b0b
GN
54#endif
55#if defined(__x86_64) && defined(HAVE_AVX2) /* only x86_64 for now */
62a65a65
RD
56 &vdev_raidz_avx2_impl,
57#endif
7f547f85 58#if defined(__x86_64) && defined(HAVE_AVX512F) /* only x86_64 for now */
65d71d42 59 &vdev_raidz_avx512f_impl,
7f547f85
RD
60#endif
61#if defined(__x86_64) && defined(HAVE_AVX512BW) /* only x86_64 for now */
01017962 62 &vdev_raidz_avx512bw_impl,
7f547f85 63#endif
2dbad447 64#if defined(__aarch64__) && !defined(__FreeBSD__)
88cc2352
RD
65 &vdev_raidz_aarch64_neon_impl,
66 &vdev_raidz_aarch64_neonx2_impl,
ab9f4b0b 67#endif
f5b189f9 68#if defined(__powerpc__) && defined(__altivec__)
35b07497
RD
69 &vdev_raidz_powerpc_altivec_impl,
70#endif
ab9f4b0b
GN
71};
72
73/* Indicate that benchmark has been completed */
74static boolean_t raidz_math_initialized = B_FALSE;
75
76/* Select raidz implementation */
c9187d86
GN
77#define IMPL_FASTEST (UINT32_MAX)
78#define IMPL_CYCLE (UINT32_MAX - 1)
79#define IMPL_ORIGINAL (0)
80#define IMPL_SCALAR (1)
81
82#define RAIDZ_IMPL_READ(i) (*(volatile uint32_t *) &(i))
83
84static uint32_t zfs_vdev_raidz_impl = IMPL_SCALAR;
85static uint32_t user_sel_impl = IMPL_FASTEST;
ab9f4b0b
GN
86
87/* Hold all supported implementations */
c9187d86
GN
88static size_t raidz_supp_impl_cnt = 0;
89static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
ab9f4b0b 90
e5db3134 91#if defined(_KERNEL)
ab9f4b0b 92/*
26a08b5c
GN
93 * kstats values for supported implementations
94 * Values represent per disk throughput of 8 disk+parity raidz vdev [B/s]
ab9f4b0b
GN
95 */
96static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1];
97
98/* kstat for benchmarked implementations */
99static kstat_t *raidz_math_kstat = NULL;
e5db3134 100#endif
ab9f4b0b
GN
101
102/*
e5db3134
BB
103 * Returns the RAIDZ operations for raidz_map() parity calculations. When
104 * a SIMD implementation is not allowed in the current context, then fallback
105 * to the fastest generic implementation.
ab9f4b0b 106 */
e5db3134
BB
107const raidz_impl_ops_t *
108vdev_raidz_math_get_ops(void)
ab9f4b0b 109{
e5db3134
BB
110 if (!kfpu_allowed())
111 return (&vdev_raidz_scalar_impl);
112
c9187d86
GN
113 raidz_impl_ops_t *ops = NULL;
114 const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
115
116 switch (impl) {
117 case IMPL_FASTEST:
118 ASSERT(raidz_math_initialized);
119 ops = &vdev_raidz_fastest_impl;
120 break;
c9187d86 121 case IMPL_CYCLE:
e5db3134 122 /* Cycle through all supported implementations */
c9187d86
GN
123 ASSERT(raidz_math_initialized);
124 ASSERT3U(raidz_supp_impl_cnt, >, 0);
ab9f4b0b 125 static size_t cycle_impl_idx = 0;
c9187d86
GN
126 size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
127 ops = raidz_supp_impl[idx];
e5db3134 128 break;
c9187d86 129 case IMPL_ORIGINAL:
02730c33 130 ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl;
c9187d86
GN
131 break;
132 case IMPL_SCALAR:
02730c33 133 ops = (raidz_impl_ops_t *)&vdev_raidz_scalar_impl;
c9187d86
GN
134 break;
135 default:
c9187d86
GN
136 ASSERT3U(impl, <, raidz_supp_impl_cnt);
137 ASSERT3U(raidz_supp_impl_cnt, >, 0);
d99a0153
CW
138 if (impl < ARRAY_SIZE(raidz_all_maths))
139 ops = raidz_supp_impl[impl];
c9187d86
GN
140 break;
141 }
ab9f4b0b 142
c9187d86
GN
143 ASSERT3P(ops, !=, NULL);
144
145 return (ops);
ab9f4b0b
GN
146}
147
148/*
149 * Select parity generation method for raidz_map
150 */
c9187d86 151int
b2255edc 152vdev_raidz_math_generate(raidz_map_t *rm, raidz_row_t *rr)
ab9f4b0b
GN
153{
154 raidz_gen_f gen_parity = NULL;
155
156 switch (raidz_parity(rm)) {
157 case 1:
158 gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P];
159 break;
160 case 2:
161 gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQ];
162 break;
163 case 3:
164 gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQR];
165 break;
166 default:
167 gen_parity = NULL;
5dbf6c5a
AZ
168 cmn_err(CE_PANIC, "invalid RAID-Z configuration %llu",
169 (u_longlong_t)raidz_parity(rm));
ab9f4b0b
GN
170 break;
171 }
172
c9187d86
GN
173 /* if method is NULL execute the original implementation */
174 if (gen_parity == NULL)
175 return (RAIDZ_ORIGINAL_IMPL);
ab9f4b0b 176
b2255edc 177 gen_parity(rr);
c9187d86
GN
178
179 return (0);
ab9f4b0b
GN
180}
181
182static raidz_rec_f
c9187d86 183reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid,
4ea3f864 184 const int nbaddata)
ab9f4b0b
GN
185{
186 if (nbaddata == 1 && parity_valid[CODE_P]) {
187 return (rm->rm_ops->rec[RAIDZ_REC_P]);
188 }
189 return ((raidz_rec_f) NULL);
190}
191
192static raidz_rec_f
c9187d86 193reconstruct_fun_pq_sel(raidz_map_t *rm, const int *parity_valid,
4ea3f864 194 const int nbaddata)
ab9f4b0b
GN
195{
196 if (nbaddata == 1) {
197 if (parity_valid[CODE_P]) {
198 return (rm->rm_ops->rec[RAIDZ_REC_P]);
199 } else if (parity_valid[CODE_Q]) {
200 return (rm->rm_ops->rec[RAIDZ_REC_Q]);
201 }
202 } else if (nbaddata == 2 &&
02730c33 203 parity_valid[CODE_P] && parity_valid[CODE_Q]) {
ab9f4b0b
GN
204 return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
205 }
206 return ((raidz_rec_f) NULL);
207}
208
209static raidz_rec_f
c9187d86 210reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
4ea3f864 211 const int nbaddata)
ab9f4b0b
GN
212{
213 if (nbaddata == 1) {
214 if (parity_valid[CODE_P]) {
215 return (rm->rm_ops->rec[RAIDZ_REC_P]);
216 } else if (parity_valid[CODE_Q]) {
217 return (rm->rm_ops->rec[RAIDZ_REC_Q]);
218 } else if (parity_valid[CODE_R]) {
219 return (rm->rm_ops->rec[RAIDZ_REC_R]);
220 }
221 } else if (nbaddata == 2) {
222 if (parity_valid[CODE_P] && parity_valid[CODE_Q]) {
223 return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
224 } else if (parity_valid[CODE_P] && parity_valid[CODE_R]) {
225 return (rm->rm_ops->rec[RAIDZ_REC_PR]);
226 } else if (parity_valid[CODE_Q] && parity_valid[CODE_R]) {
227 return (rm->rm_ops->rec[RAIDZ_REC_QR]);
228 }
229 } else if (nbaddata == 3 &&
02730c33
BB
230 parity_valid[CODE_P] && parity_valid[CODE_Q] &&
231 parity_valid[CODE_R]) {
ab9f4b0b
GN
232 return (rm->rm_ops->rec[RAIDZ_REC_PQR]);
233 }
234 return ((raidz_rec_f) NULL);
235}
236
237/*
238 * Select data reconstruction method for raidz_map
239 * @parity_valid - Parity validity flag
240 * @dt - Failed data index array
241 * @nbaddata - Number of failed data columns
242 */
243int
b2255edc
BB
244vdev_raidz_math_reconstruct(raidz_map_t *rm, raidz_row_t *rr,
245 const int *parity_valid, const int *dt, const int nbaddata)
ab9f4b0b 246{
cbf484f8 247 raidz_rec_f rec_fn = NULL;
ab9f4b0b
GN
248
249 switch (raidz_parity(rm)) {
c9187d86 250 case PARITY_P:
cbf484f8 251 rec_fn = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
c9187d86
GN
252 break;
253 case PARITY_PQ:
cbf484f8 254 rec_fn = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
c9187d86
GN
255 break;
256 case PARITY_PQR:
cbf484f8 257 rec_fn = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
c9187d86
GN
258 break;
259 default:
5dbf6c5a
AZ
260 cmn_err(CE_PANIC, "invalid RAID-Z configuration %llu",
261 (u_longlong_t)raidz_parity(rm));
c9187d86 262 break;
ab9f4b0b
GN
263 }
264
cbf484f8 265 if (rec_fn == NULL)
c9187d86
GN
266 return (RAIDZ_ORIGINAL_IMPL);
267 else
b2255edc 268 return (rec_fn(rr, dt));
ab9f4b0b
GN
269}
270
/* Printable names of the parity generation methods */
const char *const raidz_gen_name[] = {
	"gen_p",
	"gen_pq",
	"gen_pqr"
};

/* Printable names of the data reconstruction methods */
const char *const raidz_rec_name[] = {
	"rec_p",
	"rec_q",
	"rec_r",
	"rec_pq",
	"rec_pr",
	"rec_qr",
	"rec_pqr"
};
278
e5db3134
BB
279#if defined(_KERNEL)
280
26a08b5c
GN
281#define RAIDZ_KSTAT_LINE_LEN (17 + 10*12 + 1)
282
283static int
284raidz_math_kstat_headers(char *buf, size_t size)
ab9f4b0b 285{
26a08b5c
GN
286 ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN);
287
97143b9d 288 ssize_t off = kmem_scnprintf(buf, size, "%-17s", "implementation");
26a08b5c 289
18168da7 290 for (int i = 0; i < ARRAY_SIZE(raidz_gen_name); i++)
97143b9d 291 off += kmem_scnprintf(buf + off, size - off, "%-16s",
26a08b5c
GN
292 raidz_gen_name[i]);
293
18168da7 294 for (int i = 0; i < ARRAY_SIZE(raidz_rec_name); i++)
97143b9d 295 off += kmem_scnprintf(buf + off, size - off, "%-16s",
26a08b5c
GN
296 raidz_rec_name[i]);
297
97143b9d 298 (void) kmem_scnprintf(buf + off, size - off, "\n");
26a08b5c
GN
299
300 return (0);
301}
302
303static int
304raidz_math_kstat_data(char *buf, size_t size, void *data)
305{
02730c33
BB
306 raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
307 raidz_impl_kstat_t *cstat = (raidz_impl_kstat_t *)data;
26a08b5c
GN
308 ssize_t off = 0;
309 int i;
310
311 ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN);
ab9f4b0b 312
26a08b5c 313 if (cstat == fstat) {
97143b9d
RY
314 off += kmem_scnprintf(buf + off, size - off, "%-17s",
315 "fastest");
ab9f4b0b 316
26a08b5c
GN
317 for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++) {
318 int id = fstat->gen[i];
97143b9d 319 off += kmem_scnprintf(buf + off, size - off, "%-16s",
26a08b5c
GN
320 raidz_supp_impl[id]->name);
321 }
322 for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++) {
323 int id = fstat->rec[i];
97143b9d 324 off += kmem_scnprintf(buf + off, size - off, "%-16s",
26a08b5c
GN
325 raidz_supp_impl[id]->name);
326 }
327 } else {
328 ptrdiff_t id = cstat - raidz_impl_kstats;
329
97143b9d 330 off += kmem_scnprintf(buf + off, size - off, "%-17s",
26a08b5c
GN
331 raidz_supp_impl[id]->name);
332
333 for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++)
97143b9d 334 off += kmem_scnprintf(buf + off, size - off, "%-16llu",
02730c33 335 (u_longlong_t)cstat->gen[i]);
26a08b5c
GN
336
337 for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++)
97143b9d 338 off += kmem_scnprintf(buf + off, size - off, "%-16llu",
02730c33 339 (u_longlong_t)cstat->rec[i]);
ab9f4b0b 340 }
26a08b5c 341
97143b9d 342 (void) kmem_scnprintf(buf + off, size - off, "\n");
26a08b5c
GN
343
344 return (0);
345}
346
347static void *
348raidz_math_kstat_addr(kstat_t *ksp, loff_t n)
349{
350 if (n <= raidz_supp_impl_cnt)
351 ksp->ks_private = (void *) (raidz_impl_kstats + n);
352 else
353 ksp->ks_private = NULL;
354
355 return (ksp->ks_private);
ab9f4b0b
GN
356}
357
358#define BENCH_D_COLS (8ULL)
359#define BENCH_COLS (BENCH_D_COLS + PARITY_PQR)
590c9a09 360#define BENCH_ZIO_SIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT) /* 128 kiB */
83b698dc 361#define BENCH_NS MSEC2NSEC(1) /* 1ms */
ab9f4b0b
GN
362
363typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn);
364
365static void
366benchmark_gen_impl(raidz_map_t *rm, const int fn)
367{
368 (void) fn;
369 vdev_raidz_generate_parity(rm);
370}
371
372static void
373benchmark_rec_impl(raidz_map_t *rm, const int fn)
374{
375 static const int rec_tgt[7][3] = {
376 {1, 2, 3}, /* rec_p: bad QR & D[0] */
377 {0, 2, 3}, /* rec_q: bad PR & D[0] */
378 {0, 1, 3}, /* rec_r: bad PQ & D[0] */
379 {2, 3, 4}, /* rec_pq: bad R & D[0][1] */
380 {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
381 {0, 3, 4}, /* rec_qr: bad P & D[0][1] */
382 {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
383 };
384
385 vdev_raidz_reconstruct(rm, rec_tgt[fn], 3);
386}
387
388/*
389 * Benchmarking of all supported implementations (raidz_supp_impl_cnt)
390 * is performed by setting the rm_ops pointer and calling the top level
391 * generate/reconstruct methods of bench_rm.
392 */
393static void
394benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
395{
396 uint64_t run_cnt, speed, best_speed = 0;
397 hrtime_t t_start, t_diff;
398 raidz_impl_ops_t *curr_impl;
02730c33 399 raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
ab9f4b0b
GN
400 int impl, i;
401
c9187d86 402 for (impl = 0; impl < raidz_supp_impl_cnt; impl++) {
ab9f4b0b
GN
403 /* set an implementation to benchmark */
404 curr_impl = raidz_supp_impl[impl];
405 bench_rm->rm_ops = curr_impl;
406
407 run_cnt = 0;
408 t_start = gethrtime();
409
410 do {
83b698dc 411 for (i = 0; i < 5; i++, run_cnt++)
ab9f4b0b
GN
412 bench_fn(bench_rm, fn);
413
414 t_diff = gethrtime() - t_start;
415 } while (t_diff < BENCH_NS);
416
417 speed = run_cnt * BENCH_ZIO_SIZE * NANOSEC;
418 speed /= (t_diff * BENCH_COLS);
419
420 if (bench_fn == benchmark_gen_impl)
26a08b5c 421 raidz_impl_kstats[impl].gen[fn] = speed;
ab9f4b0b 422 else
26a08b5c 423 raidz_impl_kstats[impl].rec[fn] = speed;
ab9f4b0b 424
c9187d86
GN
425 /* Update fastest implementation method */
426 if (speed > best_speed) {
ab9f4b0b
GN
427 best_speed = speed;
428
c9187d86 429 if (bench_fn == benchmark_gen_impl) {
26a08b5c 430 fstat->gen[fn] = impl;
ab9f4b0b
GN
431 vdev_raidz_fastest_impl.gen[fn] =
432 curr_impl->gen[fn];
c9187d86 433 } else {
26a08b5c 434 fstat->rec[fn] = impl;
ab9f4b0b
GN
435 vdev_raidz_fastest_impl.rec[fn] =
436 curr_impl->rec[fn];
c9187d86 437 }
ab9f4b0b
GN
438 }
439 }
440}
e5db3134 441#endif
ab9f4b0b 442
e5db3134
BB
443/*
444 * Initialize and benchmark all supported implementations.
445 */
446static void
10fa2545 447benchmark_raidz(void)
ab9f4b0b
GN
448{
449 raidz_impl_ops_t *curr_impl;
e5db3134 450 int i, c;
ab9f4b0b 451
e5db3134 452 /* Move supported impl into raidz_supp_impl */
ab9f4b0b 453 for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
02730c33 454 curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i];
ab9f4b0b 455
ab9f4b0b
GN
456 if (curr_impl->init)
457 curr_impl->init();
458
26a08b5c 459 if (curr_impl->is_supported())
02730c33 460 raidz_supp_impl[c++] = (raidz_impl_ops_t *)curr_impl;
ab9f4b0b 461 }
c9187d86 462 membar_producer(); /* complete raidz_supp_impl[] init */
ab9f4b0b 463 raidz_supp_impl_cnt = c; /* number of supported impl */
ab9f4b0b 464
e5db3134 465#if defined(_KERNEL)
3ee9a997 466 abd_t *pabd;
e5db3134
BB
467 zio_t *bench_zio = NULL;
468 raidz_map_t *bench_rm = NULL;
469 uint64_t bench_parity;
ab9f4b0b 470
10269e02 471 /* Fake a zio and run the benchmark on a warmed up buffer */
ab9f4b0b
GN
472 bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
473 bench_zio->io_offset = 0;
474 bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */
a6255b7f
DQ
475 bench_zio->io_abd = abd_alloc_linear(BENCH_ZIO_SIZE, B_TRUE);
476 memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE);
ab9f4b0b
GN
477
478 /* Benchmark parity generation methods */
e5db3134 479 for (int fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
ab9f4b0b
GN
480 bench_parity = fn + 1;
481 /* New raidz_map is needed for each generate_p/q/r */
c9187d86 482 bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
ab9f4b0b
GN
483 BENCH_D_COLS + bench_parity, bench_parity);
484
485 benchmark_raidz_impl(bench_rm, fn, benchmark_gen_impl);
486
487 vdev_raidz_map_free(bench_rm);
488 }
489
490 /* Benchmark data reconstruction methods */
c9187d86
GN
491 bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
492 BENCH_COLS, PARITY_PQR);
ab9f4b0b 493
3ee9a997
MJ
494 /* Ensure that fake parity blocks are initialized */
495 for (c = 0; c < bench_rm->rm_row[0]->rr_firstdatacol; c++) {
496 pabd = bench_rm->rm_row[0]->rr_col[c].rc_abd;
497 memset(abd_to_buf(pabd), 0xAA, abd_get_size(pabd));
498 }
499
e5db3134 500 for (int fn = 0; fn < RAIDZ_REC_NUM; fn++)
ab9f4b0b
GN
501 benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
502
503 vdev_raidz_map_free(bench_rm);
504
505 /* cleanup the bench zio */
a6255b7f 506 abd_free(bench_zio->io_abd);
ab9f4b0b 507 kmem_free(bench_zio, sizeof (zio_t));
e5db3134
BB
508#else
509 /*
510 * Skip the benchmark in user space to avoid impacting libzpool
511 * consumers (zdb, zhack, zinject, ztest). The last implementation
512 * is assumed to be the fastest and used by default.
513 */
514 memcpy(&vdev_raidz_fastest_impl,
515 raidz_supp_impl[raidz_supp_impl_cnt - 1],
516 sizeof (vdev_raidz_fastest_impl));
517 strcpy(vdev_raidz_fastest_impl.name, "fastest");
518#endif /* _KERNEL */
519}
520
521void
522vdev_raidz_math_init(void)
523{
10fa2545
BB
524 /* Determine the fastest available implementation. */
525 benchmark_raidz();
ab9f4b0b 526
10fa2545 527#if defined(_KERNEL)
e5db3134 528 /* Install kstats for all implementations */
26a08b5c 529 raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench", "misc",
02730c33 530 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
ab9f4b0b 531 if (raidz_math_kstat != NULL) {
26a08b5c
GN
532 raidz_math_kstat->ks_data = NULL;
533 raidz_math_kstat->ks_ndata = UINT32_MAX;
534 kstat_set_raw_ops(raidz_math_kstat,
535 raidz_math_kstat_headers,
536 raidz_math_kstat_data,
537 raidz_math_kstat_addr);
ab9f4b0b
GN
538 kstat_install(raidz_math_kstat);
539 }
e5db3134 540#endif
ab9f4b0b
GN
541
542 /* Finish initialization */
c9187d86 543 atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
ab9f4b0b 544 raidz_math_initialized = B_TRUE;
ab9f4b0b
GN
545}
546
547void
548vdev_raidz_math_fini(void)
549{
550 raidz_impl_ops_t const *curr_impl;
ab9f4b0b 551
e5db3134 552#if defined(_KERNEL)
ab9f4b0b
GN
553 if (raidz_math_kstat != NULL) {
554 kstat_delete(raidz_math_kstat);
555 raidz_math_kstat = NULL;
556 }
e5db3134 557#endif
ab9f4b0b 558
e5db3134 559 for (int i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
ab9f4b0b 560 curr_impl = raidz_all_maths[i];
ab9f4b0b
GN
561 if (curr_impl->fini)
562 curr_impl->fini();
563 }
564}
565
c9187d86 566static const struct {
a926aab9 567 const char *name;
c9187d86 568 uint32_t sel;
ab9f4b0b 569} math_impl_opts[] = {
c9187d86 570 { "cycle", IMPL_CYCLE },
c9187d86
GN
571 { "fastest", IMPL_FASTEST },
572 { "original", IMPL_ORIGINAL },
573 { "scalar", IMPL_SCALAR }
ab9f4b0b
GN
574};
575
576/*
577 * Function sets desired raidz implementation.
c9187d86
GN
578 *
579 * If we are called before init(), user preference will be saved in
580 * user_sel_impl, and applied in later init() call. This occurs when module
581 * parameter is specified on module load. Otherwise, directly update
582 * zfs_vdev_raidz_impl.
ab9f4b0b
GN
583 *
584 * @val Name of raidz implementation to use
585 * @param Unused.
586 */
9cc1844a
GN
587int
588vdev_raidz_impl_set(const char *val)
ab9f4b0b 589{
c9187d86
GN
590 int err = -EINVAL;
591 char req_name[RAIDZ_IMPL_NAME_MAX];
592 uint32_t impl = RAIDZ_IMPL_READ(user_sel_impl);
ab9f4b0b
GN
593 size_t i;
594
c9187d86
GN
595 /* sanitize input */
596 i = strnlen(val, RAIDZ_IMPL_NAME_MAX);
597 if (i == 0 || i == RAIDZ_IMPL_NAME_MAX)
598 return (err);
599
600 strlcpy(req_name, val, RAIDZ_IMPL_NAME_MAX);
601 while (i > 0 && !!isspace(req_name[i-1]))
602 i--;
603 req_name[i] = '\0';
604
ab9f4b0b
GN
605 /* Check mandatory options */
606 for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
c9187d86
GN
607 if (strcmp(req_name, math_impl_opts[i].name) == 0) {
608 impl = math_impl_opts[i].sel;
609 err = 0;
610 break;
ab9f4b0b
GN
611 }
612 }
613
c9187d86
GN
614 /* check all supported impl if init() was already called */
615 if (err != 0 && raidz_math_initialized) {
616 /* check all supported implementations */
617 for (i = 0; i < raidz_supp_impl_cnt; i++) {
618 if (strcmp(req_name, raidz_supp_impl[i]->name) == 0) {
619 impl = i;
620 err = 0;
621 break;
622 }
ab9f4b0b
GN
623 }
624 }
625
c9187d86
GN
626 if (err == 0) {
627 if (raidz_math_initialized)
628 atomic_swap_32(&zfs_vdev_raidz_impl, impl);
629 else
630 atomic_swap_32(&user_sel_impl, impl);
631 }
632
633 return (err);
ab9f4b0b
GN
634}
635
b3673342 636#if defined(_KERNEL) && defined(__linux__)
ab9f4b0b 637
9cc1844a
GN
638static int
639zfs_vdev_raidz_impl_set(const char *val, zfs_kernel_param_t *kp)
640{
641 return (vdev_raidz_impl_set(val));
ab9f4b0b
GN
642}
643
ab9f4b0b 644static int
9cc1844a 645zfs_vdev_raidz_impl_get(char *buffer, zfs_kernel_param_t *kp)
ab9f4b0b
GN
646{
647 int i, cnt = 0;
648 char *fmt;
c9187d86 649 const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
ab9f4b0b
GN
650
651 ASSERT(raidz_math_initialized);
652
ab9f4b0b 653 /* list mandatory options */
c9187d86
GN
654 for (i = 0; i < ARRAY_SIZE(math_impl_opts) - 2; i++) {
655 fmt = (impl == math_impl_opts[i].sel) ? "[%s] " : "%s ";
30367835
RY
656 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
657 math_impl_opts[i].name);
ab9f4b0b
GN
658 }
659
660 /* list all supported implementations */
661 for (i = 0; i < raidz_supp_impl_cnt; i++) {
c9187d86 662 fmt = (i == impl) ? "[%s] " : "%s ";
30367835
RY
663 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
664 raidz_supp_impl[i]->name);
ab9f4b0b
GN
665 }
666
ab9f4b0b
GN
667 return (cnt);
668}
669
670module_param_call(zfs_vdev_raidz_impl, zfs_vdev_raidz_impl_set,
4ea3f864 671 zfs_vdev_raidz_impl_get, NULL, 0644);
ab9f4b0b
GN
672MODULE_PARM_DESC(zfs_vdev_raidz_impl, "Select raidz implementation.");
673#endif