]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/vdev_raidz_math_impl.h
Add parity generation/rebuild using 128-bits NEON for Aarch64
[mirror_zfs.git] / module / zfs / vdev_raidz_math_impl.h
CommitLineData
ab9f4b0b
GN
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
23 */
24
25#ifndef _VDEV_RAIDZ_MATH_IMPL_H
26#define _VDEV_RAIDZ_MATH_IMPL_H
27
28#include <sys/types.h>
29
/* Inlining qualifier used by every *_BLOCK() and *_impl() routine below. */
#define	raidz_inline	inline __attribute__((always_inline))

/* Supply noinline only if the build environment has not defined it yet. */
#ifndef	noinline
#define	noinline	__attribute__((noinline))
#endif

/*
 * Pointer (as v_t *) to the position 'off' bytes into the rc_data buffer
 * of raidz column 'col'.
 */
#define	COL_OFF(col, off)	((v_t *)(((char *)(col)->rc_data) + (off)))
37
38/*
39 * PARITY CALCULATION
40 * An optimized function is called for a full length of data columns
41 * If RAIDZ map contains remainder columns (shorter columns) the same function
 * is called for the remainder of the full columns.
43 *
44 * GEN_[P|PQ|PQR]_BLOCK() functions are designed to be efficiently in-lined by
45 * the compiler. This removes a lot of conditionals from the inside loop which
46 * makes the code faster, especially for vectorized code.
47 * They are also highly parametrized, allowing for each implementation to define
48 * most optimal stride, and register allocation.
49 */
50
51static raidz_inline void
52GEN_P_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
53 const int ncols)
54{
55 int c;
56 size_t ioff;
57 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
58 raidz_col_t *col;
59
60 GEN_P_DEFINE();
61
62 for (ioff = off; ioff < end; ioff += (GEN_P_STRIDE * sizeof (v_t))) {
63 LOAD(COL_OFF(&(rm->rm_col[1]), ioff), GEN_P_P);
64
65 for (c = 2; c < ncols; c++) {
66 col = &rm->rm_col[c];
67 XOR_ACC(COL_OFF(col, ioff), GEN_P_P);
68 }
69
70 STORE(COL_OFF(pcol, ioff), GEN_P_P);
71 }
72}
73
74/*
75 * Generate P parity (RAIDZ1)
76 *
77 * @rm RAIDZ map
78 */
79static raidz_inline void
80raidz_generate_p_impl(raidz_map_t * const rm)
81{
82 const int ncols = raidz_ncols(rm);
83 const size_t psize = raidz_big_size(rm);
84 const size_t short_size = raidz_short_size(rm);
85
86 raidz_math_begin();
87
88 /* short_size */
89 GEN_P_BLOCK(rm, 0, short_size, ncols);
90
91 /* fullcols */
92 GEN_P_BLOCK(rm, short_size, psize, raidz_nbigcols(rm));
93
94 raidz_math_end();
95}
96
97static raidz_inline void
98GEN_PQ_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
99 const int ncols, const int nbigcols)
100{
101 int c;
102 size_t ioff;
103 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
104 raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
105 raidz_col_t *col;
106
107 GEN_PQ_DEFINE();
108
109 MUL2_SETUP();
110
111 for (ioff = off; ioff < end; ioff += (GEN_PQ_STRIDE * sizeof (v_t))) {
112 LOAD(COL_OFF(&rm->rm_col[2], ioff), GEN_PQ_P);
113 COPY(GEN_PQ_P, GEN_PQ_Q);
114
115 for (c = 3; c < nbigcols; c++) {
116 col = &rm->rm_col[c];
117 LOAD(COL_OFF(col, ioff), GEN_PQ_D);
118 MUL2(GEN_PQ_Q);
119 XOR(GEN_PQ_D, GEN_PQ_P);
120 XOR(GEN_PQ_D, GEN_PQ_Q);
121 }
122
123 STORE(COL_OFF(pcol, ioff), GEN_PQ_P);
124
125 for (; c < ncols; c++)
126 MUL2(GEN_PQ_Q);
127
128 STORE(COL_OFF(qcol, ioff), GEN_PQ_Q);
129 }
130}
131
132/*
133 * Generate PQ parity (RAIDZ2)
134 *
135 * @rm RAIDZ map
136 */
137static raidz_inline void
138raidz_generate_pq_impl(raidz_map_t * const rm)
139{
140 const int ncols = raidz_ncols(rm);
141 const size_t psize = raidz_big_size(rm);
142 const size_t short_size = raidz_short_size(rm);
143
144 raidz_math_begin();
145
146 /* short_size */
147 GEN_PQ_BLOCK(rm, 0, short_size, ncols, ncols);
148
149 /* fullcols */
150 GEN_PQ_BLOCK(rm, short_size, psize, ncols, raidz_nbigcols(rm));
151
152 raidz_math_end();
153}
154
155
156static raidz_inline void
157GEN_PQR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
158 const int ncols, const int nbigcols)
159{
160 int c;
161 size_t ioff;
162 raidz_col_t *col;
163 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
164 raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
165 raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
166
167 GEN_PQR_DEFINE();
168
169 MUL2_SETUP();
170
171 for (ioff = off; ioff < end; ioff += (GEN_PQR_STRIDE * sizeof (v_t))) {
172 LOAD(COL_OFF(&rm->rm_col[3], ioff), GEN_PQR_P);
173 COPY(GEN_PQR_P, GEN_PQR_Q);
174 COPY(GEN_PQR_P, GEN_PQR_R);
175
176 for (c = 4; c < nbigcols; c++) {
177 col = &rm->rm_col[c];
178 LOAD(COL_OFF(col, ioff), GEN_PQR_D);
179 MUL2(GEN_PQR_Q);
180 MUL4(GEN_PQR_R);
181 XOR(GEN_PQR_D, GEN_PQR_P);
182 XOR(GEN_PQR_D, GEN_PQR_Q);
183 XOR(GEN_PQR_D, GEN_PQR_R);
184 }
185
186 STORE(COL_OFF(pcol, ioff), GEN_PQR_P);
187
188 for (; c < ncols; c++) {
189 MUL2(GEN_PQR_Q);
190 MUL4(GEN_PQR_R);
191 }
192
193 STORE(COL_OFF(qcol, ioff), GEN_PQR_Q);
194 STORE(COL_OFF(rcol, ioff), GEN_PQR_R);
195 }
196}
197
198
199/*
200 * Generate PQR parity (RAIDZ3)
201 *
202 * @rm RAIDZ map
203 */
204static raidz_inline void
205raidz_generate_pqr_impl(raidz_map_t * const rm)
206{
207 const int ncols = raidz_ncols(rm);
208 const size_t psize = raidz_big_size(rm);
209 const size_t short_size = raidz_short_size(rm);
210
211 raidz_math_begin();
212
213 /* short_size */
214 GEN_PQR_BLOCK(rm, 0, short_size, ncols, ncols);
215
216 /* fullcols */
217 GEN_PQR_BLOCK(rm, short_size, psize, ncols, raidz_nbigcols(rm));
218
219 raidz_math_end();
220}
221
222/*
223 * DATA RECONSTRUCTION
224 *
225 * Data reconstruction process consists of two phases:
226 * - Syndrome calculation
227 * - Data reconstruction
228 *
229 * Syndrome is calculated by generating parity using available data columns
230 * and zeros in places of erasure. Existing parity is added to corresponding
231 * syndrome value to obtain the [P|Q|R]syn values from equation:
232 * P = Psyn + Dx + Dy + Dz
233 * Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz
234 * R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz
235 *
 * For data reconstruction phase, the corresponding equations are solved
 * for missing data (Dx, Dy, Dz). This generally involves multiplying known
 * symbols by a coefficient and adding them together. The multiplication
 * constant coefficients are calculated ahead of the operation in
 * raidz_rec_[q|r|pq|pr|qr|pqr]_coeff() functions.
241 *
242 * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big"
243 * and "short" columns.
 * For this reason, reconstruction is performed in minimum of
 * two steps. First, from offset 0 to short_size, then from short_size to
 * big_size. Calculation functions REC_[*]_BLOCK() are implemented to work
247 * over both ranges. The split also enables removal of conditional expressions
248 * from loop bodies, improving throughput of SIMD implementations.
249 * For the best performance, all functions marked with raidz_inline attribute
250 * must be inlined by compiler.
251 *
252 * parity data
253 * columns columns
254 * <----------> <------------------>
255 * x y <----+ missing columns (x, y)
256 * | |
257 * +---+---+---+---+-v-+---+-v-+---+ ^ 0
258 * | | | | | | | | | |
259 * | | | | | | | | | |
260 * | P | Q | R | D | D | D | D | D | |
261 * | | | | 0 | 1 | 2 | 3 | 4 | |
262 * | | | | | | | | | v
263 * | | | | | +---+---+---+ ^ short_size
264 * | | | | | | |
265 * +---+---+---+---+---+ v big_size
266 * <------------------> <---------->
267 * big columns short columns
268 *
269 */
270
271/*
272 * Functions calculate multiplication constants for data reconstruction.
273 * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
274 * used parity columns for reconstruction.
275 * @rm RAIDZ map
276 * @tgtidx array of missing data indexes
277 * @coeff output array of coefficients. Array must be user
278 * provided and must hold minimum MUL_CNT values
279 */
280static noinline void
281raidz_rec_q_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
282{
283 const unsigned ncols = raidz_ncols(rm);
284 const unsigned x = tgtidx[TARGET_X];
285
286 coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
287}
288
289static noinline void
290raidz_rec_r_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
291{
292 const unsigned ncols = raidz_ncols(rm);
293 const unsigned x = tgtidx[TARGET_X];
294
295 coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
296}
297
298static noinline void
299raidz_rec_pq_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
300{
301 const unsigned ncols = raidz_ncols(rm);
302 const unsigned x = tgtidx[TARGET_X];
303 const unsigned y = tgtidx[TARGET_Y];
304 gf_t a, b, e;
305
306 a = gf_exp2(x + 255 - y);
307 b = gf_exp2(255 - (ncols - x - 1));
308 e = a ^ 0x01;
309
310 coeff[MUL_PQ_X] = gf_div(a, e);
311 coeff[MUL_PQ_Y] = gf_div(b, e);
312}
313
314static noinline void
315raidz_rec_pr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
316{
317 const unsigned ncols = raidz_ncols(rm);
318 const unsigned x = tgtidx[TARGET_X];
319 const unsigned y = tgtidx[TARGET_Y];
320
321 gf_t a, b, e;
322
323 a = gf_exp4(x + 255 - y);
324 b = gf_exp4(255 - (ncols - x - 1));
325 e = a ^ 0x01;
326
327 coeff[MUL_PR_X] = gf_div(a, e);
328 coeff[MUL_PR_Y] = gf_div(b, e);
329}
330
331static noinline void
332raidz_rec_qr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
333{
334 const unsigned ncols = raidz_ncols(rm);
335 const unsigned x = tgtidx[TARGET_X];
336 const unsigned y = tgtidx[TARGET_Y];
337
338 gf_t nx, ny, nxxy, nxyy, d;
339
340 nx = gf_exp2(ncols - x - 1);
341 ny = gf_exp2(ncols - y - 1);
342 nxxy = gf_mul(gf_mul(nx, nx), ny);
343 nxyy = gf_mul(gf_mul(nx, ny), ny);
344 d = nxxy ^ nxyy;
345
346 coeff[MUL_QR_XQ] = ny;
347 coeff[MUL_QR_X] = gf_div(ny, d);
348 coeff[MUL_QR_YQ] = nx;
349 coeff[MUL_QR_Y] = gf_div(nx, d);
350}
351
352static noinline void
353raidz_rec_pqr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
354{
355 const unsigned ncols = raidz_ncols(rm);
356 const unsigned x = tgtidx[TARGET_X];
357 const unsigned y = tgtidx[TARGET_Y];
358 const unsigned z = tgtidx[TARGET_Z];
359
360 gf_t nx, ny, nz, nxx, nyy, nzz, nyyz, nyzz, xd, yd;
361
362 nx = gf_exp2(ncols - x - 1);
363 ny = gf_exp2(ncols - y - 1);
364 nz = gf_exp2(ncols - z - 1);
365
366 nxx = gf_exp4(ncols - x - 1);
367 nyy = gf_exp4(ncols - y - 1);
368 nzz = gf_exp4(ncols - z - 1);
369
370 nyyz = gf_mul(gf_mul(ny, nz), ny);
371 nyzz = gf_mul(nzz, ny);
372
373 xd = gf_mul(nxx, ny) ^ gf_mul(nx, nyy) ^ nyyz ^
374 gf_mul(nxx, nz) ^ gf_mul(nzz, nx) ^ nyzz;
375
376 yd = gf_inv(ny ^ nz);
377
378 coeff[MUL_PQR_XP] = gf_div(nyyz ^ nyzz, xd);
379 coeff[MUL_PQR_XQ] = gf_div(nyy ^ nzz, xd);
380 coeff[MUL_PQR_XR] = gf_div(ny ^ nz, xd);
381 coeff[MUL_PQR_YU] = nx;
382 coeff[MUL_PQR_YP] = gf_mul(nz, yd);
383 coeff[MUL_PQR_YQ] = yd;
384}
385
386
387/*
388 * Reconstruction using P parity
389 * @rm RAIDZ map
390 * @off starting offset
391 * @end ending offset
392 * @x missing data column
393 * @ncols number of column
394 */
395static raidz_inline void
396REC_P_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
397 const int x, const int ncols)
398{
399 int c;
400 size_t ioff;
401 const size_t firstdc = raidz_parity(rm);
402 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
403 raidz_col_t * const xcol = raidz_col_p(rm, x);
404 raidz_col_t *col;
405
406 REC_P_DEFINE();
407
408 for (ioff = off; ioff < end; ioff += (REC_P_STRIDE * sizeof (v_t))) {
409 LOAD(COL_OFF(pcol, ioff), REC_P_X);
410
411 for (c = firstdc; c < x; c++) {
412 col = &rm->rm_col[c];
413 XOR_ACC(COL_OFF(col, ioff), REC_P_X);
414 }
415
416 for (c++; c < ncols; c++) {
417 col = &rm->rm_col[c];
418 XOR_ACC(COL_OFF(col, ioff), REC_P_X);
419 }
420
421 STORE(COL_OFF(xcol, ioff), REC_P_X);
422 }
423}
424
425/*
426 * Reconstruct single data column using P parity
427 * @rec_method REC_P_BLOCK()
428 *
429 * @rm RAIDZ map
430 * @tgtidx array of missing data indexes
431 */
432static raidz_inline int
433raidz_reconstruct_p_impl(raidz_map_t *rm, const int *tgtidx)
434{
435 const int x = tgtidx[TARGET_X];
436 const int ncols = raidz_ncols(rm);
437 const int nbigcols = raidz_nbigcols(rm);
438 const size_t xsize = raidz_col_size(rm, x);
439 const size_t short_size = raidz_short_size(rm);
440
441 raidz_math_begin();
442
443 /* 0 - short_size */
444 REC_P_BLOCK(rm, 0, short_size, x, ncols);
445
446 /* short_size - xsize */
447 REC_P_BLOCK(rm, short_size, xsize, x, nbigcols);
448
449 raidz_math_end();
450
451 return (1 << CODE_P);
452}
453
454/*
455 * Reconstruct using Q parity
456 */
457
/* Advance the Q syndrome accumulator by one column position. */
#define	REC_Q_SYN_UPDATE()	MUL2(REC_Q_X)

/*
 * Advance the Q syndrome and fold in data column 'c'.
 * Expects 'rm', 'col' and 'ioff' to be in scope at the point of use.
 * Wrapped in do { } while (0) so that it behaves as a single statement.
 */
#define	REC_Q_INNER_LOOP(c)						\
do {									\
	col = &rm->rm_col[(c)];						\
	REC_Q_SYN_UPDATE();						\
	XOR_ACC(COL_OFF(col, ioff), REC_Q_X);				\
} while (0)
466
467/*
468 * Reconstruction using Q parity
469 * @rm RAIDZ map
470 * @off starting offset
471 * @end ending offset
472 * @x missing data column
473 * @coeff multiplication coefficients
474 * @ncols number of column
475 * @nbigcols number of big columns
476 */
477static raidz_inline void
478REC_Q_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
479 const int x, const unsigned *coeff, const int ncols, const int nbigcols)
480{
481 int c;
482 size_t ioff = 0;
483 const size_t firstdc = raidz_parity(rm);
484 raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
485 raidz_col_t * const xcol = raidz_col_p(rm, x);
486 raidz_col_t *col;
487
488 REC_Q_DEFINE();
489
490 for (ioff = off; ioff < end; ioff += (REC_Q_STRIDE * sizeof (v_t))) {
491 MUL2_SETUP();
492
62a65a65 493 ZERO(REC_Q_X);
ab9f4b0b
GN
494
495 if (ncols == nbigcols) {
496 for (c = firstdc; c < x; c++)
497 REC_Q_INNER_LOOP(c);
498
499 REC_Q_SYN_UPDATE();
500 for (c++; c < nbigcols; c++)
501 REC_Q_INNER_LOOP(c);
502 } else {
503 for (c = firstdc; c < nbigcols; c++) {
504 REC_Q_SYN_UPDATE();
505 if (x != c) {
506 col = &rm->rm_col[c];
507 XOR_ACC(COL_OFF(col, ioff), REC_Q_X);
508 }
509 }
510 for (; c < ncols; c++)
511 REC_Q_SYN_UPDATE();
512 }
513
514 XOR_ACC(COL_OFF(qcol, ioff), REC_Q_X);
515 MUL(coeff[MUL_Q_X], REC_Q_X);
516 STORE(COL_OFF(xcol, ioff), REC_Q_X);
517 }
518}
519
520/*
521 * Reconstruct single data column using Q parity
522 * @rec_method REC_Q_BLOCK()
523 *
524 * @rm RAIDZ map
525 * @tgtidx array of missing data indexes
526 */
527static raidz_inline int
528raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx)
529{
530 const int x = tgtidx[TARGET_X];
531 const int ncols = raidz_ncols(rm);
532 const int nbigcols = raidz_nbigcols(rm);
533 const size_t xsize = raidz_col_size(rm, x);
534 const size_t short_size = raidz_short_size(rm);
535 unsigned coeff[MUL_CNT];
536
537 raidz_rec_q_coeff(rm, tgtidx, coeff);
538
539 raidz_math_begin();
540
541 /* 0 - short_size */
542 REC_Q_BLOCK(rm, 0, short_size, x, coeff, ncols, ncols);
543
544 /* short_size - xsize */
545 REC_Q_BLOCK(rm, short_size, xsize, x, coeff, ncols, nbigcols);
546
547 raidz_math_end();
548
549 return (1 << CODE_Q);
550}
551
552/*
553 * Reconstruct using R parity
554 */
555
/* Advance the R syndrome accumulator by one column position. */
#define	REC_R_SYN_UPDATE()	MUL4(REC_R_X)

/*
 * Advance the R syndrome and fold in data column 'c'.
 * Expects 'rm', 'col' and 'ioff' to be in scope at the point of use.
 * Wrapped in do { } while (0) so that it behaves as a single statement.
 */
#define	REC_R_INNER_LOOP(c)						\
do {									\
	col = &rm->rm_col[(c)];						\
	REC_R_SYN_UPDATE();						\
	XOR_ACC(COL_OFF(col, ioff), REC_R_X);				\
} while (0)
563
564/*
565 * Reconstruction using R parity
566 * @rm RAIDZ map
567 * @off starting offset
568 * @end ending offset
569 * @x missing data column
570 * @coeff multiplication coefficients
571 * @ncols number of column
572 * @nbigcols number of big columns
573 */
574static raidz_inline void
575REC_R_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
576 const int x, const unsigned *coeff, const int ncols, const int nbigcols)
577{
578 int c;
579 size_t ioff = 0;
580 const size_t firstdc = raidz_parity(rm);
581 raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
582 raidz_col_t * const xcol = raidz_col_p(rm, x);
583 raidz_col_t *col;
584
585 REC_R_DEFINE();
586
587 for (ioff = off; ioff < end; ioff += (REC_R_STRIDE * sizeof (v_t))) {
588 MUL2_SETUP();
589
62a65a65 590 ZERO(REC_R_X);
ab9f4b0b
GN
591
592 if (ncols == nbigcols) {
593 for (c = firstdc; c < x; c++)
594 REC_R_INNER_LOOP(c);
595
596 REC_R_SYN_UPDATE();
597 for (c++; c < nbigcols; c++)
598 REC_R_INNER_LOOP(c);
599 } else {
600 for (c = firstdc; c < nbigcols; c++) {
601 REC_R_SYN_UPDATE();
602 if (c != x) {
603 col = &rm->rm_col[c];
604 XOR_ACC(COL_OFF(col, ioff), REC_R_X);
605 }
606 }
607 for (; c < ncols; c++)
608 REC_R_SYN_UPDATE();
609 }
610
611 XOR_ACC(COL_OFF(rcol, ioff), REC_R_X);
612 MUL(coeff[MUL_R_X], REC_R_X);
613 STORE(COL_OFF(xcol, ioff), REC_R_X);
614 }
615}
616
617/*
618 * Reconstruct single data column using R parity
619 * @rec_method REC_R_BLOCK()
620 *
621 * @rm RAIDZ map
622 * @tgtidx array of missing data indexes
623 */
624static raidz_inline int
625raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx)
626{
627 const int x = tgtidx[TARGET_X];
628 const int ncols = raidz_ncols(rm);
629 const int nbigcols = raidz_nbigcols(rm);
630 const size_t xsize = raidz_col_size(rm, x);
631 const size_t short_size = raidz_short_size(rm);
632 unsigned coeff[MUL_CNT];
633
634 raidz_rec_r_coeff(rm, tgtidx, coeff);
635
636 raidz_math_begin();
637
638 /* 0 - short_size */
639 REC_R_BLOCK(rm, 0, short_size, x, coeff, ncols, ncols);
640
641 /* short_size - xsize */
642 REC_R_BLOCK(rm, short_size, xsize, x, coeff, ncols, nbigcols);
643
644 raidz_math_end();
645
646 return (1 << CODE_R);
647}
648
649/*
650 * Reconstruct using PQ parity
651 */
652
/* Advance the Q syndrome accumulator (kept in REC_PQ_Y) by one column. */
#define	REC_PQ_SYN_UPDATE()	MUL2(REC_PQ_Y)

/*
 * Load data column 'c', advance the Q syndrome, then fold the data into
 * both the P (REC_PQ_X) and Q (REC_PQ_Y) accumulators.
 * Expects 'rm', 'col' and 'ioff' to be in scope at the point of use.
 * Wrapped in do { } while (0) so that it behaves as a single statement.
 */
#define	REC_PQ_INNER_LOOP(c)						\
do {									\
	col = &rm->rm_col[(c)];						\
	LOAD(COL_OFF(col, ioff), REC_PQ_D);				\
	REC_PQ_SYN_UPDATE();						\
	XOR(REC_PQ_D, REC_PQ_X);					\
	XOR(REC_PQ_D, REC_PQ_Y);					\
} while (0)
662
663/*
664 * Reconstruction using PQ parity
665 * @rm RAIDZ map
666 * @off starting offset
667 * @end ending offset
668 * @x missing data column
669 * @y missing data column
670 * @coeff multiplication coefficients
671 * @ncols number of column
672 * @nbigcols number of big columns
673 * @calcy calculate second data column
674 */
675static raidz_inline void
676REC_PQ_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
677 const int x, const int y, const unsigned *coeff, const int ncols,
678 const int nbigcols, const boolean_t calcy)
679{
680 int c;
681 size_t ioff = 0;
682 const size_t firstdc = raidz_parity(rm);
683 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
684 raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
685 raidz_col_t * const xcol = raidz_col_p(rm, x);
686 raidz_col_t * const ycol = raidz_col_p(rm, y);
687 raidz_col_t *col;
688
689 REC_PQ_DEFINE();
690
691 for (ioff = off; ioff < end; ioff += (REC_PQ_STRIDE * sizeof (v_t))) {
692 LOAD(COL_OFF(pcol, ioff), REC_PQ_X);
62a65a65 693 ZERO(REC_PQ_Y);
ab9f4b0b
GN
694 MUL2_SETUP();
695
696 if (ncols == nbigcols) {
697 for (c = firstdc; c < x; c++)
698 REC_PQ_INNER_LOOP(c);
699
700 REC_PQ_SYN_UPDATE();
701 for (c++; c < y; c++)
702 REC_PQ_INNER_LOOP(c);
703
704 REC_PQ_SYN_UPDATE();
705 for (c++; c < nbigcols; c++)
706 REC_PQ_INNER_LOOP(c);
707 } else {
708 for (c = firstdc; c < nbigcols; c++) {
709 REC_PQ_SYN_UPDATE();
710 if (c != x && c != y) {
711 col = &rm->rm_col[c];
712 LOAD(COL_OFF(col, ioff), REC_PQ_D);
713 XOR(REC_PQ_D, REC_PQ_X);
714 XOR(REC_PQ_D, REC_PQ_Y);
715 }
716 }
717 for (; c < ncols; c++)
718 REC_PQ_SYN_UPDATE();
719 }
720
721 XOR_ACC(COL_OFF(qcol, ioff), REC_PQ_Y);
722
723 /* Save Pxy */
724 COPY(REC_PQ_X, REC_PQ_D);
725
726 /* Calc X */
727 MUL(coeff[MUL_PQ_X], REC_PQ_X);
728 MUL(coeff[MUL_PQ_Y], REC_PQ_Y);
729 XOR(REC_PQ_Y, REC_PQ_X);
730 STORE(COL_OFF(xcol, ioff), REC_PQ_X);
731
732 if (calcy) {
733 /* Calc Y */
734 XOR(REC_PQ_D, REC_PQ_X);
735 STORE(COL_OFF(ycol, ioff), REC_PQ_X);
736 }
737 }
738}
739
740/*
741 * Reconstruct two data columns using PQ parity
742 * @rec_method REC_PQ_BLOCK()
743 *
744 * @rm RAIDZ map
745 * @tgtidx array of missing data indexes
746 */
747static raidz_inline int
748raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx)
749{
750 const int x = tgtidx[TARGET_X];
751 const int y = tgtidx[TARGET_Y];
752 const int ncols = raidz_ncols(rm);
753 const int nbigcols = raidz_nbigcols(rm);
754 const size_t xsize = raidz_col_size(rm, x);
755 const size_t ysize = raidz_col_size(rm, y);
756 const size_t short_size = raidz_short_size(rm);
757 unsigned coeff[MUL_CNT];
758
759 raidz_rec_pq_coeff(rm, tgtidx, coeff);
760
761 raidz_math_begin();
762
763 /* 0 - short_size */
764 REC_PQ_BLOCK(rm, 0, short_size, x, y, coeff, ncols, ncols, B_TRUE);
765
766 /* short_size - xsize */
767 REC_PQ_BLOCK(rm, short_size, xsize, x, y, coeff, ncols, nbigcols,
768 xsize == ysize);
769
770 raidz_math_end();
771
772 return ((1 << CODE_P) | (1 << CODE_Q));
773}
774
775/*
776 * Reconstruct using PR parity
777 */
778
/* Advance the R syndrome accumulator (kept in REC_PR_Y) by one column. */
#define	REC_PR_SYN_UPDATE()	MUL4(REC_PR_Y)

/*
 * Load data column 'c', advance the R syndrome, then fold the data into
 * both the P (REC_PR_X) and R (REC_PR_Y) accumulators.
 * Expects 'rm', 'col' and 'ioff' to be in scope at the point of use.
 * Wrapped in do { } while (0) so that it behaves as a single statement.
 */
#define	REC_PR_INNER_LOOP(c)						\
do {									\
	col = &rm->rm_col[(c)];						\
	LOAD(COL_OFF(col, ioff), REC_PR_D);				\
	REC_PR_SYN_UPDATE();						\
	XOR(REC_PR_D, REC_PR_X);					\
	XOR(REC_PR_D, REC_PR_Y);					\
} while (0)
788
789/*
790 * Reconstruction using PR parity
791 * @rm RAIDZ map
792 * @off starting offset
793 * @end ending offset
794 * @x missing data column
795 * @y missing data column
796 * @coeff multiplication coefficients
797 * @ncols number of column
798 * @nbigcols number of big columns
799 * @calcy calculate second data column
800 */
801static raidz_inline void
802REC_PR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
803 const int x, const int y, const unsigned *coeff, const int ncols,
804 const int nbigcols, const boolean_t calcy)
805{
806 int c;
807 size_t ioff;
808 const size_t firstdc = raidz_parity(rm);
809 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
810 raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
811 raidz_col_t * const xcol = raidz_col_p(rm, x);
812 raidz_col_t * const ycol = raidz_col_p(rm, y);
813 raidz_col_t *col;
814
815 REC_PR_DEFINE();
816
817 for (ioff = off; ioff < end; ioff += (REC_PR_STRIDE * sizeof (v_t))) {
818 LOAD(COL_OFF(pcol, ioff), REC_PR_X);
62a65a65 819 ZERO(REC_PR_Y);
ab9f4b0b
GN
820 MUL2_SETUP();
821
822 if (ncols == nbigcols) {
823 for (c = firstdc; c < x; c++)
824 REC_PR_INNER_LOOP(c);
825
826 REC_PR_SYN_UPDATE();
827 for (c++; c < y; c++)
828 REC_PR_INNER_LOOP(c);
829
830 REC_PR_SYN_UPDATE();
831 for (c++; c < nbigcols; c++)
832 REC_PR_INNER_LOOP(c);
833 } else {
834 for (c = firstdc; c < nbigcols; c++) {
835 REC_PR_SYN_UPDATE();
836 if (c != x && c != y) {
837 col = &rm->rm_col[c];
838 LOAD(COL_OFF(col, ioff), REC_PR_D);
839 XOR(REC_PR_D, REC_PR_X);
840 XOR(REC_PR_D, REC_PR_Y);
841 }
842 }
843 for (; c < ncols; c++)
844 REC_PR_SYN_UPDATE();
845 }
846
847 XOR_ACC(COL_OFF(rcol, ioff), REC_PR_Y);
848
849 /* Save Pxy */
850 COPY(REC_PR_X, REC_PR_D);
851
852 /* Calc X */
853 MUL(coeff[MUL_PR_X], REC_PR_X);
854 MUL(coeff[MUL_PR_Y], REC_PR_Y);
855 XOR(REC_PR_Y, REC_PR_X);
856 STORE(COL_OFF(xcol, ioff), REC_PR_X);
857
858 if (calcy) {
859 /* Calc Y */
860 XOR(REC_PR_D, REC_PR_X);
861 STORE(COL_OFF(ycol, ioff), REC_PR_X);
862 }
863 }
864}
865
866
867/*
868 * Reconstruct two data columns using PR parity
869 * @rec_method REC_PR_BLOCK()
870 *
871 * @rm RAIDZ map
872 * @tgtidx array of missing data indexes
873 */
874static raidz_inline int
875raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx)
876{
877 const int x = tgtidx[TARGET_X];
878 const int y = tgtidx[TARGET_Y];
879 const int ncols = raidz_ncols(rm);
880 const int nbigcols = raidz_nbigcols(rm);
881 const size_t xsize = raidz_col_size(rm, x);
882 const size_t ysize = raidz_col_size(rm, y);
883 const size_t short_size = raidz_short_size(rm);
884 unsigned coeff[MUL_CNT];
885
886 raidz_rec_pr_coeff(rm, tgtidx, coeff);
887
888 raidz_math_begin();
889
890 /* 0 - short_size */
891 REC_PR_BLOCK(rm, 0, short_size, x, y, coeff, ncols, ncols, B_TRUE);
892
893 /* short_size - xsize */
894 REC_PR_BLOCK(rm, short_size, xsize, x, y, coeff, ncols, nbigcols,
895 xsize == ysize);
896
897 raidz_math_end();
898
899 return ((1 << CODE_P) | (1 << CODE_R));
900}
901
902
903/*
904 * Reconstruct using QR parity
905 */
906
/*
 * Advance both syndrome accumulators by one column position:
 * Q (REC_QR_X) with MUL2 and R (REC_QR_Y) with MUL4.
 * Wrapped in do { } while (0) so that it behaves as a single statement.
 */
#define	REC_QR_SYN_UPDATE()						\
do {									\
	MUL2(REC_QR_X);							\
	MUL4(REC_QR_Y);							\
} while (0)

/*
 * Load data column 'c', advance both syndromes, then fold the data into
 * the Q (REC_QR_X) and R (REC_QR_Y) accumulators.
 * Expects 'rm', 'col' and 'ioff' to be in scope at the point of use.
 */
#define	REC_QR_INNER_LOOP(c)						\
do {									\
	col = &rm->rm_col[(c)];						\
	LOAD(COL_OFF(col, ioff), REC_QR_D);				\
	REC_QR_SYN_UPDATE();						\
	XOR(REC_QR_D, REC_QR_X);					\
	XOR(REC_QR_D, REC_QR_Y);					\
} while (0)
921
922/*
923 * Reconstruction using QR parity
924 * @rm RAIDZ map
925 * @off starting offset
926 * @end ending offset
927 * @x missing data column
928 * @y missing data column
929 * @coeff multiplication coefficients
930 * @ncols number of column
931 * @nbigcols number of big columns
932 * @calcy calculate second data column
933 */
934static raidz_inline void
935REC_QR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
936 const int x, const int y, const unsigned *coeff, const int ncols,
937 const int nbigcols, const boolean_t calcy)
938{
939 int c;
940 size_t ioff;
941 const size_t firstdc = raidz_parity(rm);
942 raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
943 raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
944 raidz_col_t * const xcol = raidz_col_p(rm, x);
945 raidz_col_t * const ycol = raidz_col_p(rm, y);
946 raidz_col_t *col;
947
948 REC_QR_DEFINE();
949
950 for (ioff = off; ioff < end; ioff += (REC_QR_STRIDE * sizeof (v_t))) {
951 MUL2_SETUP();
62a65a65
RD
952 ZERO(REC_QR_X);
953 ZERO(REC_QR_Y);
ab9f4b0b
GN
954
955 if (ncols == nbigcols) {
956 for (c = firstdc; c < x; c++)
957 REC_QR_INNER_LOOP(c);
958
959 REC_QR_SYN_UPDATE();
960 for (c++; c < y; c++)
961 REC_QR_INNER_LOOP(c);
962
963 REC_QR_SYN_UPDATE();
964 for (c++; c < nbigcols; c++)
965 REC_QR_INNER_LOOP(c);
966 } else {
967 for (c = firstdc; c < nbigcols; c++) {
968 REC_QR_SYN_UPDATE();
969 if (c != x && c != y) {
970 col = &rm->rm_col[c];
971 LOAD(COL_OFF(col, ioff), REC_QR_D);
972 XOR(REC_QR_D, REC_QR_X);
973 XOR(REC_QR_D, REC_QR_Y);
974 }
975 }
976 for (; c < ncols; c++)
977 REC_QR_SYN_UPDATE();
978 }
979
980 XOR_ACC(COL_OFF(qcol, ioff), REC_QR_X);
981 XOR_ACC(COL_OFF(rcol, ioff), REC_QR_Y);
982
983 /* Save Qxy */
984 COPY(REC_QR_X, REC_QR_D);
985
986 /* Calc X */
987 MUL(coeff[MUL_QR_XQ], REC_QR_X); /* X = Q * xqm */
988 XOR(REC_QR_Y, REC_QR_X); /* X = R ^ X */
989 MUL(coeff[MUL_QR_X], REC_QR_X); /* X = X * xm */
990 STORE(COL_OFF(xcol, ioff), REC_QR_X);
991
992 if (calcy) {
993 /* Calc Y */
994 MUL(coeff[MUL_QR_YQ], REC_QR_D); /* X = Q * xqm */
995 XOR(REC_QR_Y, REC_QR_D); /* X = R ^ X */
996 MUL(coeff[MUL_QR_Y], REC_QR_D); /* X = X * xm */
997 STORE(COL_OFF(ycol, ioff), REC_QR_D);
998 }
999 }
1000}
1001
1002/*
1003 * Reconstruct two data columns using QR parity
1004 * @rec_method REC_QR_BLOCK()
1005 *
1006 * @rm RAIDZ map
1007 * @tgtidx array of missing data indexes
1008 */
1009static raidz_inline int
1010raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx)
1011{
1012 const int x = tgtidx[TARGET_X];
1013 const int y = tgtidx[TARGET_Y];
1014 const int ncols = raidz_ncols(rm);
1015 const int nbigcols = raidz_nbigcols(rm);
1016 const size_t xsize = raidz_col_size(rm, x);
1017 const size_t ysize = raidz_col_size(rm, y);
1018 const size_t short_size = raidz_short_size(rm);
1019 unsigned coeff[MUL_CNT];
1020
1021 raidz_rec_qr_coeff(rm, tgtidx, coeff);
1022
1023 raidz_math_begin();
1024
1025 /* 0 - short_size */
1026 REC_QR_BLOCK(rm, 0, short_size, x, y, coeff, ncols, ncols, B_TRUE);
1027
1028 /* short_size - xsize */
1029 REC_QR_BLOCK(rm, short_size, xsize, x, y, coeff, ncols, nbigcols,
1030 xsize == ysize);
1031
1032 raidz_math_end();
1033
1034 return ((1 << CODE_Q) | (1 << CODE_R));
1035}
1036
1037/*
1038 * Reconstruct using PQR parity
1039 */
1040
/*
 * Advance both syndrome accumulators by one column position:
 * Q (REC_PQR_Y) with MUL2 and R (REC_PQR_Z) with MUL4.
 * Wrapped in do { } while (0) so that it behaves as a single statement.
 */
#define	REC_PQR_SYN_UPDATE()						\
do {									\
	MUL2(REC_PQR_Y);						\
	MUL4(REC_PQR_Z);						\
} while (0)

/*
 * Load data column 'c', advance the Q and R syndromes, then fold the data
 * into the P (REC_PQR_X), Q (REC_PQR_Y) and R (REC_PQR_Z) accumulators.
 * Expects 'rm', 'col' and 'ioff' to be in scope at the point of use.
 */
#define	REC_PQR_INNER_LOOP(c)						\
do {									\
	col = &rm->rm_col[(c)];						\
	LOAD(COL_OFF(col, ioff), REC_PQR_D);				\
	REC_PQR_SYN_UPDATE();						\
	XOR(REC_PQR_D, REC_PQR_X);					\
	XOR(REC_PQR_D, REC_PQR_Y);					\
	XOR(REC_PQR_D, REC_PQR_Z);					\
} while (0)
1056
1057/*
1058 * Reconstruction using PQR parity
1059 * @rm RAIDZ map
1060 * @off starting offset
1061 * @end ending offset
1062 * @x missing data column
1063 * @y missing data column
1064 * @z missing data column
1065 * @coeff multiplication coefficients
1066 * @ncols number of column
1067 * @nbigcols number of big columns
1068 * @calcy calculate second data column
1069 * @calcz calculate third data column
1070 */
1071static raidz_inline void
1072REC_PQR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
1073 const int x, const int y, const int z, const unsigned *coeff,
1074 const int ncols, const int nbigcols, const boolean_t calcy,
1075 const boolean_t calcz)
1076{
1077 int c;
1078 size_t ioff;
1079 const size_t firstdc = raidz_parity(rm);
1080 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
1081 raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
1082 raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
1083 raidz_col_t * const xcol = raidz_col_p(rm, x);
1084 raidz_col_t * const ycol = raidz_col_p(rm, y);
1085 raidz_col_t * const zcol = raidz_col_p(rm, z);
1086 raidz_col_t *col;
1087
1088 REC_PQR_DEFINE();
1089
1090 for (ioff = off; ioff < end; ioff += (REC_PQR_STRIDE * sizeof (v_t))) {
1091 MUL2_SETUP();
1092 LOAD(COL_OFF(pcol, ioff), REC_PQR_X);
62a65a65
RD
1093 ZERO(REC_PQR_Y);
1094 ZERO(REC_PQR_Z);
ab9f4b0b
GN
1095
1096 if (ncols == nbigcols) {
1097 for (c = firstdc; c < x; c++)
1098 REC_PQR_INNER_LOOP(c);
1099
1100 REC_PQR_SYN_UPDATE();
1101 for (c++; c < y; c++)
1102 REC_PQR_INNER_LOOP(c);
1103
1104 REC_PQR_SYN_UPDATE();
1105 for (c++; c < z; c++)
1106 REC_PQR_INNER_LOOP(c);
1107
1108 REC_PQR_SYN_UPDATE();
1109 for (c++; c < nbigcols; c++)
1110 REC_PQR_INNER_LOOP(c);
1111 } else {
1112 for (c = firstdc; c < nbigcols; c++) {
1113 REC_PQR_SYN_UPDATE();
1114 if (c != x && c != y && c != z) {
1115 col = &rm->rm_col[c];
1116 LOAD(COL_OFF(col, ioff), REC_PQR_D);
1117 XOR(REC_PQR_D, REC_PQR_X);
1118 XOR(REC_PQR_D, REC_PQR_Y);
1119 XOR(REC_PQR_D, REC_PQR_Z);
1120 }
1121 }
1122 for (; c < ncols; c++)
1123 REC_PQR_SYN_UPDATE();
1124 }
1125
1126 XOR_ACC(COL_OFF(qcol, ioff), REC_PQR_Y);
1127 XOR_ACC(COL_OFF(rcol, ioff), REC_PQR_Z);
1128
1129 /* Save Pxyz and Qxyz */
1130 COPY(REC_PQR_X, REC_PQR_XS);
1131 COPY(REC_PQR_Y, REC_PQR_YS);
1132
1133 /* Calc X */
1134 MUL(coeff[MUL_PQR_XP], REC_PQR_X); /* Xp = Pxyz * xp */
1135 MUL(coeff[MUL_PQR_XQ], REC_PQR_Y); /* Xq = Qxyz * xq */
1136 XOR(REC_PQR_Y, REC_PQR_X);
1137 MUL(coeff[MUL_PQR_XR], REC_PQR_Z); /* Xr = Rxyz * xr */
1138 XOR(REC_PQR_Z, REC_PQR_X); /* X = Xp + Xq + Xr */
1139 STORE(COL_OFF(xcol, ioff), REC_PQR_X);
1140
1141 if (calcy) {
1142 /* Calc Y */
1143 XOR(REC_PQR_X, REC_PQR_XS); /* Pyz = Pxyz + X */
1144 MUL(coeff[MUL_PQR_YU], REC_PQR_X); /* Xq = X * upd_q */
1145 XOR(REC_PQR_X, REC_PQR_YS); /* Qyz = Qxyz + Xq */
1146 COPY(REC_PQR_XS, REC_PQR_X); /* restore Pyz */
1147 MUL(coeff[MUL_PQR_YP], REC_PQR_X); /* Yp = Pyz * yp */
1148 MUL(coeff[MUL_PQR_YQ], REC_PQR_YS); /* Yq = Qyz * yq */
1149 XOR(REC_PQR_X, REC_PQR_YS); /* Y = Yp + Yq */
1150 STORE(COL_OFF(ycol, ioff), REC_PQR_YS);
1151 }
1152
1153 if (calcz) {
1154 /* Calc Z */
1155 XOR(REC_PQR_XS, REC_PQR_YS); /* Z = Pz = Pyz + Y */
1156 STORE(COL_OFF(zcol, ioff), REC_PQR_YS);
1157 }
1158 }
1159}
1160
1161/*
1162 * Reconstruct three data columns using PQR parity
1163 * @rec_method REC_PQR_BLOCK()
1164 *
1165 * @rm RAIDZ map
1166 * @tgtidx array of missing data indexes
1167 */
1168static raidz_inline int
1169raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx)
1170{
1171 const int x = tgtidx[TARGET_X];
1172 const int y = tgtidx[TARGET_Y];
1173 const int z = tgtidx[TARGET_Z];
1174 const int ncols = raidz_ncols(rm);
1175 const int nbigcols = raidz_nbigcols(rm);
1176 const size_t xsize = raidz_col_size(rm, x);
1177 const size_t ysize = raidz_col_size(rm, y);
1178 const size_t zsize = raidz_col_size(rm, z);
1179 const size_t short_size = raidz_short_size(rm);
1180 unsigned coeff[MUL_CNT];
1181
1182 raidz_rec_pqr_coeff(rm, tgtidx, coeff);
1183
1184 raidz_math_begin();
1185
1186 /* 0 - short_size */
1187 REC_PQR_BLOCK(rm, 0, short_size, x, y, z, coeff, ncols, ncols,
1188 B_TRUE, B_TRUE);
1189
1190 /* short_size - xsize */
1191 REC_PQR_BLOCK(rm, short_size, xsize, x, y, z, coeff, ncols, nbigcols,
1192 xsize == ysize, xsize == zsize);
1193
1194 raidz_math_end();
1195
1196 return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
1197}
1198
1199#endif /* _VDEV_RAIDZ_MATH_IMPL_H */