]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/vdev_raidz_math_impl.h
ABD changes for vectorized RAIDZ
[mirror_zfs.git] / module / zfs / vdev_raidz_math_impl.h
CommitLineData
ab9f4b0b
GN
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
23 */
24
25#ifndef _VDEV_RAIDZ_MATH_IMPL_H
26#define _VDEV_RAIDZ_MATH_IMPL_H
27
28#include <sys/types.h>
29
/* Request unconditional inlining of the per-block kernels below */
#define	raidz_inline	inline __attribute__((always_inline))

/* Provide noinline only when the build environment has not already done so */
#if !defined(noinline)
#define	noinline	__attribute__((noinline))
#endif

/*
 * Calculate data offset in raidz column, offset is in bytes.
 * The result is typed as a v_t pointer.
 *
 * ABD BRINGUP -- rc_abd is reinterpreted as a plain byte pointer here;
 * this needs to be refactored for ABD.
 */
#define	COL_OFF(col, off)	((v_t *)(((char *)(col)->rc_abd) + (off)))
ab9f4b0b
GN
38
/*
 * PARITY CALCULATION
 * An optimized function is called for the full length of the data columns.
 * If the RAIDZ map contains remainder columns (shorter columns), the same
 * function is called again for the remainder of the full columns.
 *
 * GEN_[P|PQ|PQR]_BLOCK() functions are designed to be efficiently in-lined by
 * the compiler. This removes a lot of conditionals from the inside loop which
 * makes the code faster, especially for vectorized code.
 * They are also highly parametrized, allowing each implementation to define
 * its most optimal stride and register allocation.
 */
51
52static raidz_inline void
53GEN_P_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
54 const int ncols)
55{
56 int c;
57 size_t ioff;
58 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
59 raidz_col_t *col;
60
61 GEN_P_DEFINE();
62
63 for (ioff = off; ioff < end; ioff += (GEN_P_STRIDE * sizeof (v_t))) {
64 LOAD(COL_OFF(&(rm->rm_col[1]), ioff), GEN_P_P);
65
66 for (c = 2; c < ncols; c++) {
67 col = &rm->rm_col[c];
68 XOR_ACC(COL_OFF(col, ioff), GEN_P_P);
69 }
70
71 STORE(COL_OFF(pcol, ioff), GEN_P_P);
72 }
73}
74
75/*
76 * Generate P parity (RAIDZ1)
77 *
78 * @rm RAIDZ map
79 */
80static raidz_inline void
81raidz_generate_p_impl(raidz_map_t * const rm)
82{
83 const int ncols = raidz_ncols(rm);
84 const size_t psize = raidz_big_size(rm);
85 const size_t short_size = raidz_short_size(rm);
86
a6255b7f
DQ
87 panic("not ABD ready");
88
ab9f4b0b
GN
89 raidz_math_begin();
90
91 /* short_size */
92 GEN_P_BLOCK(rm, 0, short_size, ncols);
93
94 /* fullcols */
95 GEN_P_BLOCK(rm, short_size, psize, raidz_nbigcols(rm));
96
97 raidz_math_end();
98}
99
100static raidz_inline void
101GEN_PQ_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
102 const int ncols, const int nbigcols)
103{
104 int c;
105 size_t ioff;
106 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
107 raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
108 raidz_col_t *col;
109
110 GEN_PQ_DEFINE();
111
112 MUL2_SETUP();
113
114 for (ioff = off; ioff < end; ioff += (GEN_PQ_STRIDE * sizeof (v_t))) {
115 LOAD(COL_OFF(&rm->rm_col[2], ioff), GEN_PQ_P);
116 COPY(GEN_PQ_P, GEN_PQ_Q);
117
118 for (c = 3; c < nbigcols; c++) {
119 col = &rm->rm_col[c];
120 LOAD(COL_OFF(col, ioff), GEN_PQ_D);
121 MUL2(GEN_PQ_Q);
122 XOR(GEN_PQ_D, GEN_PQ_P);
123 XOR(GEN_PQ_D, GEN_PQ_Q);
124 }
125
126 STORE(COL_OFF(pcol, ioff), GEN_PQ_P);
127
128 for (; c < ncols; c++)
129 MUL2(GEN_PQ_Q);
130
131 STORE(COL_OFF(qcol, ioff), GEN_PQ_Q);
132 }
133}
134
135/*
136 * Generate PQ parity (RAIDZ2)
137 *
138 * @rm RAIDZ map
139 */
140static raidz_inline void
141raidz_generate_pq_impl(raidz_map_t * const rm)
142{
143 const int ncols = raidz_ncols(rm);
144 const size_t psize = raidz_big_size(rm);
145 const size_t short_size = raidz_short_size(rm);
146
a6255b7f
DQ
147 panic("not ABD ready");
148
ab9f4b0b
GN
149 raidz_math_begin();
150
151 /* short_size */
152 GEN_PQ_BLOCK(rm, 0, short_size, ncols, ncols);
153
154 /* fullcols */
155 GEN_PQ_BLOCK(rm, short_size, psize, ncols, raidz_nbigcols(rm));
156
157 raidz_math_end();
158}
159
160
161static raidz_inline void
162GEN_PQR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
163 const int ncols, const int nbigcols)
164{
165 int c;
166 size_t ioff;
167 raidz_col_t *col;
168 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
169 raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
170 raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
171
172 GEN_PQR_DEFINE();
173
174 MUL2_SETUP();
175
176 for (ioff = off; ioff < end; ioff += (GEN_PQR_STRIDE * sizeof (v_t))) {
177 LOAD(COL_OFF(&rm->rm_col[3], ioff), GEN_PQR_P);
178 COPY(GEN_PQR_P, GEN_PQR_Q);
179 COPY(GEN_PQR_P, GEN_PQR_R);
180
181 for (c = 4; c < nbigcols; c++) {
182 col = &rm->rm_col[c];
183 LOAD(COL_OFF(col, ioff), GEN_PQR_D);
184 MUL2(GEN_PQR_Q);
185 MUL4(GEN_PQR_R);
186 XOR(GEN_PQR_D, GEN_PQR_P);
187 XOR(GEN_PQR_D, GEN_PQR_Q);
188 XOR(GEN_PQR_D, GEN_PQR_R);
189 }
190
191 STORE(COL_OFF(pcol, ioff), GEN_PQR_P);
192
193 for (; c < ncols; c++) {
194 MUL2(GEN_PQR_Q);
195 MUL4(GEN_PQR_R);
196 }
197
198 STORE(COL_OFF(qcol, ioff), GEN_PQR_Q);
199 STORE(COL_OFF(rcol, ioff), GEN_PQR_R);
200 }
201}
202
203
204/*
205 * Generate PQR parity (RAIDZ3)
206 *
207 * @rm RAIDZ map
208 */
209static raidz_inline void
210raidz_generate_pqr_impl(raidz_map_t * const rm)
211{
212 const int ncols = raidz_ncols(rm);
213 const size_t psize = raidz_big_size(rm);
214 const size_t short_size = raidz_short_size(rm);
215
a6255b7f
DQ
216 panic("not ABD ready");
217
ab9f4b0b
GN
218 raidz_math_begin();
219
220 /* short_size */
221 GEN_PQR_BLOCK(rm, 0, short_size, ncols, ncols);
222
223 /* fullcols */
224 GEN_PQR_BLOCK(rm, short_size, psize, ncols, raidz_nbigcols(rm));
225
226 raidz_math_end();
227}
228
/*
 * DATA RECONSTRUCTION
 *
 * Data reconstruction process consists of two phases:
 *	- Syndrome calculation
 *	- Data reconstruction
 *
 * Syndrome is calculated by generating parity using available data columns
 * and zeros in places of erasure. Existing parity is added to corresponding
 * syndrome value to obtain the [P|Q|R]syn values from equation:
 *	P = Psyn + Dx + Dy + Dz
 *	Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz
 *	R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz
 *
 * For data reconstruction phase, the corresponding equations are solved
 * for missing data (Dx, Dy, Dz). This generally involves multiplying known
 * symbols by a coefficient and adding them together. The multiplication
 * constant coefficients are calculated ahead of the operation in
 * raidz_rec_[q|r|pq|pr|qr|pqr]_coeff() functions.
 *
 * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big"
 * and "short" columns.
 * For this reason, reconstruction is performed in a minimum of
 * two steps. First, from offset 0 to short_size, then from short_size to
 * big_size. Calculation functions REC_[*]_BLOCK() are implemented to work
 * over both ranges. The split also enables removal of conditional expressions
 * from loop bodies, improving throughput of SIMD implementations.
 * For the best performance, all functions marked with raidz_inline attribute
 * must be inlined by compiler.
 *
 *    parity          data
 *    columns         columns
 * <----------> <------------------>
 *                   x       y  <----+ missing columns (x, y)
 *                   |       |
 * +---+---+---+---+-v-+---+-v-+---+   ^ 0
 * |   |   |   |   |   |   |   |   |   |
 * |   |   |   |   |   |   |   |   |   |
 * | P | Q | R | D | D | D | D | D |   |
 * |   |   |   | 0 | 1 | 2 | 3 | 4 |   |
 * |   |   |   |   |   |   |   |   |   v
 * |   |   |   |   |   +---+---+---+   ^ short_size
 * |   |   |   |   |   |               |
 * +---+---+---+---+---+               v big_size
 * <------------------> <---------->
 *      big columns     short columns
 *
 */
277
278/*
279 * Functions calculate multiplication constants for data reconstruction.
280 * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
281 * used parity columns for reconstruction.
282 * @rm RAIDZ map
283 * @tgtidx array of missing data indexes
284 * @coeff output array of coefficients. Array must be user
285 * provided and must hold minimum MUL_CNT values
286 */
287static noinline void
288raidz_rec_q_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
289{
290 const unsigned ncols = raidz_ncols(rm);
291 const unsigned x = tgtidx[TARGET_X];
292
293 coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
294}
295
296static noinline void
297raidz_rec_r_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
298{
299 const unsigned ncols = raidz_ncols(rm);
300 const unsigned x = tgtidx[TARGET_X];
301
302 coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
303}
304
305static noinline void
306raidz_rec_pq_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
307{
308 const unsigned ncols = raidz_ncols(rm);
309 const unsigned x = tgtidx[TARGET_X];
310 const unsigned y = tgtidx[TARGET_Y];
311 gf_t a, b, e;
312
313 a = gf_exp2(x + 255 - y);
314 b = gf_exp2(255 - (ncols - x - 1));
315 e = a ^ 0x01;
316
317 coeff[MUL_PQ_X] = gf_div(a, e);
318 coeff[MUL_PQ_Y] = gf_div(b, e);
319}
320
321static noinline void
322raidz_rec_pr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
323{
324 const unsigned ncols = raidz_ncols(rm);
325 const unsigned x = tgtidx[TARGET_X];
326 const unsigned y = tgtidx[TARGET_Y];
327
328 gf_t a, b, e;
329
330 a = gf_exp4(x + 255 - y);
331 b = gf_exp4(255 - (ncols - x - 1));
332 e = a ^ 0x01;
333
334 coeff[MUL_PR_X] = gf_div(a, e);
335 coeff[MUL_PR_Y] = gf_div(b, e);
336}
337
338static noinline void
339raidz_rec_qr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
340{
341 const unsigned ncols = raidz_ncols(rm);
342 const unsigned x = tgtidx[TARGET_X];
343 const unsigned y = tgtidx[TARGET_Y];
344
345 gf_t nx, ny, nxxy, nxyy, d;
346
347 nx = gf_exp2(ncols - x - 1);
348 ny = gf_exp2(ncols - y - 1);
349 nxxy = gf_mul(gf_mul(nx, nx), ny);
350 nxyy = gf_mul(gf_mul(nx, ny), ny);
351 d = nxxy ^ nxyy;
352
353 coeff[MUL_QR_XQ] = ny;
354 coeff[MUL_QR_X] = gf_div(ny, d);
355 coeff[MUL_QR_YQ] = nx;
356 coeff[MUL_QR_Y] = gf_div(nx, d);
357}
358
359static noinline void
360raidz_rec_pqr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
361{
362 const unsigned ncols = raidz_ncols(rm);
363 const unsigned x = tgtidx[TARGET_X];
364 const unsigned y = tgtidx[TARGET_Y];
365 const unsigned z = tgtidx[TARGET_Z];
366
367 gf_t nx, ny, nz, nxx, nyy, nzz, nyyz, nyzz, xd, yd;
368
369 nx = gf_exp2(ncols - x - 1);
370 ny = gf_exp2(ncols - y - 1);
371 nz = gf_exp2(ncols - z - 1);
372
373 nxx = gf_exp4(ncols - x - 1);
374 nyy = gf_exp4(ncols - y - 1);
375 nzz = gf_exp4(ncols - z - 1);
376
377 nyyz = gf_mul(gf_mul(ny, nz), ny);
378 nyzz = gf_mul(nzz, ny);
379
380 xd = gf_mul(nxx, ny) ^ gf_mul(nx, nyy) ^ nyyz ^
381 gf_mul(nxx, nz) ^ gf_mul(nzz, nx) ^ nyzz;
382
383 yd = gf_inv(ny ^ nz);
384
385 coeff[MUL_PQR_XP] = gf_div(nyyz ^ nyzz, xd);
386 coeff[MUL_PQR_XQ] = gf_div(nyy ^ nzz, xd);
387 coeff[MUL_PQR_XR] = gf_div(ny ^ nz, xd);
388 coeff[MUL_PQR_YU] = nx;
389 coeff[MUL_PQR_YP] = gf_mul(nz, yd);
390 coeff[MUL_PQR_YQ] = yd;
391}
392
393
394/*
395 * Reconstruction using P parity
396 * @rm RAIDZ map
397 * @off starting offset
398 * @end ending offset
399 * @x missing data column
400 * @ncols number of column
401 */
402static raidz_inline void
403REC_P_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
404 const int x, const int ncols)
405{
406 int c;
407 size_t ioff;
408 const size_t firstdc = raidz_parity(rm);
409 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
410 raidz_col_t * const xcol = raidz_col_p(rm, x);
411 raidz_col_t *col;
412
413 REC_P_DEFINE();
414
415 for (ioff = off; ioff < end; ioff += (REC_P_STRIDE * sizeof (v_t))) {
416 LOAD(COL_OFF(pcol, ioff), REC_P_X);
417
418 for (c = firstdc; c < x; c++) {
419 col = &rm->rm_col[c];
420 XOR_ACC(COL_OFF(col, ioff), REC_P_X);
421 }
422
423 for (c++; c < ncols; c++) {
424 col = &rm->rm_col[c];
425 XOR_ACC(COL_OFF(col, ioff), REC_P_X);
426 }
427
428 STORE(COL_OFF(xcol, ioff), REC_P_X);
429 }
430}
431
432/*
433 * Reconstruct single data column using P parity
434 * @rec_method REC_P_BLOCK()
435 *
436 * @rm RAIDZ map
437 * @tgtidx array of missing data indexes
438 */
439static raidz_inline int
440raidz_reconstruct_p_impl(raidz_map_t *rm, const int *tgtidx)
441{
442 const int x = tgtidx[TARGET_X];
443 const int ncols = raidz_ncols(rm);
444 const int nbigcols = raidz_nbigcols(rm);
445 const size_t xsize = raidz_col_size(rm, x);
446 const size_t short_size = raidz_short_size(rm);
447
448 raidz_math_begin();
449
450 /* 0 - short_size */
451 REC_P_BLOCK(rm, 0, short_size, x, ncols);
452
453 /* short_size - xsize */
454 REC_P_BLOCK(rm, short_size, xsize, x, nbigcols);
455
456 raidz_math_end();
457
458 return (1 << CODE_P);
459}
460
/*
 * Reconstruct using Q parity
 */

/* Step the Q syndrome accumulator with MUL2 */
#define	REC_Q_SYN_UPDATE()	MUL2(REC_Q_X)

/*
 * Step the Q syndrome and fold data column `c` into it.
 *
 * Uses `rm`, `col` and `ioff` from the enclosing function. The body is
 * wrapped in do { } while (0) so the expansion is one statement and can be
 * used safely as the body of an unbraced if/else or loop.
 */
#define	REC_Q_INNER_LOOP(c)						\
do {									\
	col = &rm->rm_col[(c)];						\
	REC_Q_SYN_UPDATE();						\
	XOR_ACC(COL_OFF(col, ioff), REC_Q_X);				\
} while (0)
473
474/*
475 * Reconstruction using Q parity
476 * @rm RAIDZ map
477 * @off starting offset
478 * @end ending offset
479 * @x missing data column
480 * @coeff multiplication coefficients
481 * @ncols number of column
482 * @nbigcols number of big columns
483 */
484static raidz_inline void
485REC_Q_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
486 const int x, const unsigned *coeff, const int ncols, const int nbigcols)
487{
488 int c;
489 size_t ioff = 0;
490 const size_t firstdc = raidz_parity(rm);
491 raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
492 raidz_col_t * const xcol = raidz_col_p(rm, x);
493 raidz_col_t *col;
494
495 REC_Q_DEFINE();
496
497 for (ioff = off; ioff < end; ioff += (REC_Q_STRIDE * sizeof (v_t))) {
498 MUL2_SETUP();
499
62a65a65 500 ZERO(REC_Q_X);
ab9f4b0b
GN
501
502 if (ncols == nbigcols) {
503 for (c = firstdc; c < x; c++)
504 REC_Q_INNER_LOOP(c);
505
506 REC_Q_SYN_UPDATE();
507 for (c++; c < nbigcols; c++)
508 REC_Q_INNER_LOOP(c);
509 } else {
510 for (c = firstdc; c < nbigcols; c++) {
511 REC_Q_SYN_UPDATE();
512 if (x != c) {
513 col = &rm->rm_col[c];
514 XOR_ACC(COL_OFF(col, ioff), REC_Q_X);
515 }
516 }
517 for (; c < ncols; c++)
518 REC_Q_SYN_UPDATE();
519 }
520
521 XOR_ACC(COL_OFF(qcol, ioff), REC_Q_X);
522 MUL(coeff[MUL_Q_X], REC_Q_X);
523 STORE(COL_OFF(xcol, ioff), REC_Q_X);
524 }
525}
526
527/*
528 * Reconstruct single data column using Q parity
529 * @rec_method REC_Q_BLOCK()
530 *
531 * @rm RAIDZ map
532 * @tgtidx array of missing data indexes
533 */
534static raidz_inline int
535raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx)
536{
537 const int x = tgtidx[TARGET_X];
538 const int ncols = raidz_ncols(rm);
539 const int nbigcols = raidz_nbigcols(rm);
540 const size_t xsize = raidz_col_size(rm, x);
541 const size_t short_size = raidz_short_size(rm);
542 unsigned coeff[MUL_CNT];
543
544 raidz_rec_q_coeff(rm, tgtidx, coeff);
545
546 raidz_math_begin();
547
548 /* 0 - short_size */
549 REC_Q_BLOCK(rm, 0, short_size, x, coeff, ncols, ncols);
550
551 /* short_size - xsize */
552 REC_Q_BLOCK(rm, short_size, xsize, x, coeff, ncols, nbigcols);
553
554 raidz_math_end();
555
556 return (1 << CODE_Q);
557}
558
/*
 * Reconstruct using R parity
 */

/* Step the R syndrome accumulator with MUL4 */
#define	REC_R_SYN_UPDATE()	MUL4(REC_R_X)

/*
 * Step the R syndrome and fold data column `c` into it.
 *
 * Uses `rm`, `col` and `ioff` from the enclosing function. The body is
 * wrapped in do { } while (0) so the expansion is one statement and can be
 * used safely as the body of an unbraced if/else or loop.
 */
#define	REC_R_INNER_LOOP(c)						\
do {									\
	col = &rm->rm_col[(c)];						\
	REC_R_SYN_UPDATE();						\
	XOR_ACC(COL_OFF(col, ioff), REC_R_X);				\
} while (0)
570
571/*
572 * Reconstruction using R parity
573 * @rm RAIDZ map
574 * @off starting offset
575 * @end ending offset
576 * @x missing data column
577 * @coeff multiplication coefficients
578 * @ncols number of column
579 * @nbigcols number of big columns
580 */
581static raidz_inline void
582REC_R_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
583 const int x, const unsigned *coeff, const int ncols, const int nbigcols)
584{
585 int c;
586 size_t ioff = 0;
587 const size_t firstdc = raidz_parity(rm);
588 raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
589 raidz_col_t * const xcol = raidz_col_p(rm, x);
590 raidz_col_t *col;
591
592 REC_R_DEFINE();
593
594 for (ioff = off; ioff < end; ioff += (REC_R_STRIDE * sizeof (v_t))) {
595 MUL2_SETUP();
596
62a65a65 597 ZERO(REC_R_X);
ab9f4b0b
GN
598
599 if (ncols == nbigcols) {
600 for (c = firstdc; c < x; c++)
601 REC_R_INNER_LOOP(c);
602
603 REC_R_SYN_UPDATE();
604 for (c++; c < nbigcols; c++)
605 REC_R_INNER_LOOP(c);
606 } else {
607 for (c = firstdc; c < nbigcols; c++) {
608 REC_R_SYN_UPDATE();
609 if (c != x) {
610 col = &rm->rm_col[c];
611 XOR_ACC(COL_OFF(col, ioff), REC_R_X);
612 }
613 }
614 for (; c < ncols; c++)
615 REC_R_SYN_UPDATE();
616 }
617
618 XOR_ACC(COL_OFF(rcol, ioff), REC_R_X);
619 MUL(coeff[MUL_R_X], REC_R_X);
620 STORE(COL_OFF(xcol, ioff), REC_R_X);
621 }
622}
623
624/*
625 * Reconstruct single data column using R parity
626 * @rec_method REC_R_BLOCK()
627 *
628 * @rm RAIDZ map
629 * @tgtidx array of missing data indexes
630 */
631static raidz_inline int
632raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx)
633{
634 const int x = tgtidx[TARGET_X];
635 const int ncols = raidz_ncols(rm);
636 const int nbigcols = raidz_nbigcols(rm);
637 const size_t xsize = raidz_col_size(rm, x);
638 const size_t short_size = raidz_short_size(rm);
639 unsigned coeff[MUL_CNT];
640
641 raidz_rec_r_coeff(rm, tgtidx, coeff);
642
643 raidz_math_begin();
644
645 /* 0 - short_size */
646 REC_R_BLOCK(rm, 0, short_size, x, coeff, ncols, ncols);
647
648 /* short_size - xsize */
649 REC_R_BLOCK(rm, short_size, xsize, x, coeff, ncols, nbigcols);
650
651 raidz_math_end();
652
653 return (1 << CODE_R);
654}
655
/*
 * Reconstruct using PQ parity
 */

/* Step the Q syndrome accumulator (REC_PQ_Y) with MUL2 */
#define	REC_PQ_SYN_UPDATE()	MUL2(REC_PQ_Y)

/*
 * Load data column `c`, step the Q syndrome, then add the data to both the
 * P (REC_PQ_X) and Q (REC_PQ_Y) accumulators.
 *
 * Uses `rm`, `col` and `ioff` from the enclosing function. The body is
 * wrapped in do { } while (0) so the expansion is one statement and can be
 * used safely as the body of an unbraced if/else or loop.
 */
#define	REC_PQ_INNER_LOOP(c)						\
do {									\
	col = &rm->rm_col[(c)];						\
	LOAD(COL_OFF(col, ioff), REC_PQ_D);				\
	REC_PQ_SYN_UPDATE();						\
	XOR(REC_PQ_D, REC_PQ_X);					\
	XOR(REC_PQ_D, REC_PQ_Y);					\
} while (0)
669
670/*
671 * Reconstruction using PQ parity
672 * @rm RAIDZ map
673 * @off starting offset
674 * @end ending offset
675 * @x missing data column
676 * @y missing data column
677 * @coeff multiplication coefficients
678 * @ncols number of column
679 * @nbigcols number of big columns
680 * @calcy calculate second data column
681 */
682static raidz_inline void
683REC_PQ_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
684 const int x, const int y, const unsigned *coeff, const int ncols,
685 const int nbigcols, const boolean_t calcy)
686{
687 int c;
688 size_t ioff = 0;
689 const size_t firstdc = raidz_parity(rm);
690 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
691 raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
692 raidz_col_t * const xcol = raidz_col_p(rm, x);
693 raidz_col_t * const ycol = raidz_col_p(rm, y);
694 raidz_col_t *col;
695
696 REC_PQ_DEFINE();
697
698 for (ioff = off; ioff < end; ioff += (REC_PQ_STRIDE * sizeof (v_t))) {
699 LOAD(COL_OFF(pcol, ioff), REC_PQ_X);
62a65a65 700 ZERO(REC_PQ_Y);
ab9f4b0b
GN
701 MUL2_SETUP();
702
703 if (ncols == nbigcols) {
704 for (c = firstdc; c < x; c++)
705 REC_PQ_INNER_LOOP(c);
706
707 REC_PQ_SYN_UPDATE();
708 for (c++; c < y; c++)
709 REC_PQ_INNER_LOOP(c);
710
711 REC_PQ_SYN_UPDATE();
712 for (c++; c < nbigcols; c++)
713 REC_PQ_INNER_LOOP(c);
714 } else {
715 for (c = firstdc; c < nbigcols; c++) {
716 REC_PQ_SYN_UPDATE();
717 if (c != x && c != y) {
718 col = &rm->rm_col[c];
719 LOAD(COL_OFF(col, ioff), REC_PQ_D);
720 XOR(REC_PQ_D, REC_PQ_X);
721 XOR(REC_PQ_D, REC_PQ_Y);
722 }
723 }
724 for (; c < ncols; c++)
725 REC_PQ_SYN_UPDATE();
726 }
727
728 XOR_ACC(COL_OFF(qcol, ioff), REC_PQ_Y);
729
730 /* Save Pxy */
731 COPY(REC_PQ_X, REC_PQ_D);
732
733 /* Calc X */
734 MUL(coeff[MUL_PQ_X], REC_PQ_X);
735 MUL(coeff[MUL_PQ_Y], REC_PQ_Y);
736 XOR(REC_PQ_Y, REC_PQ_X);
737 STORE(COL_OFF(xcol, ioff), REC_PQ_X);
738
739 if (calcy) {
740 /* Calc Y */
741 XOR(REC_PQ_D, REC_PQ_X);
742 STORE(COL_OFF(ycol, ioff), REC_PQ_X);
743 }
744 }
745}
746
747/*
748 * Reconstruct two data columns using PQ parity
749 * @rec_method REC_PQ_BLOCK()
750 *
751 * @rm RAIDZ map
752 * @tgtidx array of missing data indexes
753 */
754static raidz_inline int
755raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx)
756{
757 const int x = tgtidx[TARGET_X];
758 const int y = tgtidx[TARGET_Y];
759 const int ncols = raidz_ncols(rm);
760 const int nbigcols = raidz_nbigcols(rm);
761 const size_t xsize = raidz_col_size(rm, x);
762 const size_t ysize = raidz_col_size(rm, y);
763 const size_t short_size = raidz_short_size(rm);
764 unsigned coeff[MUL_CNT];
765
766 raidz_rec_pq_coeff(rm, tgtidx, coeff);
767
768 raidz_math_begin();
769
770 /* 0 - short_size */
771 REC_PQ_BLOCK(rm, 0, short_size, x, y, coeff, ncols, ncols, B_TRUE);
772
773 /* short_size - xsize */
774 REC_PQ_BLOCK(rm, short_size, xsize, x, y, coeff, ncols, nbigcols,
775 xsize == ysize);
776
777 raidz_math_end();
778
779 return ((1 << CODE_P) | (1 << CODE_Q));
780}
781
/*
 * Reconstruct using PR parity
 */

/* Step the R syndrome accumulator (REC_PR_Y) with MUL4 */
#define	REC_PR_SYN_UPDATE()	MUL4(REC_PR_Y)

/*
 * Load data column `c`, step the R syndrome, then add the data to both the
 * P (REC_PR_X) and R (REC_PR_Y) accumulators.
 *
 * Uses `rm`, `col` and `ioff` from the enclosing function. The body is
 * wrapped in do { } while (0) so the expansion is one statement and can be
 * used safely as the body of an unbraced if/else or loop.
 */
#define	REC_PR_INNER_LOOP(c)						\
do {									\
	col = &rm->rm_col[(c)];						\
	LOAD(COL_OFF(col, ioff), REC_PR_D);				\
	REC_PR_SYN_UPDATE();						\
	XOR(REC_PR_D, REC_PR_X);					\
	XOR(REC_PR_D, REC_PR_Y);					\
} while (0)
795
796/*
797 * Reconstruction using PR parity
798 * @rm RAIDZ map
799 * @off starting offset
800 * @end ending offset
801 * @x missing data column
802 * @y missing data column
803 * @coeff multiplication coefficients
804 * @ncols number of column
805 * @nbigcols number of big columns
806 * @calcy calculate second data column
807 */
808static raidz_inline void
809REC_PR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
810 const int x, const int y, const unsigned *coeff, const int ncols,
811 const int nbigcols, const boolean_t calcy)
812{
813 int c;
814 size_t ioff;
815 const size_t firstdc = raidz_parity(rm);
816 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
817 raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
818 raidz_col_t * const xcol = raidz_col_p(rm, x);
819 raidz_col_t * const ycol = raidz_col_p(rm, y);
820 raidz_col_t *col;
821
822 REC_PR_DEFINE();
823
824 for (ioff = off; ioff < end; ioff += (REC_PR_STRIDE * sizeof (v_t))) {
825 LOAD(COL_OFF(pcol, ioff), REC_PR_X);
62a65a65 826 ZERO(REC_PR_Y);
ab9f4b0b
GN
827 MUL2_SETUP();
828
829 if (ncols == nbigcols) {
830 for (c = firstdc; c < x; c++)
831 REC_PR_INNER_LOOP(c);
832
833 REC_PR_SYN_UPDATE();
834 for (c++; c < y; c++)
835 REC_PR_INNER_LOOP(c);
836
837 REC_PR_SYN_UPDATE();
838 for (c++; c < nbigcols; c++)
839 REC_PR_INNER_LOOP(c);
840 } else {
841 for (c = firstdc; c < nbigcols; c++) {
842 REC_PR_SYN_UPDATE();
843 if (c != x && c != y) {
844 col = &rm->rm_col[c];
845 LOAD(COL_OFF(col, ioff), REC_PR_D);
846 XOR(REC_PR_D, REC_PR_X);
847 XOR(REC_PR_D, REC_PR_Y);
848 }
849 }
850 for (; c < ncols; c++)
851 REC_PR_SYN_UPDATE();
852 }
853
854 XOR_ACC(COL_OFF(rcol, ioff), REC_PR_Y);
855
856 /* Save Pxy */
857 COPY(REC_PR_X, REC_PR_D);
858
859 /* Calc X */
860 MUL(coeff[MUL_PR_X], REC_PR_X);
861 MUL(coeff[MUL_PR_Y], REC_PR_Y);
862 XOR(REC_PR_Y, REC_PR_X);
863 STORE(COL_OFF(xcol, ioff), REC_PR_X);
864
865 if (calcy) {
866 /* Calc Y */
867 XOR(REC_PR_D, REC_PR_X);
868 STORE(COL_OFF(ycol, ioff), REC_PR_X);
869 }
870 }
871}
872
873
874/*
875 * Reconstruct two data columns using PR parity
876 * @rec_method REC_PR_BLOCK()
877 *
878 * @rm RAIDZ map
879 * @tgtidx array of missing data indexes
880 */
881static raidz_inline int
882raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx)
883{
884 const int x = tgtidx[TARGET_X];
885 const int y = tgtidx[TARGET_Y];
886 const int ncols = raidz_ncols(rm);
887 const int nbigcols = raidz_nbigcols(rm);
888 const size_t xsize = raidz_col_size(rm, x);
889 const size_t ysize = raidz_col_size(rm, y);
890 const size_t short_size = raidz_short_size(rm);
891 unsigned coeff[MUL_CNT];
892
893 raidz_rec_pr_coeff(rm, tgtidx, coeff);
894
895 raidz_math_begin();
896
897 /* 0 - short_size */
898 REC_PR_BLOCK(rm, 0, short_size, x, y, coeff, ncols, ncols, B_TRUE);
899
900 /* short_size - xsize */
901 REC_PR_BLOCK(rm, short_size, xsize, x, y, coeff, ncols, nbigcols,
902 xsize == ysize);
903
904 raidz_math_end();
905
906 return ((1 << CODE_P) | (1 << CODE_R));
907}
908
909
/*
 * Reconstruct using QR parity
 */

/*
 * Step both syndrome accumulators: REC_QR_X with MUL2 and REC_QR_Y with
 * MUL4. Wrapped in do { } while (0) so the two statements expand as one and
 * the macro is safe as the body of an unbraced if/else or loop.
 */
#define	REC_QR_SYN_UPDATE()						\
do {									\
	MUL2(REC_QR_X);							\
	MUL4(REC_QR_Y);							\
} while (0)

/*
 * Load data column `c`, step both syndromes, then add the data to the
 * Q (REC_QR_X) and R (REC_QR_Y) accumulators.
 *
 * Uses `rm`, `col` and `ioff` from the enclosing function.
 */
#define	REC_QR_INNER_LOOP(c)						\
do {									\
	col = &rm->rm_col[(c)];						\
	LOAD(COL_OFF(col, ioff), REC_QR_D);				\
	REC_QR_SYN_UPDATE();						\
	XOR(REC_QR_D, REC_QR_X);					\
	XOR(REC_QR_D, REC_QR_Y);					\
} while (0)
928
929/*
930 * Reconstruction using QR parity
931 * @rm RAIDZ map
932 * @off starting offset
933 * @end ending offset
934 * @x missing data column
935 * @y missing data column
936 * @coeff multiplication coefficients
937 * @ncols number of column
938 * @nbigcols number of big columns
939 * @calcy calculate second data column
940 */
941static raidz_inline void
942REC_QR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
943 const int x, const int y, const unsigned *coeff, const int ncols,
944 const int nbigcols, const boolean_t calcy)
945{
946 int c;
947 size_t ioff;
948 const size_t firstdc = raidz_parity(rm);
949 raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
950 raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
951 raidz_col_t * const xcol = raidz_col_p(rm, x);
952 raidz_col_t * const ycol = raidz_col_p(rm, y);
953 raidz_col_t *col;
954
955 REC_QR_DEFINE();
956
957 for (ioff = off; ioff < end; ioff += (REC_QR_STRIDE * sizeof (v_t))) {
958 MUL2_SETUP();
62a65a65
RD
959 ZERO(REC_QR_X);
960 ZERO(REC_QR_Y);
ab9f4b0b
GN
961
962 if (ncols == nbigcols) {
963 for (c = firstdc; c < x; c++)
964 REC_QR_INNER_LOOP(c);
965
966 REC_QR_SYN_UPDATE();
967 for (c++; c < y; c++)
968 REC_QR_INNER_LOOP(c);
969
970 REC_QR_SYN_UPDATE();
971 for (c++; c < nbigcols; c++)
972 REC_QR_INNER_LOOP(c);
973 } else {
974 for (c = firstdc; c < nbigcols; c++) {
975 REC_QR_SYN_UPDATE();
976 if (c != x && c != y) {
977 col = &rm->rm_col[c];
978 LOAD(COL_OFF(col, ioff), REC_QR_D);
979 XOR(REC_QR_D, REC_QR_X);
980 XOR(REC_QR_D, REC_QR_Y);
981 }
982 }
983 for (; c < ncols; c++)
984 REC_QR_SYN_UPDATE();
985 }
986
987 XOR_ACC(COL_OFF(qcol, ioff), REC_QR_X);
988 XOR_ACC(COL_OFF(rcol, ioff), REC_QR_Y);
989
990 /* Save Qxy */
991 COPY(REC_QR_X, REC_QR_D);
992
993 /* Calc X */
994 MUL(coeff[MUL_QR_XQ], REC_QR_X); /* X = Q * xqm */
995 XOR(REC_QR_Y, REC_QR_X); /* X = R ^ X */
996 MUL(coeff[MUL_QR_X], REC_QR_X); /* X = X * xm */
997 STORE(COL_OFF(xcol, ioff), REC_QR_X);
998
999 if (calcy) {
1000 /* Calc Y */
1001 MUL(coeff[MUL_QR_YQ], REC_QR_D); /* X = Q * xqm */
1002 XOR(REC_QR_Y, REC_QR_D); /* X = R ^ X */
1003 MUL(coeff[MUL_QR_Y], REC_QR_D); /* X = X * xm */
1004 STORE(COL_OFF(ycol, ioff), REC_QR_D);
1005 }
1006 }
1007}
1008
1009/*
1010 * Reconstruct two data columns using QR parity
1011 * @rec_method REC_QR_BLOCK()
1012 *
1013 * @rm RAIDZ map
1014 * @tgtidx array of missing data indexes
1015 */
1016static raidz_inline int
1017raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx)
1018{
1019 const int x = tgtidx[TARGET_X];
1020 const int y = tgtidx[TARGET_Y];
1021 const int ncols = raidz_ncols(rm);
1022 const int nbigcols = raidz_nbigcols(rm);
1023 const size_t xsize = raidz_col_size(rm, x);
1024 const size_t ysize = raidz_col_size(rm, y);
1025 const size_t short_size = raidz_short_size(rm);
1026 unsigned coeff[MUL_CNT];
1027
1028 raidz_rec_qr_coeff(rm, tgtidx, coeff);
1029
1030 raidz_math_begin();
1031
1032 /* 0 - short_size */
1033 REC_QR_BLOCK(rm, 0, short_size, x, y, coeff, ncols, ncols, B_TRUE);
1034
1035 /* short_size - xsize */
1036 REC_QR_BLOCK(rm, short_size, xsize, x, y, coeff, ncols, nbigcols,
1037 xsize == ysize);
1038
1039 raidz_math_end();
1040
1041 return ((1 << CODE_Q) | (1 << CODE_R));
1042}
1043
/*
 * Reconstruct using PQR parity
 */

/*
 * Step the Q syndrome (REC_PQR_Y) with MUL2 and the R syndrome (REC_PQR_Z)
 * with MUL4. Wrapped in do { } while (0) so the two statements expand as one
 * and the macro is safe as the body of an unbraced if/else or loop.
 */
#define	REC_PQR_SYN_UPDATE()						\
do {									\
	MUL2(REC_PQR_Y);						\
	MUL4(REC_PQR_Z);						\
} while (0)

/*
 * Load data column `c`, step the Q and R syndromes, then add the data to
 * the P (REC_PQR_X), Q (REC_PQR_Y) and R (REC_PQR_Z) accumulators.
 *
 * Uses `rm`, `col` and `ioff` from the enclosing function.
 */
#define	REC_PQR_INNER_LOOP(c)						\
do {									\
	col = &rm->rm_col[(c)];						\
	LOAD(COL_OFF(col, ioff), REC_PQR_D);				\
	REC_PQR_SYN_UPDATE();						\
	XOR(REC_PQR_D, REC_PQR_X);					\
	XOR(REC_PQR_D, REC_PQR_Y);					\
	XOR(REC_PQR_D, REC_PQR_Z);					\
} while (0)
1063
1064/*
1065 * Reconstruction using PQR parity
1066 * @rm RAIDZ map
1067 * @off starting offset
1068 * @end ending offset
1069 * @x missing data column
1070 * @y missing data column
1071 * @z missing data column
1072 * @coeff multiplication coefficients
1073 * @ncols number of column
1074 * @nbigcols number of big columns
1075 * @calcy calculate second data column
1076 * @calcz calculate third data column
1077 */
1078static raidz_inline void
1079REC_PQR_BLOCK(raidz_map_t * const rm, const size_t off, const size_t end,
1080 const int x, const int y, const int z, const unsigned *coeff,
1081 const int ncols, const int nbigcols, const boolean_t calcy,
1082 const boolean_t calcz)
1083{
1084 int c;
1085 size_t ioff;
1086 const size_t firstdc = raidz_parity(rm);
1087 raidz_col_t * const pcol = raidz_col_p(rm, CODE_P);
1088 raidz_col_t * const qcol = raidz_col_p(rm, CODE_Q);
1089 raidz_col_t * const rcol = raidz_col_p(rm, CODE_R);
1090 raidz_col_t * const xcol = raidz_col_p(rm, x);
1091 raidz_col_t * const ycol = raidz_col_p(rm, y);
1092 raidz_col_t * const zcol = raidz_col_p(rm, z);
1093 raidz_col_t *col;
1094
1095 REC_PQR_DEFINE();
1096
1097 for (ioff = off; ioff < end; ioff += (REC_PQR_STRIDE * sizeof (v_t))) {
1098 MUL2_SETUP();
1099 LOAD(COL_OFF(pcol, ioff), REC_PQR_X);
62a65a65
RD
1100 ZERO(REC_PQR_Y);
1101 ZERO(REC_PQR_Z);
ab9f4b0b
GN
1102
1103 if (ncols == nbigcols) {
1104 for (c = firstdc; c < x; c++)
1105 REC_PQR_INNER_LOOP(c);
1106
1107 REC_PQR_SYN_UPDATE();
1108 for (c++; c < y; c++)
1109 REC_PQR_INNER_LOOP(c);
1110
1111 REC_PQR_SYN_UPDATE();
1112 for (c++; c < z; c++)
1113 REC_PQR_INNER_LOOP(c);
1114
1115 REC_PQR_SYN_UPDATE();
1116 for (c++; c < nbigcols; c++)
1117 REC_PQR_INNER_LOOP(c);
1118 } else {
1119 for (c = firstdc; c < nbigcols; c++) {
1120 REC_PQR_SYN_UPDATE();
1121 if (c != x && c != y && c != z) {
1122 col = &rm->rm_col[c];
1123 LOAD(COL_OFF(col, ioff), REC_PQR_D);
1124 XOR(REC_PQR_D, REC_PQR_X);
1125 XOR(REC_PQR_D, REC_PQR_Y);
1126 XOR(REC_PQR_D, REC_PQR_Z);
1127 }
1128 }
1129 for (; c < ncols; c++)
1130 REC_PQR_SYN_UPDATE();
1131 }
1132
1133 XOR_ACC(COL_OFF(qcol, ioff), REC_PQR_Y);
1134 XOR_ACC(COL_OFF(rcol, ioff), REC_PQR_Z);
1135
1136 /* Save Pxyz and Qxyz */
1137 COPY(REC_PQR_X, REC_PQR_XS);
1138 COPY(REC_PQR_Y, REC_PQR_YS);
1139
1140 /* Calc X */
1141 MUL(coeff[MUL_PQR_XP], REC_PQR_X); /* Xp = Pxyz * xp */
1142 MUL(coeff[MUL_PQR_XQ], REC_PQR_Y); /* Xq = Qxyz * xq */
1143 XOR(REC_PQR_Y, REC_PQR_X);
1144 MUL(coeff[MUL_PQR_XR], REC_PQR_Z); /* Xr = Rxyz * xr */
1145 XOR(REC_PQR_Z, REC_PQR_X); /* X = Xp + Xq + Xr */
1146 STORE(COL_OFF(xcol, ioff), REC_PQR_X);
1147
1148 if (calcy) {
1149 /* Calc Y */
1150 XOR(REC_PQR_X, REC_PQR_XS); /* Pyz = Pxyz + X */
1151 MUL(coeff[MUL_PQR_YU], REC_PQR_X); /* Xq = X * upd_q */
1152 XOR(REC_PQR_X, REC_PQR_YS); /* Qyz = Qxyz + Xq */
1153 COPY(REC_PQR_XS, REC_PQR_X); /* restore Pyz */
1154 MUL(coeff[MUL_PQR_YP], REC_PQR_X); /* Yp = Pyz * yp */
1155 MUL(coeff[MUL_PQR_YQ], REC_PQR_YS); /* Yq = Qyz * yq */
1156 XOR(REC_PQR_X, REC_PQR_YS); /* Y = Yp + Yq */
1157 STORE(COL_OFF(ycol, ioff), REC_PQR_YS);
1158 }
1159
1160 if (calcz) {
1161 /* Calc Z */
1162 XOR(REC_PQR_XS, REC_PQR_YS); /* Z = Pz = Pyz + Y */
1163 STORE(COL_OFF(zcol, ioff), REC_PQR_YS);
1164 }
1165 }
1166}
1167
1168/*
1169 * Reconstruct three data columns using PQR parity
1170 * @rec_method REC_PQR_BLOCK()
1171 *
1172 * @rm RAIDZ map
1173 * @tgtidx array of missing data indexes
1174 */
1175static raidz_inline int
1176raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx)
1177{
1178 const int x = tgtidx[TARGET_X];
1179 const int y = tgtidx[TARGET_Y];
1180 const int z = tgtidx[TARGET_Z];
1181 const int ncols = raidz_ncols(rm);
1182 const int nbigcols = raidz_nbigcols(rm);
1183 const size_t xsize = raidz_col_size(rm, x);
1184 const size_t ysize = raidz_col_size(rm, y);
1185 const size_t zsize = raidz_col_size(rm, z);
1186 const size_t short_size = raidz_short_size(rm);
1187 unsigned coeff[MUL_CNT];
1188
1189 raidz_rec_pqr_coeff(rm, tgtidx, coeff);
1190
1191 raidz_math_begin();
1192
1193 /* 0 - short_size */
1194 REC_PQR_BLOCK(rm, 0, short_size, x, y, z, coeff, ncols, ncols,
1195 B_TRUE, B_TRUE);
1196
1197 /* short_size - xsize */
1198 REC_PQR_BLOCK(rm, short_size, xsize, x, y, z, coeff, ncols, nbigcols,
1199 xsize == ysize, xsize == zsize);
1200
1201 raidz_math_end();
1202
1203 return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
1204}
1205
1206#endif /* _VDEV_RAIDZ_MATH_IMPL_H */