4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
26 #include <sys/vdev_raidz_impl.h>
/*
 * Provide native CPU scalar routines.
 * Support 32bit and 64bit CPUs.
 *
 * The probes below shift an all-ones unsigned long long constant: the
 * result equals 0xff after a shift by 24 only for a 32-bit wide value, and
 * after a shift by 56 only for a 64-bit wide value. ELEM_SIZE is the size
 * of iv_t in bytes.
 */
#if ((~(0x0ULL)) >> 24) == 0xffULL
#define	ELEM_SIZE	4
typedef uint32_t iv_t;
#elif ((~(0x0ULL)) >> 56) == 0xffULL
#define	ELEM_SIZE	8
typedef uint64_t iv_t;
#else
#error "Unsupported native word size"
#endif

/*
 * Vector type used in scalar implementation
 *
 * The union is expected to be of native CPU register size. Since addition
 * uses XOR operation, it can be performed on all byte elements at once.
 * Multiplication requires per byte access.
 */
typedef union {
	iv_t e;			/* whole element, used for XOR */
	uint8_t b[ELEM_SIZE];	/* per-byte view, used for multiplication */
} v_t;
/*
 * Precomputed lookup tables for multiplication by a constant
 *
 * Reconstruction path requires multiplication by constant factors. Instead
 * of performing a two step lookup (log & exp tables), a direct lookup can be
 * used instead. Multiplication of element 'a' by a constant 'c' is obtained
 * as:
 *
 *	r = vdev_raidz_mul_lt[c_log][a];
 *
 * where c_log = vdev_raidz_log2[c]. Log of coefficient factors is used because
 * they are faster to obtain while solving the syndrome equations.
 *
 * NOTE(review): raidz_init_scalar() fills row 'c' with gf_mul(c, i), which
 * suggests rows are indexed by the coefficient itself rather than by its
 * logarithm -- confirm against the callers of MUL().
 *
 * Even though the complete lookup table uses 64kiB, only a relatively small
 * portion of it is used at the same time. The following shows the number of
 * accessed bytes for different cases:
 *	- 1 failed disk: 256B (1 mul. coefficient)
 *	- 2 failed disks: 512B (2 mul. coefficients)
 *	- 3 failed disks: 1536B (6 mul. coefficients)
 *
 * Size of actually accessed lookup table regions is only larger for
 * reconstruction of 3 failed disks, when compared to traditional log/exp
 * method. But since the result is obtained in one lookup step, performance
 * is improved.
 */
static uint8_t vdev_raidz_mul_lt[256][256] __attribute__((aligned(256)));
80 raidz_init_scalar(void)
83 for (c
= 0; c
< 256; c
++)
84 for (i
= 0; i
< 256; i
++)
85 vdev_raidz_mul_lt
[c
][i
] = gf_mul(c
, i
);
/* Prefetch hints expand to an empty block in the scalar implementation. */
#define	PREFETCHNTA(ptr, offset)	{}
#define	PREFETCH(ptr, offset)		{}

/*
 * Element-wide primitives operating on v_t values. 'src' and 'dst' of
 * XOR_ACC/LOAD/STORE are pointers that are reinterpreted as v_t pointers;
 * the remaining arguments are v_t lvalues. Arguments are parenthesized so
 * that expressions such as 'p + 1' are cast and dereferenced as a whole.
 */
#define	XOR_ACC(src, acc)	(acc).e ^= ((v_t *)(src))[0].e
#define	XOR(src, acc)		(acc).e ^= (src).e
#define	ZERO(acc)		(acc).e = 0
#define	COPY(src, dst)		(dst) = (src)
#define	LOAD(src, val)		(val) = ((v_t *)(src))[0]
#define	STORE(dst, val)		((v_t *)(dst))[0] = (val)
100 * Constants used for optimized multiplication by 2.
102 static const struct {
106 } scalar_mul2_consts
= {
108 .mod
= 0x1d1d1d1d1d1d1d1dULL
,
109 .mask
= 0xfefefefefefefefeULL
,
110 .msb
= 0x8080808080808080ULL
,
112 .mod
= 0x1d1d1d1dULL
,
113 .mask
= 0xfefefefeULL
,
114 .msb
= 0x80808080ULL
,
#define	MUL2_SETUP() {}

/*
 * Multiply every byte of the v_t 'a' by 2, all bytes at once:
 *  1. collect the top bit of each byte,
 *  2. widen each collected 0x80 into 0xff (bytes without it stay 0x00),
 *  3. shift the element left by one and drop the bit that crossed into
 *     the neighbouring byte,
 *  4. XOR the 'mod' pattern into the bytes selected in step 2.
 */
#define	MUL2(a)								\
{									\
	iv_t _mask;							\
									\
	_mask = (a).e & scalar_mul2_consts.msb;				\
	_mask = (_mask << 1) - (_mask >> 7);				\
	(a).e = ((a).e << 1) & scalar_mul2_consts.mask;			\
	(a).e = (a).e ^ (_mask & scalar_mul2_consts.mod);		\
}
/*
 * Multiply every byte of the v_t 'a' using row 'c' of vdev_raidz_mul_lt.
 * Each byte is replaced by its table entry. For 8-byte elements the upper
 * four bytes are handled first and control then falls through to the
 * 4-byte case for the lower four.
 */
#define	MUL(c, a)							\
{									\
	const uint8_t *mul_lt = vdev_raidz_mul_lt[c];			\
	switch (ELEM_SIZE) {						\
	case 8:								\
		(a).b[7] = mul_lt[(a).b[7]];				\
		(a).b[6] = mul_lt[(a).b[6]];				\
		(a).b[5] = mul_lt[(a).b[5]];				\
		(a).b[4] = mul_lt[(a).b[4]];				\
		/* FALLTHROUGH */					\
	case 4:								\
		(a).b[3] = mul_lt[(a).b[3]];				\
		(a).b[2] = mul_lt[(a).b[2]];				\
		(a).b[1] = mul_lt[(a).b[1]];				\
		(a).b[0] = mul_lt[(a).b[0]];				\
		break;							\
	}								\
}
/*
 * Per-operation settings for the scalar implementation. Each *_DEFINE()
 * macro declares the v_t locals an operation works with and each *_STRIDE
 * is 1. Presumably these are consumed by vdev_raidz_math_impl.h, which is
 * included after these definitions -- confirm there.
 */
#define	raidz_math_begin()	{}
#define	raidz_math_end()	{}

#define	ZERO_DEFINE()		v_t d0
#define	ZERO_STRIDE		1

#define	COPY_DEFINE()		v_t d0
#define	COPY_STRIDE		1

#define	ADD_DEFINE()		v_t d0

#define	MUL_DEFINE()		v_t d0

#define	GEN_P_STRIDE		1
#define	GEN_P_DEFINE()		v_t p0

#define	GEN_PQ_STRIDE		1
#define	GEN_PQ_DEFINE()		v_t d0, c0

#define	GEN_PQR_STRIDE		1
#define	GEN_PQR_DEFINE()	v_t d0, c0

#define	SYN_Q_DEFINE()		v_t d0, x0

#define	SYN_R_DEFINE()		v_t d0, x0

#define	SYN_PQ_DEFINE()		v_t d0, x0

#define	REC_PQ_STRIDE		1
#define	REC_PQ_DEFINE()		v_t x0, y0, t0

#define	SYN_PR_DEFINE()		v_t d0, x0

#define	REC_PR_STRIDE		1
#define	REC_PR_DEFINE()		v_t x0, y0, t0

#define	SYN_QR_DEFINE()		v_t d0, x0

#define	REC_QR_STRIDE		1
#define	REC_QR_DEFINE()		v_t x0, y0, t0

#define	SYN_PQR_DEFINE()	v_t d0, x0

#define	REC_PQR_STRIDE		1
#define	REC_PQR_DEFINE()	v_t x0, y0, z0, xs0, ys0
#define	REC_PQR_XS		xs0
#define	REC_PQR_YS		ys0
246 #include "vdev_raidz_math_impl.h"
248 DEFINE_GEN_METHODS(scalar
);
249 DEFINE_REC_METHODS(scalar
);
252 raidz_will_scalar_work(void)
254 return (B_TRUE
); /* always */
257 const raidz_impl_ops_t vdev_raidz_scalar_impl
= {
258 .init
= raidz_init_scalar
,
260 .gen
= RAIDZ_GEN_METHODS(scalar
),
261 .rec
= RAIDZ_REC_METHODS(scalar
),
262 .is_supported
= &raidz_will_scalar_work
,
/*
 * Powers of 2 in the RAID-Z Galois field.
 *
 * Entry i + 1 is entry i doubled: shifted left by one bit and, when the
 * top bit was set, reduced by XOR with 0x1d. The sequence starts at 0x01
 * and returns to 0x01 at index 255.
 */
const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = {
	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
	0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
	0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
	0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
	0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
	0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
	0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
	0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
	0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
	0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
	0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
	0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
	0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
	0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
	0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
	0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
	0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
	0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
	0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
	0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
	0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
	0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
	0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
	0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
	0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
	0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
	0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
	0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
	0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
	0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
	0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
	0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
};
302 /* Logs of 2 in the RAID-Z Galois field. */
303 const uint8_t vdev_raidz_log2
[256] __attribute__((aligned(256))) = {
304 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
305 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
306 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
307 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
308 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
309 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
310 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
311 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
312 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
313 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
314 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
315 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
316 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
317 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
318 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
319 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
320 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
321 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
322 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
323 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
324 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
325 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
326 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
327 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
328 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
329 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
330 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
331 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
332 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
333 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
334 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
335 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,