4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
26 #include <sys/vdev_raidz_impl.h>
28 * Provide native CPU scalar routines.
29 * Support 32bit and 64bit CPUs.
/*
 * Select iv_t, the unsigned integer type used for whole-word operations:
 * uint32_t when the first test matches, uint64_t when the second matches.
 *
 * NOTE(review): in #if arithmetic 0x0ULL is at least 64 bits wide, so
 * (~0ULL) >> 24 evaluates to 0xffffffffff rather than 0xff; the 32-bit
 * branch looks unreachable as written -- confirm on a 32-bit build.
 * NOTE(review): the terminating #endif is not visible in this excerpt.
 */
31 #if ((~(0x0ULL)) >> 24) == 0xffULL
33 typedef uint32_t iv_t
;
34 #elif ((~(0x0ULL)) >> 56) == 0xffULL
36 typedef uint64_t iv_t
;
40 * Vector type used in scalar implementation
42 * The union is expected to be of native CPU register size. Since addition
43 * uses the XOR operation, it can be performed on all byte elements at once.
44 * Multiplication requires per byte access.
52 * Precomputed lookup tables for multiplication by a constant
54 * The reconstruction path requires multiplication by constant factors. Instead
55 * of performing a two-step lookup (log & exp tables), a direct lookup can be used
56 * instead. Multiplication of element 'a' by a constant 'c' is obtained as:
58 * r = vdev_raidz_mul_lt[c_log][a];
60 * where c_log = vdev_raidz_log2[c]. Log of coefficient factors is used because
61 * they are faster to obtain while solving the syndrome equations.
64 * Even though the complete lookup table uses 64 KiB, only a relatively small
65 * portion of it is used at the same time. The following shows the number of
66 * accessed bytes for different cases:
67 * - 1 failed disk: 256B (1 mul. coefficient)
68 * - 2 failed disks: 512B (2 mul. coefficients)
69 * - 3 failed disks: 1536B (6 mul. coefficients)
71 * Compared to the traditional log/exp method, the size of the actually
72 * accessed lookup table region is larger only for reconstruction of 3 failed
73 * disks. But since the result is obtained in one lookup step, performance is
/*
 * Direct multiplication lookup table.  Row c, column i holds gf_mul(c, i);
 * it is filled in by raidz_init_scalar().
 */
static uint8_t vdev_raidz_mul_lt[256][256] __attribute__((aligned(256)));

/*
 * Fill vdev_raidz_mul_lt with gf_mul(c, i) for every pair of byte values
 * c and i in [0, 255].  Referenced as the .init member of
 * vdev_raidz_scalar_impl.
 */
static void
raidz_init_scalar(void)
{
	int c, i;

	for (c = 0; c < 256; c++) {
		for (i = 0; i < 256; i++)
			vdev_raidz_mul_lt[c][i] = gf_mul(c, i);
	}
}
/* Prefetch hints expand to an empty block in the scalar implementation. */
#define	PREFETCHNTA(ptr, offset)	{}
#define	PREFETCH(ptr, offset)		{}

/*
 * Whole-word vector primitives operating on v_t values.
 *
 * Every argument is parenthesized so that expression arguments (for
 * example a pointer plus a byte offset) are evaluated before the cast or
 * member access is applied.
 *
 *   XOR_ACC(src, acc)	acc ^= the v_t stored at address src
 *   XOR(src, acc)	acc ^= src
 *   ZERO(acc)		acc = 0
 *   COPY(src, dst)	dst = src
 *   LOAD(src, val)	val = the v_t stored at address src
 *   STORE(dst, val)	the v_t at address dst = val
 */
#define	XOR_ACC(src, acc)	(acc).e ^= ((v_t *)(src))[0].e
#define	XOR(src, acc)		(acc).e ^= (src).e
#define	ZERO(acc)		(acc).e = 0
#define	COPY(src, dst)		(dst) = (src)
#define	LOAD(src, val)		(val) = ((v_t *)(src))[0]
#define	STORE(dst, val)		((v_t *)(dst))[0] = (val)
99 * Constants used for optimized multiplication by 2.
101 static const struct {
105 } scalar_mul2_consts
= {
107 .mod
= 0x1d1d1d1d1d1d1d1dULL
,
108 .mask
= 0xfefefefefefefefeULL
,
109 .msb
= 0x8080808080808080ULL
,
111 .mod
= 0x1d1d1d1dULL
,
112 .mask
= 0xfefefefeULL
,
113 .msb
= 0x80808080ULL
,
/*
 * MUL2_SETUP() needs no per-call setup here and expands to an empty block.
 *
 * The continuation lines that follow it are the bodies of two multi-line
 * macros whose #define headers are not visible in this excerpt:
 *
 *  - The first doubles every byte lane of (a).e in one pass: _mask becomes
 *    0xff in each lane whose top bit was set, the word is shifted left by
 *    one with bit 0 of every lane cleared, and scalar_mul2_consts.mod is
 *    XORed into the lanes selected by _mask.
 *
 *  - The second multiplies by a constant: it takes the row
 *    vdev_raidz_mul_lt[c] and replaces each byte a.b[k] with its entry in
 *    that row, switching on ELEM_SIZE.
 *    NOTE(review): the case labels are not visible; presumably bytes 7..4
 *    are only handled for 8-byte elements -- confirm.
 */
117 #define MUL2_SETUP() {}
123 _mask = (a).e & scalar_mul2_consts.msb; \
124 _mask = (_mask << 1) - (_mask >> 7); \
125 (a).e = ((a).e << 1) & scalar_mul2_consts.mask; \
126 (a).e = (a).e ^ (_mask & scalar_mul2_consts.mod); \
137 const uint8_t *mul_lt = vdev_raidz_mul_lt[c]; \
138 switch (ELEM_SIZE) { \
140 a.b[7] = mul_lt[a.b[7]]; \
141 a.b[6] = mul_lt[a.b[6]]; \
142 a.b[5] = mul_lt[a.b[5]]; \
143 a.b[4] = mul_lt[a.b[4]]; \
145 a.b[3] = mul_lt[a.b[3]]; \
146 a.b[2] = mul_lt[a.b[2]]; \
147 a.b[1] = mul_lt[a.b[1]]; \
148 a.b[0] = mul_lt[a.b[0]]; \
/*
 * Hooks named for the start and end of a math routine; both expand to an
 * empty block in this implementation.
 */
153 #define raidz_math_begin() {}
154 #define raidz_math_end() {}
/*
 * Per-method configuration.  Each *_DEFINE() declares the v_t working
 * variables for one method and each *_STRIDE is 1.
 * NOTE(review): presumably consumed by the templates in
 * vdev_raidz_math_impl.h, which is included right after these defines, with
 * the stride counting v_t elements per loop step -- confirm.
 */
/* Parity generation: P, PQ and PQR. */
156 #define GEN_P_DEFINE() v_t p0
157 #define GEN_P_STRIDE 1
160 #define GEN_PQ_DEFINE() v_t d0, p0, q0
161 #define GEN_PQ_STRIDE 1
166 #define GEN_PQR_DEFINE() v_t d0, p0, q0, r0
167 #define GEN_PQR_STRIDE 1
/* Reconstruction methods using a single parity column: P, Q or R. */
173 #define REC_P_DEFINE() v_t x0
174 #define REC_P_STRIDE 1
177 #define REC_Q_DEFINE() v_t x0
178 #define REC_Q_STRIDE 1
181 #define REC_R_DEFINE() v_t x0
182 #define REC_R_STRIDE 1
/* Reconstruction methods using two parity columns: PQ, PR or QR. */
185 #define REC_PQ_DEFINE() v_t x0, y0, d0
186 #define REC_PQ_STRIDE 1
191 #define REC_PR_DEFINE() v_t x0, y0, d0
192 #define REC_PR_STRIDE 1
197 #define REC_QR_DEFINE() v_t x0, y0, d0
198 #define REC_QR_STRIDE 1
/*
 * Reconstruction using all three parity columns.  REC_PQR_XS and
 * REC_PQR_YS name the d0 and t0 variables declared by REC_PQR_DEFINE().
 */
203 #define REC_PQR_DEFINE() v_t x0, y0, z0, d0, t0
204 #define REC_PQR_STRIDE 1
209 #define REC_PQR_XS d0
210 #define REC_PQR_YS t0
212 #include "vdev_raidz_math_impl.h"
215 * If compiled with -O0, gcc doesn't do any stack frame coalescing
216 * and -Wframe-larger-than=1024 is triggered in debug mode.
217 * Starting with gcc 4.8, new opt level -Og is introduced for debugging, which
218 * does not trigger this warning.
220 #pragma GCC diagnostic ignored "-Wframe-larger-than="
/*
 * Instantiate the generation and reconstruction method sets under the
 * "scalar" name.
 * NOTE(review): both macros are defined outside this excerpt; presumably
 * they emit the functions that RAIDZ_GEN_METHODS(scalar) and
 * RAIDZ_REC_METHODS(scalar) reference in vdev_raidz_scalar_impl -- confirm.
 */
222 DEFINE_GEN_METHODS(scalar
);
223 DEFINE_REC_METHODS(scalar
);
226 raidz_will_scalar_work(void)
228 return (B_TRUE
); /* always */
/*
 * Operations table for the scalar implementation.  It wires up the table
 * initializer (raidz_init_scalar), the generation and reconstruction method
 * sets, and the support probe (raidz_will_scalar_work).
 * NOTE(review): the closing brace is not visible in this excerpt and
 * further members may be initialized -- confirm against the full file.
 */
231 const raidz_impl_ops_t vdev_raidz_scalar_impl
= {
232 .init
= raidz_init_scalar
,
234 .gen
= RAIDZ_GEN_METHODS(scalar
),
235 .rec
= RAIDZ_REC_METHODS(scalar
),
236 .is_supported
= &raidz_will_scalar_work
,
/*
 * Powers of 2 in the RAID-Z Galois field.
 *
 * Entry i holds 2^i: each entry is the previous one doubled, with an
 * overflow out of bit 7 folded back in by XORing 0x1d.  The sequence wraps
 * around, so entry 255 is 0x01 again.
 */
const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = {
	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
	0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
	0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
	0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
	0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
	0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
	0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
	0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
	0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
	0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
	0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
	0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
	0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
	0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
	0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
	0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
	0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
	0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
	0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
	0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
	0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
	0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
	0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
	0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
	0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
	0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
	0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
	0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
	0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
	0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
	0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
	0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
};
276 /* Logs of 2 in the RAID-Z Galois field. */
277 const uint8_t vdev_raidz_log2
[256] __attribute__((aligned(256))) = {
278 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
279 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
280 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
281 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
282 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
283 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
284 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
285 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
286 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
287 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
288 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
289 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
290 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
291 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
292 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
293 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
294 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
295 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
296 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
297 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
298 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
299 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
300 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
301 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
302 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
303 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
304 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
305 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
306 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
307 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
308 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
309 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,