]>
Commit | Line | Data |
---|---|---|
ab9f4b0b GN |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | ||
22 | /* | |
23 | * Copyright (C) 2016 Gvozden Nešković. All rights reserved. | |
24 | */ | |
25 | ||
26 | #include <sys/vdev_raidz_impl.h> | |
cbf484f8 | 27 | |
ab9f4b0b GN |
28 | /* |
29 | * Provide native CPU scalar routines. | |
30 | * Support 32-bit and 64-bit CPUs. | |
31 | */ | |
32 | #if ((~(0x0ULL)) >> 24) == 0xffULL | |
33 | #define ELEM_SIZE 4 | |
34 | typedef uint32_t iv_t; | |
35 | #elif ((~(0x0ULL)) >> 56) == 0xffULL | |
36 | #define ELEM_SIZE 8 | |
37 | typedef uint64_t iv_t; | |
38 | #endif | |
39 | ||
40 | /* | |
41 | * Vector type used in scalar implementation | |
42 | * | |
43 | * The union is expected to be of native CPU register size. Since addition | |
44 | * uses XOR operation, it can be performed on all byte elements at once. | |
45 | * Multiplication requires per byte access. | |
46 | */ | |
47 | typedef union { | |
48 | iv_t e; | |
49 | uint8_t b[ELEM_SIZE]; | |
50 | } v_t; | |
51 | ||
52 | /* | |
53 | * Precomputed lookup tables for multiplication by a constant | |
54 | * | |
55 | * The reconstruction path requires multiplication by constant factors. Rather | |
56 | * than performing a two-step lookup (log & exp tables), a direct lookup can be | |
57 | * used. Multiplication of element 'a' by a constant 'c' is obtained as: | |
58 | * | |
59 | * r = vdev_raidz_mul_lt[c_log][a]; | |
60 | * | |
61 | * where c_log = vdev_raidz_log2[c]. Log of coefficient factors is used because | |
62 | * they are faster to obtain while solving the syndrome equations. | |
63 | * | |
64 | * PERFORMANCE NOTE: | |
65 | * Even though the complete lookup table uses 64 KiB, only a relatively small | |
66 | * portion of it is used at any one time. The following shows the number of | |
67 | * bytes accessed in each case: | |
68 | * - 1 failed disk: 256B (1 mul. coefficient) | |
69 | * - 2 failed disks: 512B (2 mul. coefficients) | |
70 | * - 3 failed disks: 1536B (6 mul. coefficients) | |
71 | * | |
72 | * Compared to the traditional log/exp method, the size of the lookup table | |
73 | * regions actually accessed is larger only for reconstruction of 3 failed | |
74 | * disks. But since the result is obtained in one lookup step, performance is | |
75 | * doubled. | |
76 | */ | |
77 | static uint8_t vdev_raidz_mul_lt[256][256] __attribute__((aligned(256))); | |
78 | ||
79 | static void | |
80 | raidz_init_scalar(void) | |
81 | { | |
82 | int c, i; | |
83 | for (c = 0; c < 256; c++) | |
84 | for (i = 0; i < 256; i++) | |
85 | vdev_raidz_mul_lt[c][i] = gf_mul(c, i); | |
86 | ||
87 | } | |
88 | ||
89 | #define PREFETCHNTA(ptr, offset) {} | |
90 | #define PREFETCH(ptr, offset) {} | |
91 | ||
92 | #define XOR_ACC(src, acc) acc.e ^= ((v_t *)src)[0].e | |
93 | #define XOR(src, acc) acc.e ^= src.e | |
62a65a65 | 94 | #define ZERO(acc) acc.e = 0 |
ab9f4b0b GN |
95 | #define COPY(src, dst) dst = src |
96 | #define LOAD(src, val) val = ((v_t *)src)[0] | |
97 | #define STORE(dst, val) ((v_t *)dst)[0] = val | |
98 | ||
99 | /* | |
100 | * Constants used for optimized multiplication by 2. | |
101 | */ | |
102 | static const struct { | |
103 | iv_t mod; | |
104 | iv_t mask; | |
105 | iv_t msb; | |
106 | } scalar_mul2_consts = { | |
107 | #if ELEM_SIZE == 8 | |
108 | .mod = 0x1d1d1d1d1d1d1d1dULL, | |
109 | .mask = 0xfefefefefefefefeULL, | |
110 | .msb = 0x8080808080808080ULL, | |
111 | #else | |
112 | .mod = 0x1d1d1d1dULL, | |
113 | .mask = 0xfefefefeULL, | |
114 | .msb = 0x80808080ULL, | |
115 | #endif | |
116 | }; | |
117 | ||
118 | #define MUL2_SETUP() {} | |
119 | ||
120 | #define MUL2(a) \ | |
121 | { \ | |
122 | iv_t _mask; \ | |
123 | \ | |
124 | _mask = (a).e & scalar_mul2_consts.msb; \ | |
125 | _mask = (_mask << 1) - (_mask >> 7); \ | |
126 | (a).e = ((a).e << 1) & scalar_mul2_consts.mask; \ | |
127 | (a).e = (a).e ^ (_mask & scalar_mul2_consts.mod); \ | |
128 | } | |
129 | ||
130 | #define MUL4(a) \ | |
131 | { \ | |
132 | MUL2(a); \ | |
133 | MUL2(a); \ | |
134 | } | |
135 | ||
136 | #define MUL(c, a) \ | |
137 | { \ | |
138 | const uint8_t *mul_lt = vdev_raidz_mul_lt[c]; \ | |
139 | switch (ELEM_SIZE) { \ | |
140 | case 8: \ | |
141 | a.b[7] = mul_lt[a.b[7]]; \ | |
142 | a.b[6] = mul_lt[a.b[6]]; \ | |
143 | a.b[5] = mul_lt[a.b[5]]; \ | |
144 | a.b[4] = mul_lt[a.b[4]]; \ | |
145 | case 4: \ | |
146 | a.b[3] = mul_lt[a.b[3]]; \ | |
147 | a.b[2] = mul_lt[a.b[2]]; \ | |
148 | a.b[1] = mul_lt[a.b[1]]; \ | |
149 | a.b[0] = mul_lt[a.b[0]]; \ | |
150 | break; \ | |
151 | } \ | |
152 | } | |
153 | ||
154 | #define raidz_math_begin() {} | |
155 | #define raidz_math_end() {} | |
156 | ||
cbf484f8 | 157 | #define SYN_STRIDE 1 |
ab9f4b0b | 158 | |
cbf484f8 GN |
159 | #define ZERO_DEFINE() v_t d0 |
160 | #define ZERO_STRIDE 1 | |
161 | #define ZERO_D d0 | |
ab9f4b0b | 162 | |
cbf484f8 GN |
163 | #define COPY_DEFINE() v_t d0 |
164 | #define COPY_STRIDE 1 | |
165 | #define COPY_D d0 | |
166 | ||
167 | #define ADD_DEFINE() v_t d0 | |
168 | #define ADD_STRIDE 1 | |
169 | #define ADD_D d0 | |
170 | ||
171 | #define MUL_DEFINE() v_t d0 | |
172 | #define MUL_STRIDE 1 | |
173 | #define MUL_D d0 | |
174 | ||
175 | #define GEN_P_STRIDE 1 | |
176 | #define GEN_P_DEFINE() v_t p0 | |
177 | #define GEN_P_P p0 | |
178 | ||
179 | #define GEN_PQ_STRIDE 1 | |
180 | #define GEN_PQ_DEFINE() v_t d0, c0 | |
181 | #define GEN_PQ_D d0 | |
182 | #define GEN_PQ_C c0 | |
183 | ||
184 | #define GEN_PQR_STRIDE 1 | |
185 | #define GEN_PQR_DEFINE() v_t d0, c0 | |
186 | #define GEN_PQR_D d0 | |
187 | #define GEN_PQR_C c0 | |
188 | ||
189 | #define SYN_Q_DEFINE() v_t d0, x0 | |
190 | #define SYN_Q_D d0 | |
191 | #define SYN_Q_X x0 | |
192 | ||
193 | ||
194 | #define SYN_R_DEFINE() v_t d0, x0 | |
195 | #define SYN_R_D d0 | |
196 | #define SYN_R_X x0 | |
197 | ||
198 | ||
199 | #define SYN_PQ_DEFINE() v_t d0, x0 | |
200 | #define SYN_PQ_D d0 | |
201 | #define SYN_PQ_X x0 | |
202 | ||
203 | ||
204 | #define REC_PQ_STRIDE 1 | |
205 | #define REC_PQ_DEFINE() v_t x0, y0, t0 | |
206 | #define REC_PQ_X x0 | |
207 | #define REC_PQ_Y y0 | |
208 | #define REC_PQ_T t0 | |
209 | ||
210 | ||
211 | #define SYN_PR_DEFINE() v_t d0, x0 | |
212 | #define SYN_PR_D d0 | |
213 | #define SYN_PR_X x0 | |
214 | ||
215 | #define REC_PR_STRIDE 1 | |
216 | #define REC_PR_DEFINE() v_t x0, y0, t0 | |
217 | #define REC_PR_X x0 | |
218 | #define REC_PR_Y y0 | |
219 | #define REC_PR_T t0 | |
220 | ||
221 | ||
222 | #define SYN_QR_DEFINE() v_t d0, x0 | |
223 | #define SYN_QR_D d0 | |
224 | #define SYN_QR_X x0 | |
225 | ||
226 | ||
227 | #define REC_QR_STRIDE 1 | |
228 | #define REC_QR_DEFINE() v_t x0, y0, t0 | |
229 | #define REC_QR_X x0 | |
230 | #define REC_QR_Y y0 | |
231 | #define REC_QR_T t0 | |
232 | ||
233 | ||
234 | #define SYN_PQR_DEFINE() v_t d0, x0 | |
235 | #define SYN_PQR_D d0 | |
236 | #define SYN_PQR_X x0 | |
237 | ||
238 | #define REC_PQR_STRIDE 1 | |
239 | #define REC_PQR_DEFINE() v_t x0, y0, z0, xs0, ys0 | |
240 | #define REC_PQR_X x0 | |
241 | #define REC_PQR_Y y0 | |
242 | #define REC_PQR_Z z0 | |
243 | #define REC_PQR_XS xs0 | |
244 | #define REC_PQR_YS ys0 | |
245 | ||
246 | #include "vdev_raidz_math_impl.h" | |
590c9a09 | 247 | |
ab9f4b0b GN |
248 | DEFINE_GEN_METHODS(scalar); |
249 | DEFINE_REC_METHODS(scalar); | |
250 | ||
c9187d86 | 251 | boolean_t |
ab9f4b0b GN |
252 | raidz_will_scalar_work(void) |
253 | { | |
254 | return (B_TRUE); /* always */ | |
255 | } | |
256 | ||
257 | const raidz_impl_ops_t vdev_raidz_scalar_impl = { | |
258 | .init = raidz_init_scalar, | |
259 | .fini = NULL, | |
260 | .gen = RAIDZ_GEN_METHODS(scalar), | |
261 | .rec = RAIDZ_REC_METHODS(scalar), | |
262 | .is_supported = &raidz_will_scalar_work, | |
263 | .name = "scalar" | |
264 | }; | |
265 | ||
266 | /* Powers of 2 in the RAID-Z Galois field. */ | |
267 | const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = { | |
268 | 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, | |
269 | 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, | |
270 | 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, | |
271 | 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, | |
272 | 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, | |
273 | 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, | |
274 | 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, | |
275 | 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, | |
276 | 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, | |
277 | 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0, | |
278 | 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, | |
279 | 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, | |
280 | 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, | |
281 | 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, | |
282 | 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, | |
283 | 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, | |
284 | 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, | |
285 | 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, | |
286 | 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, | |
287 | 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, | |
288 | 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, | |
289 | 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, | |
290 | 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, | |
291 | 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, | |
292 | 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e, | |
293 | 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, | |
294 | 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, | |
295 | 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09, | |
296 | 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, | |
297 | 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16, | |
298 | 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, | |
299 | 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01 | |
300 | }; | |
301 | ||
302 | /* Discrete logarithms (base 2) in the RAID-Z Galois field. */ | |
303 | const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))) = { | |
304 | 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, | |
305 | 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b, | |
306 | 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, | |
307 | 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71, | |
308 | 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, | |
309 | 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45, | |
310 | 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, | |
311 | 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6, | |
312 | 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, | |
313 | 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88, | |
314 | 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, | |
315 | 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, | |
316 | 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, | |
317 | 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d, | |
318 | 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b, | |
319 | 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, | |
320 | 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d, | |
321 | 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18, | |
322 | 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c, | |
323 | 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, | |
324 | 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd, | |
325 | 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, | |
326 | 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e, | |
327 | 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2, | |
328 | 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76, | |
329 | 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, | |
330 | 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, | |
331 | 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a, | |
332 | 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, | |
333 | 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7, | |
334 | 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, | |
335 | 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf, | |
336 | }; |