// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 */
9 | #ifdef CONFIG_AS_AVX512 | |
10 | ||
11 | #include <linux/raid/pq.h> | |
12 | #include "x86.h" | |
13 | ||
14 | static int raid6_has_avx512(void) | |
15 | { | |
16 | return boot_cpu_has(X86_FEATURE_AVX2) && | |
17 | boot_cpu_has(X86_FEATURE_AVX) && | |
18 | boot_cpu_has(X86_FEATURE_AVX512F) && | |
19 | boot_cpu_has(X86_FEATURE_AVX512BW) && | |
20 | boot_cpu_has(X86_FEATURE_AVX512VL) && | |
21 | boot_cpu_has(X86_FEATURE_AVX512DQ); | |
22 | } | |
23 | ||
/*
 * Recover the two failed data blocks at member indices 'faila' and
 * 'failb' of a RAID-6 stripe with AVX-512 GF(2^8) arithmetic.
 *
 * ptrs[] holds the stripe members: data pages first, then P at
 * ptrs[disks-2] and Q at ptrs[disks-1].  Each member is 'bytes' long.
 * The recovered data is written back in place through ptrs[faila] and
 * ptrs[failb].
 *
 * NOTE(review): the loop consumes 128 bytes per pass on x86-64 (64 on
 * 32-bit) and all loads/stores use aligned vmovdqa64, so 'bytes' is
 * assumed to be a multiple of the step and the buffers 64-byte
 * aligned — not checked here; presumably guaranteed by the raid6 core.
 */
static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
				     int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;	/* low-nibble mask, broadcast to zmm7 below */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */

	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/* zmm7 = 64 copies of the nibble mask 0x0f (the original
	 * "zmm0 = x0f[16]" comment was stale: the target is zmm7) */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		/* Load two 64-byte lanes each of Q and P, XOR in the
		 * regenerated partial syndromes to form the deltas */
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm9\n\t"
			     "vmovdqa64 %2, %%zmm0\n\t"
			     "vmovdqa64 %3, %%zmm8\n\t"
			     "vpxorq %4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %5, %%zmm9, %%zmm9\n\t"
			     "vpxorq %6, %%zmm0, %%zmm0\n\t"
			     "vpxorq %7, %%zmm8, %%zmm8"
			     :
			     : "m" (q[0]), "m" (q[64]), "m" (p[0]),
			       "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
			       "m" (dp[0]), "m" (dp[64]));

		/*
		 * 1 = dq[0] ^ q[0]
		 * 9 = dq[64] ^ q[64]
		 * 0 = dp[0] ^ p[0]
		 * 8 = dp[64] ^ p[64]
		 */

		/* Broadcast the 16-byte low/high-nibble qmul lookup tables */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/* GF-multiply the Q deltas by qmul: split each byte into
		 * nibbles, table-lookup with vpshufb, XOR the halves */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpsraw $4, %%zmm9, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/*
		 * 5 = qx[0]
		 * 15 = qx[64]
		 */

		/* Same nibble-table multiply of the P deltas by pbmul */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpsraw $4, %%zmm8, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm12, %%zmm13, %%zmm13"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/*
		 * 1 = pbmul[px[0]]
		 * 13 = pbmul[px[64]]
		 */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm15, %%zmm13, %%zmm13"
			     :
			     : );

		/*
		 * 1 = db = DQ
		 * 13 = db[64] = DQ[64]
		 *
		 * Store DQ, then DP = (dp ^ p) ^ DQ in zmm0/zmm8.
		 */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm13,%1\n\t"
			     "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vpxorq %%zmm13, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]));

		/* Write both recovered DP lanes back in place */
		asm volatile("vmovdqa64 %%zmm0, %0\n\t"
			     "vmovdqa64 %%zmm8, %1"
			     :
			     : "m" (dp[0]), "m" (dp[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dp += 128;
		dq += 128;
#else
		/* 32-bit path: one 64-byte lane per iteration */
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm0\n\t"
			     "vpxorq %2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %3, %%zmm0, %%zmm0"
			     :
			     : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));

		/* 1 = dq ^ q; 0 = dp ^ p */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * 1 = dq ^ q
		 * 3 = (dq ^ q) >> 4
		 * (original comment said "dq ^ p" — zmm1 holds the Q delta)
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/* 5 = qx */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/* Nibble-table multiply of the P delta by pbmul */
		asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = pbmul[px] */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     /* 1 = db = DQ */
			     "vmovdqa64 %%zmm1, %0\n\t"
			     :
			     : "m" (dq[0]));

		/* DP = px ^ DQ, stored back in place */
		asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vmovdqa64 %%zmm0, %0"
			     :
			     : "m" (dp[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}
226 | ||
/*
 * Recover one failed data block 'faila' together with the P block of a
 * RAID-6 stripe using AVX-512.
 *
 * D = qmul[Q ^ Q'] (Q' recomputed with the dead page zeroed), then
 * P = P' ^ D.  Results are written back in place through ptrs[faila]
 * and ptrs[disks-2].
 *
 * NOTE(review): as in raid6_2data_recov_avx512(), 'bytes' is assumed
 * to be a multiple of the loop step (128 on x86-64, 64 on 32-bit) and
 * the buffers 64-byte aligned for the vmovdqa64 accesses.
 */
static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
				     void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;	/* low-nibble mask, broadcast to zmm7 */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */

	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	/* zmm7 = 64 copies of the nibble mask 0x0f */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		/* Two 64-byte lanes: form the Q deltas q ^ dq */
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vmovdqa64 %1, %%zmm8\n\t"
			     "vpxorq %2, %%zmm3, %%zmm3\n\t"
			     "vpxorq %3, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
			       "m" (q[64]));

		/*
		 * 3 = q[0] ^ dq[0]
		 * 8 = q[64] ^ dq[64]
		 *
		 * Broadcast the qmul nibble tables; duplicate into
		 * zmm13/zmm14 because vpshufb below clobbers its table.
		 */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vmovapd %%zmm0, %%zmm13\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vmovapd %%zmm1, %%zmm14"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/* GF-multiply both delta lanes by qmul via nibble lookups */
		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpsraw $4, %%zmm8, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm13, %%zmm14, %%zmm14"
			     :
			     : );

		/*
		 * 1 = qmul[q[0] ^ dq[0]]
		 * 14 = qmul[q[64] ^ dq[64]]
		 */
		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vmovdqa64 %1, %%zmm12\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
			     "vpxorq %%zmm14, %%zmm12, %%zmm12"
			     :
			     : "m" (p[0]), "m" (p[64]));

		/*
		 * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
		 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
		 */

		/* Store recovered data (zmm1/zmm14) and P (zmm2/zmm12) */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm14, %1\n\t"
			     "vmovdqa64 %%zmm2, %2\n\t"
			     "vmovdqa64 %%zmm12,%3"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
			       "m" (p[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dq += 128;
#else
		/* 32-bit path: one 64-byte lane per iteration */
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vpxorq %1, %%zmm3, %%zmm3"
			     :
			     : "m" (dq[0]), "m" (q[0]));

		/* 3 = q ^ dq */

		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/* GF-multiply the delta by qmul via nibble lookups */
		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2"
			     :
			     : "m" (p[0]));

		/* 2 = p ^ qmul[q ^ dq] */

		/* Store recovered data (zmm1) and P (zmm2) */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm2, %1"
			     :
			     : "m" (dq[0]), "m" (p[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}
368 | ||
/*
 * Descriptor exported to the raid6 core: the two recovery entry points,
 * the CPU-feature gate, and a selection priority.
 * NOTE(review): priority 3 presumably outranks the SSSE3/AVX2 recovery
 * variants — verify against the other recov_*.c descriptors.
 */
const struct raid6_recov_calls raid6_recov_avx512 = {
	.data2 = raid6_2data_recov_avx512,	/* two data blocks lost */
	.datap = raid6_datap_recov_avx512,	/* one data block + P lost */
	.valid = raid6_has_avx512,		/* runtime CPUID gate */
#ifdef CONFIG_X86_64
	.name = "avx512x2",	/* 64-bit build: two 64-byte lanes per loop */
#else
	.name = "avx512x1",	/* 32-bit build: one 64-byte lane per loop */
#endif
	.priority = 3,
};

#else
#warning "your version of binutils lacks AVX512 support"
#endif