/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2016 Romain Dolbeau. All rights reserved.
 */

#include <sys/isa_defs.h>

#if 0 // defined(__x86_64) && defined(HAVE_AVX512F)

#include <sys/types.h>
#include <linux/simd_x86.h>

#define __asm __asm__ __volatile__

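/*
 * REG_CNT(r...) counts how many register arguments were passed (1-8):
 * the list is padded with 8..1 and _REG_CNT() returns whatever lands
 * in its ninth parameter slot, which is exactly the length of the
 * original list.
 */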
#define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
#define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)

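/*
 * VRn(r...) expands to the name of the (n+1)-th zmm register in the
 * argument list (VRyn likewise for the ymm aliases). The trailing
 * filler arguments only keep the expansion well-formed when fewer
 * registers are passed; those expansions are never emitted.
 */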
#define VR0_(REG, ...) "zmm"#REG
#define VR1_(_1, REG, ...) "zmm"#REG
#define VR2_(_1, _2, REG, ...) "zmm"#REG
#define VR3_(_1, _2, _3, REG, ...) "zmm"#REG
#define VR4_(_1, _2, _3, _4, REG, ...) "zmm"#REG
#define VR5_(_1, _2, _3, _4, _5, REG, ...) "zmm"#REG
#define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "zmm"#REG
#define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "zmm"#REG

#define VR0(r...) VR0_(r)
#define VR1(r...) VR1_(r)
#define VR2(r...) VR2_(r, 1)
#define VR3(r...) VR3_(r, 1, 2)
#define VR4(r...) VR4_(r, 1, 2)
#define VR5(r...) VR5_(r, 1, 2, 3)
#define VR6(r...) VR6_(r, 1, 2, 3, 4)
#define VR7(r...) VR7_(r, 1, 2, 3, 4, 5)

#define VRy0_(REG, ...) "ymm"#REG
#define VRy1_(_1, REG, ...) "ymm"#REG
#define VRy2_(_1, _2, REG, ...) "ymm"#REG
#define VRy3_(_1, _2, _3, REG, ...) "ymm"#REG
#define VRy4_(_1, _2, _3, _4, REG, ...) "ymm"#REG
#define VRy5_(_1, _2, _3, _4, _5, REG, ...) "ymm"#REG
#define VRy6_(_1, _2, _3, _4, _5, _6, REG, ...) "ymm"#REG
#define VRy7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "ymm"#REG

#define VRy0(r...) VRy0_(r)
#define VRy1(r...) VRy1_(r)
#define VRy2(r...) VRy2_(r, 1)
#define VRy3(r...) VRy3_(r, 1, 2)
#define VRy4(r...) VRy4_(r, 1, 2)
#define VRy5(r...) VRy5_(r, 1, 2, 3)
#define VRy6(r...) VRy6_(r, 1, 2, 3, 4)
#define VRy7(r...) VRy7_(r, 1, 2, 3, 4, 5)

#define R_01(REG1, REG2, ...) REG1, REG2
#define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3
#define R_23(REG...) _R_23(REG, 1, 2, 3)

#define ASM_BUG() ASSERT(0)

extern const uint8_t gf_clmul_mod_lt[4*256][16];

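/*
 * One data element is 64 bytes, the width of a single zmm register;
 * v_t is one such vector-sized, vector-aligned chunk of data.
 */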
#define ELEM_SIZE 64

typedef struct v {
	uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
} v_t;

#define PREFETCHNTA(ptr, offset) \
{ \
	__asm( \
	    "prefetchnta " #offset "(%[MEM])\n" \
	    : : [MEM] "r" (ptr)); \
}

#define PREFETCH(ptr, offset) \
{ \
	__asm( \
	    "prefetcht0 " #offset "(%[MEM])\n" \
	    : : [MEM] "r" (ptr)); \
}

#define XOR_ACC(src, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		__asm( \
		    "vpxorq 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n" \
		    "vpxorq 0x40(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n" \
		    "vpxorq 0x80(%[SRC]), %%" VR2(r)", %%" VR2(r) "\n" \
		    "vpxorq 0xc0(%[SRC]), %%" VR3(r)", %%" VR3(r) "\n" \
		    : : [SRC] "r" (src)); \
		break; \
	case 2: \
		__asm( \
		    "vpxorq 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n" \
		    "vpxorq 0x40(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n" \
		    : : [SRC] "r" (src)); \
		break; \
	default: \
		ASM_BUG(); \
	} \
}

#define XOR(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		    "vpxorq %" VR0(r) ", %" VR4(r)", %" VR4(r) "\n" \
		    "vpxorq %" VR1(r) ", %" VR5(r)", %" VR5(r) "\n" \
		    "vpxorq %" VR2(r) ", %" VR6(r)", %" VR6(r) "\n" \
		    "vpxorq %" VR3(r) ", %" VR7(r)", %" VR7(r)); \
		break; \
	case 4: \
		__asm( \
		    "vpxorq %" VR0(r) ", %" VR2(r)", %" VR2(r) "\n" \
		    "vpxorq %" VR1(r) ", %" VR3(r)", %" VR3(r)); \
		break; \
	default: \
		ASM_BUG(); \
	} \
}

#define ZERO(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		__asm( \
		    "vpxorq %" VR0(r) ", %" VR0(r)", %" VR0(r) "\n" \
		    "vpxorq %" VR1(r) ", %" VR1(r)", %" VR1(r) "\n" \
		    "vpxorq %" VR2(r) ", %" VR2(r)", %" VR2(r) "\n" \
		    "vpxorq %" VR3(r) ", %" VR3(r)", %" VR3(r)); \
		break; \
	case 2: \
		__asm( \
		    "vpxorq %" VR0(r) ", %" VR0(r)", %" VR0(r) "\n" \
		    "vpxorq %" VR1(r) ", %" VR1(r)", %" VR1(r)); \
		break; \
	default: \
		ASM_BUG(); \
	} \
}

#define COPY(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		    "vmovdqa64 %" VR0(r) ", %" VR4(r) "\n" \
		    "vmovdqa64 %" VR1(r) ", %" VR5(r) "\n" \
		    "vmovdqa64 %" VR2(r) ", %" VR6(r) "\n" \
		    "vmovdqa64 %" VR3(r) ", %" VR7(r)); \
		break; \
	case 4: \
		__asm( \
		    "vmovdqa64 %" VR0(r) ", %" VR2(r) "\n" \
		    "vmovdqa64 %" VR1(r) ", %" VR3(r)); \
		break; \
	default: \
		ASM_BUG(); \
	} \
}

#define LOAD(src, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		__asm( \
		    "vmovdqa64 0x00(%[SRC]), %%" VR0(r) "\n" \
		    "vmovdqa64 0x40(%[SRC]), %%" VR1(r) "\n" \
		    "vmovdqa64 0x80(%[SRC]), %%" VR2(r) "\n" \
		    "vmovdqa64 0xc0(%[SRC]), %%" VR3(r) "\n" \
		    : : [SRC] "r" (src)); \
		break; \
	case 2: \
		__asm( \
		    "vmovdqa64 0x00(%[SRC]), %%" VR0(r) "\n" \
		    "vmovdqa64 0x40(%[SRC]), %%" VR1(r) "\n" \
		    : : [SRC] "r" (src)); \
		break; \
	default: \
		ASM_BUG(); \
	} \
}

#define STORE(dst, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		__asm( \
		    "vmovdqa64 %%" VR0(r) ", 0x00(%[DST])\n" \
		    "vmovdqa64 %%" VR1(r) ", 0x40(%[DST])\n" \
		    "vmovdqa64 %%" VR2(r) ", 0x80(%[DST])\n" \
		    "vmovdqa64 %%" VR3(r) ", 0xc0(%[DST])\n" \
		    : : [DST] "r" (dst)); \
		break; \
	case 2: \
		__asm( \
		    "vmovdqa64 %%" VR0(r) ", 0x00(%[DST])\n" \
		    "vmovdqa64 %%" VR1(r) ", 0x40(%[DST])\n" \
		    : : [DST] "r" (dst)); \
		break; \
	default: \
		ASM_BUG(); \
	} \
}

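/*
 * vzeroupper clears the upper bits of the ymm/zmm registers, avoiding
 * the penalty for mixing wide AVX state with legacy SSE code once the
 * vectorized section is done.
 */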
#define FLUSH() \
{ \
	__asm("vzeroupper"); \
}

#define MUL2_SETUP() \
{ \
	__asm("vmovq %0, %%xmm14" :: "r"(0x1d1d1d1d1d1d1d1d)); \
	__asm("vpbroadcastq %xmm14, %zmm14"); \
	__asm("vmovq %0, %%xmm13" :: "r"(0x8080808080808080)); \
	__asm("vpbroadcastq %xmm13, %zmm13"); \
	__asm("vmovq %0, %%xmm12" :: "r"(0xfefefefefefefefe)); \
	__asm("vpbroadcastq %xmm12, %zmm12"); \
	__asm("vpxorq %zmm0, %zmm0, %zmm0"); \
}

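/*
 * _MUL2 multiplies two vectors of GF(2^8) elements by 2, using the
 * constants broadcast by MUL2_SETUP() above: zmm13 = 0x80.. selects
 * each byte's high bit, zmm14 = 0x1d.. is the low byte of the RAID-Z
 * generator polynomial, and zmm12 = 0xfe.. masks off the bit that a
 * 64-bit shift spills in from the neighbouring byte. Each byte is
 * shifted left by one and, wherever the high bit was set, XORed with
 * 0x1d; the vpternlogd combines the 0xfe masking of the shifted value
 * and the conditional XOR in a single instruction.
 */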
#define _MUL2(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 2: \
		__asm( \
		    "vpandq %" VR0(r)", %zmm13, %zmm10\n" \
		    "vpandq %" VR1(r)", %zmm13, %zmm11\n" \
		    "vpsrlq $7, %zmm10, %zmm30\n" \
		    "vpsrlq $7, %zmm11, %zmm31\n" \
		    "vpsllq $1, %zmm10, %zmm10\n" \
		    "vpsllq $1, %zmm11, %zmm11\n" \
		    "vpsubq %zmm30, %zmm10, %zmm10\n" \
		    "vpsubq %zmm31, %zmm11, %zmm11\n" \
		    "vpsllq $1, %" VR0(r)", %" VR0(r) "\n" \
		    "vpsllq $1, %" VR1(r)", %" VR1(r) "\n" \
		    "vpandq %zmm10, %zmm14, %zmm10\n" \
		    "vpandq %zmm11, %zmm14, %zmm11\n" \
		    "vpternlogd $0x6c, %zmm12, %zmm10, %" VR0(r) "\n" \
		    "vpternlogd $0x6c, %zmm12, %zmm11, %" VR1(r)); \
		break; \
	default: \
		ASM_BUG(); \
	} \
}

#define MUL2(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		_MUL2(R_01(r)); \
		_MUL2(R_23(r)); \
		break; \
	case 2: \
		_MUL2(r); \
		break; \
	default: \
		ASM_BUG(); \
	} \
}

#define MUL4(r...) \
{ \
	MUL2(r); \
	MUL2(r); \
}

/*
 * Must match the init above
 */
#define _0f "zmm0"
#define _as "zmm14"
#define _bs "zmm13"
#define _ltmod "zmm12"
#define _ltmul "zmm11"
#define _ta "zmm10"
#define _tb "zmm15"

/*
 * Must be in the first 16, otherwise an EVEX pshufb is generated
 * Must match above
 */
#define _asYlo "ymm14"
#define _bsYlo "ymm13"
#define _ltmodYlo "ymm12"
#define _ltmulYlo "ymm11"
#define _taYlo "ymm10"
#define _tbYlo "ymm15"

/*
 * Must be in the first 16, otherwise an EVEX pshufb is generated
 * ...
 */
#define _asYhi "ymm9"
#define _bsYhi "ymm8"
#define _ltmodYhi "ymm7"
#define _ltmulYhi "ymm6"
#define _taYhi "ymm5"
#define _tbYhi "ymm4"

/*
 * This uses a pair of AVX2 pshufb instructions to emulate the missing
 * AVX512F pshufb (AVX512BW provides the full-width pshufb).
 * To get the VEX-encoded pshufb (AVX2, supported on KNL) instead of the
 * EVEX-encoded one (AVX512BW, not supported on KNL, and probably also
 * requiring AVX512VL since we use the 256-bit form), every register
 * passed to pshufb must be among ymm0-ymm15, since only EVEX can
 * encode ymm16-ymm31.
 * This is a bit hackish, but short of encoding the instruction as raw
 * binary, how else do we force the use of the AVX2 pshufb?
 * Note that the other way round (forcing AVX512) is easy: just encode
 * k0 as the mask register (k0 is all-ones).
 */
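/*
 * _MULx2 multiplies two vectors by the constant c via 16-entry lookup
 * tables: each byte is split into its high and low nibble, the partial
 * products are fetched with pshufb from the four 16-byte tables at
 * gf_clmul_mod_lt[4*c] (which defines the exact table layout), and the
 * results are combined with XORs. Because only the AVX2 pshufb is used
 * (see above), each 512-bit lookup is done as two 256-bit pshufb on
 * the ymm halves, split and rejoined with vextracti64x4/vinserti64x4.
 */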
#define _MULx2(c, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 2: \
		__asm( \
		    "vmovq %[c0f], %%xmm0\n" \
		    "vpbroadcastq %%xmm0, %%" _0f "\n" \
		    /* upper bits */ \
		    "vbroadcasti32x4 0x00(%[lt]), %%" _ltmod "\n" \
		    "vbroadcasti32x4 0x10(%[lt]), %%" _ltmul "\n" \
		    \
		    "vpsrad $0x4, %%" VR0(r) ", %%" _as "\n" \
		    "vpsrad $0x4, %%" VR1(r) ", %%" _bs "\n" \
		    "vpandq %%" _0f ", %%" VR0(r) ", %%" VR0(r) "\n" \
		    "vpandq %%" _0f ", %%" VR1(r) ", %%" VR1(r) "\n" \
		    "vpandq %%" _0f ", %%" _as ", %%" _as "\n" \
		    "vpandq %%" _0f ", %%" _bs ", %%" _bs "\n" \
		    \
		    "vextracti64x4 $1, %%" _ltmod ", %%" _ltmodYhi "\n" \
		    \
		    "vextracti64x4 $1, %%" _as ", %%" _asYhi "\n" \
		    "vpshufb %%" _asYlo ", %%" _ltmodYlo ", %%" _taYlo "\n" \
		    "vpshufb %%" _asYhi ", %%" _ltmodYhi ", %%" _taYhi "\n" \
		    "vinserti64x4 $1, %%" _taYhi ", %%" _ta ", %%" _ta "\n" \
		    \
		    "vextracti64x4 $1, %%" _bs ", %%" _bsYhi "\n" \
		    "vpshufb %%" _bsYlo ", %%" _ltmodYlo ", %%" _tbYlo "\n" \
		    "vpshufb %%" _bsYhi ", %%" _ltmodYhi ", %%" _tbYhi "\n" \
		    "vinserti64x4 $1, %%" _tbYhi ", %%" _tb ", %%" _tb "\n" \
		    \
		    "vextracti64x4 $1, %%" _ltmul ", %%" _ltmulYhi "\n" \
		    \
		    "vpshufb %%" _asYlo ", %%" _ltmulYlo ", %%" _asYlo "\n" \
		    "vpshufb %%" _asYhi ", %%" _ltmulYhi ", %%" _asYhi "\n" \
		    "vinserti64x4 $1, %%" _asYhi ", %%" _as ", %%" _as "\n" \
		    \
		    "vpshufb %%" _bsYlo ", %%" _ltmulYlo ", %%" _bsYlo "\n" \
		    "vpshufb %%" _bsYhi ", %%" _ltmulYhi ", %%" _bsYhi "\n" \
		    "vinserti64x4 $1, %%" _bsYhi ", %%" _bs ", %%" _bs "\n" \
		    \
		    /* lower bits */ \
		    "vbroadcasti32x4 0x20(%[lt]), %%" _ltmod "\n" \
		    "vbroadcasti32x4 0x30(%[lt]), %%" _ltmul "\n" \
		    \
		    "vpxorq %%" _ta ", %%" _as ", %%" _as "\n" \
		    "vpxorq %%" _tb ", %%" _bs ", %%" _bs "\n" \
		    \
		    "vextracti64x4 $1, %%" _ltmod ", %%" _ltmodYhi "\n" \
		    \
		    "vextracti64x4 $0, %%" VR0(r) ", %%" "ymm1" "\n" \
		    "vextracti64x4 $1, %%" VR0(r) ", %%" _asYhi "\n" \
		    "vpshufb %%" "ymm1" ", %%" _ltmodYlo ", %%" _taYlo "\n" \
		    "vpshufb %%" _asYhi ", %%" _ltmodYhi ", %%" _taYhi "\n" \
		    "vinserti64x4 $1, %%" _taYhi ", %%" _ta ", %%" _ta "\n" \
		    \
		    "vextracti64x4 $0, %%" VR1(r) ", %%" "ymm2" "\n" \
		    "vextracti64x4 $1, %%" VR1(r) ", %%" _bsYhi "\n" \
		    "vpshufb %%" "ymm2" ", %%" _ltmodYlo ", %%" _tbYlo "\n" \
		    "vpshufb %%" _bsYhi ", %%" _ltmodYhi ", %%" _tbYhi "\n" \
		    "vinserti64x4 $1, %%" _tbYhi ", %%" _tb ", %%" _tb "\n" \
		    \
		    "vextracti64x4 $1, %%" _ltmul ", %%" _ltmulYhi "\n" \
		    \
		    "vpshufb %%" "ymm1" ", %%" _ltmulYlo ", %%" "ymm1" "\n" \
		    "vpshufb %%" _asYhi ", %%" _ltmulYhi ", %%" _asYhi "\n" \
		    "vinserti64x4 $1, %%" _asYhi ", %%" "zmm1" ", %%" VR0(r) "\n" \
		    \
		    "vpshufb %%" "ymm2" ", %%" _ltmulYlo ", %%" "ymm2" "\n" \
		    "vpshufb %%" _bsYhi ", %%" _ltmulYhi ", %%" _bsYhi "\n" \
		    "vinserti64x4 $1, %%" _bsYhi ", %%" "zmm2" ", %%" VR1(r) "\n" \
		    \
		    "vpxorq %%" _ta ", %%" VR0(r) ", %%" VR0(r) "\n" \
		    "vpxorq %%" _as ", %%" VR0(r) ", %%" VR0(r) "\n" \
		    "vpxorq %%" _tb ", %%" VR1(r) ", %%" VR1(r) "\n" \
		    "vpxorq %%" _bs ", %%" VR1(r) ", %%" VR1(r) "\n" \
		    : : [c0f] "r" (0x0f0f0f0f0f0f0f0f), \
		    [lt] "r" (gf_clmul_mod_lt[4*(c)])); \
		break; \
	default: \
		ASM_BUG(); \
	} \
}

#define MUL(c, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		_MULx2(c, R_01(r)); \
		_MULx2(c, R_23(r)); \
		break; \
	case 2: \
		_MULx2(c, R_01(r)); \
		break; \
	default: \
		ASM_BUG(); \
	} \
}

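/*
 * All vector work happens between raidz_math_begin() and
 * raidz_math_end(): kernel code must explicitly save and restore the
 * FPU/SIMD state around its use, and FLUSH() drops the wide register
 * state before handing the FPU back.
 */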
#define raidz_math_begin() kfpu_begin()
#define raidz_math_end() \
{ \
	FLUSH(); \
	kfpu_end(); \
}

#define ZERO_STRIDE 4
#define ZERO_DEFINE() {}
#define ZERO_D 20, 21, 22, 23

#define COPY_STRIDE 4
#define COPY_DEFINE() {}
#define COPY_D 20, 21, 22, 23

#define ADD_STRIDE 4
#define ADD_DEFINE() {}
#define ADD_D 20, 21, 22, 23

#define MUL_STRIDE 4
#define MUL_DEFINE() {}
#define MUL_D 20, 21, 22, 23
/*
 * These use the zmm16-zmm31 registers to free up zmm0-zmm15
 * for use with the AVX2 pshufb, see above.
 */
#define GEN_P_DEFINE() {}
#define GEN_P_STRIDE 4
#define GEN_P_P 20, 21, 22, 23

#define GEN_PQ_DEFINE() {}
#define GEN_PQ_STRIDE 4
#define GEN_PQ_D 20, 21, 22, 23
#define GEN_PQ_P 24, 25, 26, 27
#define GEN_PQ_Q 28, 29, 3, 4

#define GEN_PQR_DEFINE() {}
#define GEN_PQR_STRIDE 2
#define GEN_PQR_D 20, 21
#define GEN_PQR_P 22, 23
#define GEN_PQR_Q 24, 25
#define GEN_PQR_R 26, 27

#define REC_P_DEFINE() {}
#define REC_P_STRIDE 4
#define REC_P_X 20, 21, 22, 23

#define REC_Q_DEFINE() {}
#define REC_Q_STRIDE 4
#define REC_Q_X 20, 21, 22, 23

#define REC_R_DEFINE() {}
#define REC_R_STRIDE 4
#define REC_R_X 20, 21, 22, 23

#define REC_PQ_DEFINE() {}
#define REC_PQ_STRIDE 2
#define REC_PQ_X 20, 21
#define REC_PQ_Y 22, 23
#define REC_PQ_D 24, 25

#define REC_PR_DEFINE() {}
#define REC_PR_STRIDE 2
#define REC_PR_X 20, 21
#define REC_PR_Y 22, 23
#define REC_PR_D 24, 25

#define REC_QR_DEFINE() {}
#define REC_QR_STRIDE 2
#define REC_QR_X 20, 21
#define REC_QR_Y 22, 23
#define REC_QR_D 24, 25

#define REC_PQR_DEFINE() {}
#define REC_PQR_STRIDE 2
#define REC_PQR_X 20, 21
#define REC_PQR_Y 22, 23
#define REC_PQR_Z 24, 25
#define REC_PQR_D 26, 27
#define REC_PQR_XS 26, 27
#define REC_PQR_YS 28, 29


#include <sys/vdev_raidz_impl.h>
#include "vdev_raidz_math_impl.h"

DEFINE_GEN_METHODS(avx512f);
DEFINE_REC_METHODS(avx512f);

static boolean_t
raidz_will_avx512f_work(void)
{
	return (zfs_avx_available() &&
	    zfs_avx512f_available());
}

const raidz_impl_ops_t vdev_raidz_avx512f_impl = {
	.init = NULL,
	.fini = NULL,
	.gen = RAIDZ_GEN_METHODS(avx512f),
	.rec = RAIDZ_REC_METHODS(avx512f),
	.is_supported = &raidz_will_avx512f_work,
	.name = "avx512f"
};

#endif /* defined(__x86_64) && defined(HAVE_AVX512F) */