/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2019 Romain Dolbeau. All rights reserved.
 * <romain.dolbeau@european-processor-initiative.eu>
 */

#include <sys/types.h>
#include <sys/simd.h>

35b07497
RD
29#define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
30#define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
31
32#define VR0_(REG, ...) "%[w"#REG"]"
33#define VR1_(_1, REG, ...) "%[w"#REG"]"
34#define VR2_(_1, _2, REG, ...) "%[w"#REG"]"
35#define VR3_(_1, _2, _3, REG, ...) "%[w"#REG"]"
36#define VR4_(_1, _2, _3, _4, REG, ...) "%[w"#REG"]"
37#define VR5_(_1, _2, _3, _4, _5, REG, ...) "%[w"#REG"]"
38#define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "%[w"#REG"]"
39#define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "%[w"#REG"]"
40
41/*
42 * Here we need registers not used otherwise.
43 * They will be used in unused ASM for the case
44 * with more registers than required... but GCC
45 * will still need to make sure the constraints
46 * are correct, and duplicate constraints are illegal
47 * ... and we use the "register" number as a name
48 */
49
50#define VR0(r...) VR0_(r)
51#define VR1(r...) VR1_(r)
52#define VR2(r...) VR2_(r, 36)
53#define VR3(r...) VR3_(r, 36, 35)
54#define VR4(r...) VR4_(r, 36, 35, 34, 33)
55#define VR5(r...) VR5_(r, 36, 35, 34, 33, 32)
56#define VR6(r...) VR6_(r, 36, 35, 34, 33, 32, 31)
57#define VR7(r...) VR7_(r, 36, 35, 34, 33, 32, 31, 30)
58
59#define VR(X) "%[w"#X"]"
60
/*
 * RVRn(r...) builds a read-only ("v" = AltiVec vector register) input
 * constraint [wX] "v" (wX) for the n-th register of list r, with the
 * same 30..36 dummy fallback as VRn.  RVR(X) is the fixed-number form.
 */
#define	RVR0_(REG, ...) [w##REG] "v" (w##REG)
#define	RVR1_(_1, REG, ...) [w##REG] "v" (w##REG)
#define	RVR2_(_1, _2, REG, ...) [w##REG] "v" (w##REG)
#define	RVR3_(_1, _2, _3, REG, ...) [w##REG] "v" (w##REG)
#define	RVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "v" (w##REG)
#define	RVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "v" (w##REG)
#define	RVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "v" (w##REG)
#define	RVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "v" (w##REG)

#define	RVR0(r...) RVR0_(r)
#define	RVR1(r...) RVR1_(r)
#define	RVR2(r...) RVR2_(r, 36)
#define	RVR3(r...) RVR3_(r, 36, 35)
#define	RVR4(r...) RVR4_(r, 36, 35, 34, 33)
#define	RVR5(r...) RVR5_(r, 36, 35, 34, 33, 32)
#define	RVR6(r...) RVR6_(r, 36, 35, 34, 33, 32, 31)
#define	RVR7(r...) RVR7_(r, 36, 35, 34, 33, 32, 31, 30)

#define	RVR(X) [w##X] "v" (w##X)

/*
 * WVRn(r...) builds a write-only ("=v") output constraint for the
 * n-th register of list r (dummy fallback 30..36); WVR(X) is the
 * fixed-number form.
 */
#define	WVR0_(REG, ...) [w##REG] "=v" (w##REG)
#define	WVR1_(_1, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR2_(_1, _2, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR3_(_1, _2, _3, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "=v" (w##REG)

#define	WVR0(r...) WVR0_(r)
#define	WVR1(r...) WVR1_(r)
#define	WVR2(r...) WVR2_(r, 36)
#define	WVR3(r...) WVR3_(r, 36, 35)
#define	WVR4(r...) WVR4_(r, 36, 35, 34, 33)
#define	WVR5(r...) WVR5_(r, 36, 35, 34, 33, 32)
#define	WVR6(r...) WVR6_(r, 36, 35, 34, 33, 32, 31)
#define	WVR7(r...) WVR7_(r, 36, 35, 34, 33, 32, 31, 30)

#define	WVR(X) [w##X] "=v" (w##X)

/*
 * UVRn(r...) builds a read-write, early-clobber ("+&v") constraint
 * for the n-th register of list r (dummy fallback 30..36); UVR(X) is
 * the fixed-number form.
 */
#define	UVR0_(REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR1_(_1, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR2_(_1, _2, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR3_(_1, _2, _3, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+&v" (w##REG)

#define	UVR0(r...) UVR0_(r)
#define	UVR1(r...) UVR1_(r)
#define	UVR2(r...) UVR2_(r, 36)
#define	UVR3(r...) UVR3_(r, 36, 35)
#define	UVR4(r...) UVR4_(r, 36, 35, 34, 33)
#define	UVR5(r...) UVR5_(r, 36, 35, 34, 33, 32)
#define	UVR6(r...) UVR6_(r, 36, 35, 34, 33, 32, 31)
#define	UVR7(r...) UVR7_(r, 36, 35, 34, 33, 32, 31, 30)

#define	UVR(X) [w##X] "+&v" (w##X)

/* R_01/R_23 select the first/second pair of a four-register list. */
#define	R_01(REG1, REG2, ...) REG1, REG2
#define	_R_23(_0, _1, REG2, REG3, ...) REG2, REG3
#define	R_23(REG...) _R_23(REG, 1, 2, 3)

/* Unsupported register count: fail loudly at runtime. */
#define	ZFS_ASM_BUG()	ASSERT(0)

/*
 * Byte offset into a buffer, as an unsigned char pointer.
 * (val) is parenthesized so operator-expression arguments, e.g.
 * OFFSET(p, a + b), bind correctly.
 */
#define	OFFSET(ptr, val)	(((unsigned char *)(ptr)) + (val))

/*
 * Lookup tables for the carry-less-multiply GF(2^8) MUL path;
 * defined elsewhere in the raidz math code.  Four 16-byte tables
 * per multiplier constant.
 */
extern const uint8_t gf_clmul_mod_lt[4*256][16];

/* One AltiVec vector register is 16 bytes. */
#define	ELEM_SIZE	16

/* A single 16-byte, 16-byte-aligned vector element. */
typedef struct v {
	uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
} v_t;

/*
 * XOR_ACC(src, r...): load 2/4/8 consecutive 16-byte vectors from
 * src (offsets 0,16,...) into scratch registers v18-v21 and XOR them
 * into the register variables named in r.  Scratch registers are
 * reused for the upper half of the 8-wide case.
 */
#define	XOR_ACC(src, r...)						\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm__ __volatile__(					\
		    "lvx 21,0,%[SRC0]\n"				\
		    "lvx 20,0,%[SRC1]\n"				\
		    "lvx 19,0,%[SRC2]\n"				\
		    "lvx 18,0,%[SRC3]\n"				\
		    "vxor " VR0(r) "," VR0(r) ",21\n"			\
		    "vxor " VR1(r) "," VR1(r) ",20\n"			\
		    "vxor " VR2(r) "," VR2(r) ",19\n"			\
		    "vxor " VR3(r) "," VR3(r) ",18\n"			\
		    "lvx 21,0,%[SRC4]\n"				\
		    "lvx 20,0,%[SRC5]\n"				\
		    "lvx 19,0,%[SRC6]\n"				\
		    "lvx 18,0,%[SRC7]\n"				\
		    "vxor " VR4(r) "," VR4(r) ",21\n"			\
		    "vxor " VR5(r) "," VR5(r) ",20\n"			\
		    "vxor " VR6(r) "," VR6(r) ",19\n"			\
		    "vxor " VR7(r) "," VR7(r) ",18\n"			\
		    : UVR0(r), UVR1(r), UVR2(r), UVR3(r),		\
		    UVR4(r), UVR5(r), UVR6(r), UVR7(r)			\
		    : [SRC0] "r" ((OFFSET(src, 0))),			\
		    [SRC1] "r" ((OFFSET(src, 16))),			\
		    [SRC2] "r" ((OFFSET(src, 32))),			\
		    [SRC3] "r" ((OFFSET(src, 48))),			\
		    [SRC4] "r" ((OFFSET(src, 64))),			\
		    [SRC5] "r" ((OFFSET(src, 80))),			\
		    [SRC6] "r" ((OFFSET(src, 96))),			\
		    [SRC7] "r" ((OFFSET(src, 112)))			\
		    : "v18", "v19", "v20", "v21");			\
		break;							\
	case 4:								\
		__asm__ __volatile__(					\
		    "lvx 21,0,%[SRC0]\n"				\
		    "lvx 20,0,%[SRC1]\n"				\
		    "lvx 19,0,%[SRC2]\n"				\
		    "lvx 18,0,%[SRC3]\n"				\
		    "vxor " VR0(r) "," VR0(r) ",21\n"			\
		    "vxor " VR1(r) "," VR1(r) ",20\n"			\
		    "vxor " VR2(r) "," VR2(r) ",19\n"			\
		    "vxor " VR3(r) "," VR3(r) ",18\n"			\
		    : UVR0(r), UVR1(r), UVR2(r), UVR3(r)		\
		    : [SRC0] "r" ((OFFSET(src, 0))),			\
		    [SRC1] "r" ((OFFSET(src, 16))),			\
		    [SRC2] "r" ((OFFSET(src, 32))),			\
		    [SRC3] "r" ((OFFSET(src, 48)))			\
		    : "v18", "v19", "v20", "v21");			\
		break;							\
	case 2:								\
		__asm__ __volatile__(					\
		    "lvx 21,0,%[SRC0]\n"				\
		    "lvx 20,0,%[SRC1]\n"				\
		    "vxor " VR0(r) "," VR0(r) ",21\n"			\
		    "vxor " VR1(r) "," VR1(r) ",20\n"			\
		    : UVR0(r), UVR1(r)					\
		    : [SRC0] "r" ((OFFSET(src, 0))),			\
		    [SRC1] "r" ((OFFSET(src, 16)))			\
		    : "v20", "v21");					\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * XOR(r...): with an 8- (or 4-) register list, XOR the first half of
 * the list into the second half: VR4..7 ^= VR0..3 (resp. VR2..3 ^=
 * VR0..1).  A 2-register list is not supported.
 */
#define	XOR(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm__ __volatile__(					\
		    "vxor " VR4(r) "," VR4(r) "," VR0(r) "\n"		\
		    "vxor " VR5(r) "," VR5(r) "," VR1(r) "\n"		\
		    "vxor " VR6(r) "," VR6(r) "," VR2(r) "\n"		\
		    "vxor " VR7(r) "," VR7(r) "," VR3(r) "\n"		\
		    : UVR4(r), UVR5(r), UVR6(r), UVR7(r)		\
		    : RVR0(r), RVR1(r), RVR2(r), RVR3(r));		\
		break;							\
	case 4:								\
		__asm__ __volatile__(					\
		    "vxor " VR2(r) "," VR2(r) "," VR0(r) "\n"		\
		    "vxor " VR3(r) "," VR3(r) "," VR1(r) "\n"		\
		    : UVR2(r), UVR3(r)					\
		    : RVR0(r), RVR1(r));				\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/* ZERO(r...): clear 2/4/8 register variables by XORing each with itself. */
#define	ZERO(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm__ __volatile__(					\
		    "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"		\
		    "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"		\
		    "vxor " VR2(r) "," VR2(r) "," VR2(r) "\n"		\
		    "vxor " VR3(r) "," VR3(r) "," VR3(r) "\n"		\
		    "vxor " VR4(r) "," VR4(r) "," VR4(r) "\n"		\
		    "vxor " VR5(r) "," VR5(r) "," VR5(r) "\n"		\
		    "vxor " VR6(r) "," VR6(r) "," VR6(r) "\n"		\
		    "vxor " VR7(r) "," VR7(r) "," VR7(r) "\n"		\
		    : WVR0(r), WVR1(r), WVR2(r), WVR3(r),		\
		    WVR4(r), WVR5(r), WVR6(r), WVR7(r));		\
		break;							\
	case 4:								\
		__asm__ __volatile__(					\
		    "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"		\
		    "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"		\
		    "vxor " VR2(r) "," VR2(r) "," VR2(r) "\n"		\
		    "vxor " VR3(r) "," VR3(r) "," VR3(r) "\n"		\
		    : WVR0(r), WVR1(r), WVR2(r), WVR3(r));		\
		break;							\
	case 2:								\
		__asm__ __volatile__(					\
		    "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"		\
		    "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"		\
		    : WVR0(r), WVR1(r));				\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * COPY(r...): copy the first half of the register list into the
 * second half (vor x,y,y is a register move): VR4..7 = VR0..3 for 8
 * registers, VR2..3 = VR0..1 for 4.
 */
#define	COPY(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm__ __volatile__(					\
		    "vor " VR4(r) "," VR0(r) "," VR0(r) "\n"		\
		    "vor " VR5(r) "," VR1(r) "," VR1(r) "\n"		\
		    "vor " VR6(r) "," VR2(r) "," VR2(r) "\n"		\
		    "vor " VR7(r) "," VR3(r) "," VR3(r) "\n"		\
		    : WVR4(r), WVR5(r), WVR6(r), WVR7(r)		\
		    : RVR0(r), RVR1(r), RVR2(r), RVR3(r));		\
		break;							\
	case 4:								\
		__asm__ __volatile__(					\
		    "vor " VR2(r) "," VR0(r) "," VR0(r) "\n"		\
		    "vor " VR3(r) "," VR1(r) "," VR1(r) "\n"		\
		    : WVR2(r), WVR3(r)					\
		    : RVR0(r), RVR1(r));				\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * LOAD(src, r...): load 2/4/8 consecutive 16-byte vectors from src
 * (offsets 0,16,...) into the listed register variables.
 */
#define	LOAD(src, r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm__ __volatile__(					\
		    "lvx " VR0(r) " ,0,%[SRC0]\n"			\
		    "lvx " VR1(r) " ,0,%[SRC1]\n"			\
		    "lvx " VR2(r) " ,0,%[SRC2]\n"			\
		    "lvx " VR3(r) " ,0,%[SRC3]\n"			\
		    "lvx " VR4(r) " ,0,%[SRC4]\n"			\
		    "lvx " VR5(r) " ,0,%[SRC5]\n"			\
		    "lvx " VR6(r) " ,0,%[SRC6]\n"			\
		    "lvx " VR7(r) " ,0,%[SRC7]\n"			\
		    : WVR0(r), WVR1(r), WVR2(r), WVR3(r),		\
		    WVR4(r), WVR5(r), WVR6(r), WVR7(r)			\
		    : [SRC0] "r" ((OFFSET(src, 0))),			\
		    [SRC1] "r" ((OFFSET(src, 16))),			\
		    [SRC2] "r" ((OFFSET(src, 32))),			\
		    [SRC3] "r" ((OFFSET(src, 48))),			\
		    [SRC4] "r" ((OFFSET(src, 64))),			\
		    [SRC5] "r" ((OFFSET(src, 80))),			\
		    [SRC6] "r" ((OFFSET(src, 96))),			\
		    [SRC7] "r" ((OFFSET(src, 112))));			\
		break;							\
	case 4:								\
		__asm__ __volatile__(					\
		    "lvx " VR0(r) " ,0,%[SRC0]\n"			\
		    "lvx " VR1(r) " ,0,%[SRC1]\n"			\
		    "lvx " VR2(r) " ,0,%[SRC2]\n"			\
		    "lvx " VR3(r) " ,0,%[SRC3]\n"			\
		    : WVR0(r), WVR1(r), WVR2(r), WVR3(r)		\
		    : [SRC0] "r" ((OFFSET(src, 0))),			\
		    [SRC1] "r" ((OFFSET(src, 16))),			\
		    [SRC2] "r" ((OFFSET(src, 32))),			\
		    [SRC3] "r" ((OFFSET(src, 48))));			\
		break;							\
	case 2:								\
		__asm__ __volatile__(					\
		    "lvx " VR0(r) " ,0,%[SRC0]\n"			\
		    "lvx " VR1(r) " ,0,%[SRC1]\n"			\
		    : WVR0(r), WVR1(r)					\
		    : [SRC0] "r" ((OFFSET(src, 0))),			\
		    [SRC1] "r" ((OFFSET(src, 16))));			\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * STORE(dst, r...): store 2/4/8 register variables to consecutive
 * 16-byte slots at dst; "memory" clobber keeps the compiler from
 * caching the destination across the asm.
 */
#define	STORE(dst, r...)						\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm__ __volatile__(					\
		    "stvx " VR0(r) " ,0,%[DST0]\n"			\
		    "stvx " VR1(r) " ,0,%[DST1]\n"			\
		    "stvx " VR2(r) " ,0,%[DST2]\n"			\
		    "stvx " VR3(r) " ,0,%[DST3]\n"			\
		    "stvx " VR4(r) " ,0,%[DST4]\n"			\
		    "stvx " VR5(r) " ,0,%[DST5]\n"			\
		    "stvx " VR6(r) " ,0,%[DST6]\n"			\
		    "stvx " VR7(r) " ,0,%[DST7]\n"			\
		    : : [DST0] "r" ((OFFSET(dst, 0))),			\
		    [DST1] "r" ((OFFSET(dst, 16))),			\
		    [DST2] "r" ((OFFSET(dst, 32))),			\
		    [DST3] "r" ((OFFSET(dst, 48))),			\
		    [DST4] "r" ((OFFSET(dst, 64))),			\
		    [DST5] "r" ((OFFSET(dst, 80))),			\
		    [DST6] "r" ((OFFSET(dst, 96))),			\
		    [DST7] "r" ((OFFSET(dst, 112))),			\
		    RVR0(r), RVR1(r), RVR2(r), RVR3(r),			\
		    RVR4(r), RVR5(r), RVR6(r), RVR7(r)			\
		    : "memory");					\
		break;							\
	case 4:								\
		__asm__ __volatile__(					\
		    "stvx " VR0(r) " ,0,%[DST0]\n"			\
		    "stvx " VR1(r) " ,0,%[DST1]\n"			\
		    "stvx " VR2(r) " ,0,%[DST2]\n"			\
		    "stvx " VR3(r) " ,0,%[DST3]\n"			\
		    : : [DST0] "r" ((OFFSET(dst, 0))),			\
		    [DST1] "r" ((OFFSET(dst, 16))),			\
		    [DST2] "r" ((OFFSET(dst, 32))),			\
		    [DST3] "r" ((OFFSET(dst, 48))),			\
		    RVR0(r), RVR1(r), RVR2(r), RVR3(r)			\
		    : "memory");					\
		break;							\
	case 2:								\
		__asm__ __volatile__(					\
		    "stvx " VR0(r) " ,0,%[DST0]\n"			\
		    "stvx " VR1(r) " ,0,%[DST1]\n"			\
		    : : [DST0] "r" ((OFFSET(dst, 0))),			\
		    [DST1] "r" ((OFFSET(dst, 16))),			\
		    RVR0(r), RVR1(r) : "memory");			\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * Unfortunately cannot use the macro, because GCC
 * will try to use the macro name and not value
 * later on...
 * Kept as a reference to what a numbered variable is
 */
#define	_00	"17"		/* all-zero vector (see MUL2_SETUP) */
#define	_1d	"16"		/* 0x1d reduction constant (see MUL2_SETUP) */
#define	_temp0	"19"
#define	_temp1	"18"

/*
 * MUL2_SETUP(): prepare the two constants MUL2/MUL4 rely on:
 * v16 = splat(15) + splat(14) = 0x1d in every byte (the GF(2^8)
 * reduction constant) and v17 = all zeroes.
 */
#define	MUL2_SETUP()							\
{									\
	__asm__ __volatile__(						\
	    "vspltisb " VR(16) ",14\n"					\
	    "vspltisb " VR(17) ",15\n"					\
	    "vaddubm " VR(16) "," VR(17) "," VR(16) "\n"		\
	    "vxor " VR(17) "," VR(17) "," VR(17) "\n"			\
	    : WVR(16), WVR(17));					\
}

/*
 * MUL2(r...): multiply each byte of the listed registers by 2 in
 * GF(2^8).  vcmpgtsb against zero (v17) yields an all-ones mask for
 * bytes with the top bit set; that mask is ANDed with 0x1d (v16),
 * the value is doubled with vaddubm, and the masked reduction
 * constant is XORed back in.  Requires MUL2_SETUP() first.
 */
#define	MUL2(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 4:								\
		__asm__ __volatile__(					\
		    "vcmpgtsb 19," VR(17) "," VR0(r) "\n"		\
		    "vcmpgtsb 18," VR(17) "," VR1(r) "\n"		\
		    "vcmpgtsb 21," VR(17) "," VR2(r) "\n"		\
		    "vcmpgtsb 20," VR(17) "," VR3(r) "\n"		\
		    "vand 19,19," VR(16) "\n"				\
		    "vand 18,18," VR(16) "\n"				\
		    "vand 21,21," VR(16) "\n"				\
		    "vand 20,20," VR(16) "\n"				\
		    "vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		    "vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		    "vaddubm " VR2(r) "," VR2(r) "," VR2(r) "\n"	\
		    "vaddubm " VR3(r) "," VR3(r) "," VR3(r) "\n"	\
		    "vxor " VR0(r) ",19," VR0(r) "\n"			\
		    "vxor " VR1(r) ",18," VR1(r) "\n"			\
		    "vxor " VR2(r) ",21," VR2(r) "\n"			\
		    "vxor " VR3(r) ",20," VR3(r) "\n"			\
		    : UVR0(r), UVR1(r), UVR2(r), UVR3(r)		\
		    : RVR(17), RVR(16)					\
		    : "v18", "v19", "v20", "v21");			\
		break;							\
	case 2:								\
		__asm__ __volatile__(					\
		    "vcmpgtsb 19," VR(17) "," VR0(r) "\n"		\
		    "vcmpgtsb 18," VR(17) "," VR1(r) "\n"		\
		    "vand 19,19," VR(16) "\n"				\
		    "vand 18,18," VR(16) "\n"				\
		    "vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		    "vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		    "vxor " VR0(r) ",19," VR0(r) "\n"			\
		    "vxor " VR1(r) ",18," VR1(r) "\n"			\
		    : UVR0(r), UVR1(r)					\
		    : RVR(17), RVR(16)					\
		    : "v18", "v19");					\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/* MUL4(r...): multiply by 4 in GF(2^8) by doubling twice. */
#define	MUL4(r...)							\
{									\
	MUL2(r);							\
	MUL2(r);							\
}

/*
 * Unfortunately cannot use the macro, because GCC
 * will try to use the macro name and not value
 * later on...
 * Kept as a reference to what a register is
 * (here we're using actual registers for the
 * clobbered ones)
 */
#define	_0f		"15"
#define	_a_save		"14"
#define	_b_save		"13"
#define	_lt_mod_a	"12"
#define	_lt_clmul_a	"11"
#define	_lt_mod_b	"10"
#define	_lt_clmul_b	"15"	/* shares v15 with _0f */

/*
 * _MULx2(c, r...): multiply the bytes of two registers by the
 * constant c in GF(2^8), via vperm table lookups into the four
 * 16-entry tables gf_clmul_mod_lt[4*c+0..3] (low/high nibble,
 * mod/clmul parts).  Clobbers v10-v15; registers follow the
 * reference names _0f/_a_save/... above.
 */
#define	_MULx2(c, r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 2:								\
		__asm__ __volatile__(					\
		    /* lts for upper part */				\
		    "vspltisb 15,15\n"					\
		    "lvx 10,0,%[lt0]\n"					\
		    "lvx 11,0,%[lt1]\n"					\
		    /* upper part */					\
		    "vand 14," VR0(r) ",15\n"				\
		    "vand 13," VR1(r) ",15\n"				\
		    "vspltisb 15,4\n"					\
		    "vsrab " VR0(r) "," VR0(r) ",15\n"			\
		    "vsrab " VR1(r) "," VR1(r) ",15\n"			\
									\
		    "vperm 12,10,10," VR0(r) "\n"			\
		    "vperm 10,10,10," VR1(r) "\n"			\
		    "vperm 15,11,11," VR0(r) "\n"			\
		    "vperm 11,11,11," VR1(r) "\n"			\
									\
		    "vxor " VR0(r) ",15,12\n"				\
		    "vxor " VR1(r) ",11,10\n"				\
		    /* lts for lower part */				\
		    "lvx 10,0,%[lt2]\n"					\
		    "lvx 15,0,%[lt3]\n"					\
		    /* lower part */					\
		    "vperm 12,10,10,14\n"				\
		    "vperm 10,10,10,13\n"				\
		    "vperm 11,15,15,14\n"				\
		    "vperm 15,15,15,13\n"				\
									\
		    "vxor " VR0(r) "," VR0(r) ",12\n"			\
		    "vxor " VR1(r) "," VR1(r) ",10\n"			\
		    "vxor " VR0(r) "," VR0(r) ",11\n"			\
		    "vxor " VR1(r) "," VR1(r) ",15\n"			\
		    : UVR0(r), UVR1(r)					\
		    : [lt0] "r" (&(gf_clmul_mod_lt[4*(c)+0][0])),	\
		    [lt1] "r" (&(gf_clmul_mod_lt[4*(c)+1][0])),		\
		    [lt2] "r" (&(gf_clmul_mod_lt[4*(c)+2][0])),		\
		    [lt3] "r" (&(gf_clmul_mod_lt[4*(c)+3][0]))		\
		    : "v10", "v11", "v12", "v13", "v14", "v15");	\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * MUL(c, r...): multiply 2 or 4 registers by constant c in GF(2^8),
 * dispatching to _MULx2 one register pair at a time.
 */
#define	MUL(c, r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 4:								\
		_MULx2(c, R_23(r));					\
		_MULx2(c, R_01(r));					\
		break;							\
	case 2:								\
		_MULx2(c, R_01(r));					\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/* Bracket raidz math with kernel FPU/vector state save/restore. */
#define	raidz_math_begin()	kfpu_begin()
#define	raidz_math_end()	kfpu_end()

/*
 * GEN_X_DEFINE_*: declare the w0..w38 16-byte vector variables the
 * asm macros above name via their constraints.  The disabled branch
 * pins each variable to a specific vector register.
 * Overkill...
 */
#if 0 // defined(_KERNEL)
#define	GEN_X_DEFINE_0_3()	\
register unsigned char w0 asm("0") __attribute__((vector_size(16)));	\
register unsigned char w1 asm("1") __attribute__((vector_size(16)));	\
register unsigned char w2 asm("2") __attribute__((vector_size(16)));	\
register unsigned char w3 asm("3") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_4_5()	\
register unsigned char w4 asm("4") __attribute__((vector_size(16)));	\
register unsigned char w5 asm("5") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_6_7()	\
register unsigned char w6 asm("6") __attribute__((vector_size(16)));	\
register unsigned char w7 asm("7") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_8_9()	\
register unsigned char w8 asm("8") __attribute__((vector_size(16)));	\
register unsigned char w9 asm("9") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_10_11()	\
register unsigned char w10 asm("10") __attribute__((vector_size(16)));	\
register unsigned char w11 asm("11") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_12_15()	\
register unsigned char w12 asm("12") __attribute__((vector_size(16)));	\
register unsigned char w13 asm("13") __attribute__((vector_size(16)));	\
register unsigned char w14 asm("14") __attribute__((vector_size(16)));	\
register unsigned char w15 asm("15") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_16()	\
register unsigned char w16 asm("16") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_17()	\
register unsigned char w17 asm("17") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_18_21()	\
register unsigned char w18 asm("18") __attribute__((vector_size(16)));	\
register unsigned char w19 asm("19") __attribute__((vector_size(16)));	\
register unsigned char w20 asm("20") __attribute__((vector_size(16)));	\
register unsigned char w21 asm("21") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_22_23()	\
register unsigned char w22 asm("22") __attribute__((vector_size(16)));	\
register unsigned char w23 asm("23") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_24_27()	\
register unsigned char w24 asm("24") __attribute__((vector_size(16)));	\
register unsigned char w25 asm("25") __attribute__((vector_size(16)));	\
register unsigned char w26 asm("26") __attribute__((vector_size(16)));	\
register unsigned char w27 asm("27") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_28_30()	\
register unsigned char w28 asm("28") __attribute__((vector_size(16)));	\
register unsigned char w29 asm("29") __attribute__((vector_size(16)));	\
register unsigned char w30 asm("30") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_31()	\
register unsigned char w31 asm("31") __attribute__((vector_size(16)));
/*
 * NOTE(review): w32..w38 below all alias register 31 — presumably
 * they only ever appear as dummy constraint fillers, never live
 * simultaneously; confirm before enabling this branch.
 */
#define	GEN_X_DEFINE_32()	\
register unsigned char w32 asm("31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_33_36()	\
register unsigned char w33 asm("31") __attribute__((vector_size(16)));	\
register unsigned char w34 asm("31") __attribute__((vector_size(16)));	\
register unsigned char w35 asm("31") __attribute__((vector_size(16)));	\
register unsigned char w36 asm("31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_37_38()	\
register unsigned char w37 asm("31") __attribute__((vector_size(16)));	\
register unsigned char w38 asm("31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_ALL()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_10_11()	\
	GEN_X_DEFINE_12_15()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_18_21()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_24_27()	\
	GEN_X_DEFINE_28_30()	\
	GEN_X_DEFINE_31()	\
	GEN_X_DEFINE_32()	\
	GEN_X_DEFINE_33_36()	\
	GEN_X_DEFINE_37_38()
#else
/* Active branch: let the compiler allocate the vector variables. */
#define	GEN_X_DEFINE_0_3()	\
	unsigned char w0 __attribute__((vector_size(16)));	\
	unsigned char w1 __attribute__((vector_size(16)));	\
	unsigned char w2 __attribute__((vector_size(16)));	\
	unsigned char w3 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_4_5()	\
	unsigned char w4 __attribute__((vector_size(16)));	\
	unsigned char w5 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_6_7()	\
	unsigned char w6 __attribute__((vector_size(16)));	\
	unsigned char w7 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_8_9()	\
	unsigned char w8 __attribute__((vector_size(16)));	\
	unsigned char w9 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_10_11()	\
	unsigned char w10 __attribute__((vector_size(16)));	\
	unsigned char w11 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_12_15()	\
	unsigned char w12 __attribute__((vector_size(16)));	\
	unsigned char w13 __attribute__((vector_size(16)));	\
	unsigned char w14 __attribute__((vector_size(16)));	\
	unsigned char w15 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_16()	\
	unsigned char w16 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_17()	\
	unsigned char w17 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_18_21()	\
	unsigned char w18 __attribute__((vector_size(16)));	\
	unsigned char w19 __attribute__((vector_size(16)));	\
	unsigned char w20 __attribute__((vector_size(16)));	\
	unsigned char w21 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_22_23()	\
	unsigned char w22 __attribute__((vector_size(16)));	\
	unsigned char w23 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_24_27()	\
	unsigned char w24 __attribute__((vector_size(16)));	\
	unsigned char w25 __attribute__((vector_size(16)));	\
	unsigned char w26 __attribute__((vector_size(16)));	\
	unsigned char w27 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_28_30()	\
	unsigned char w28 __attribute__((vector_size(16)));	\
	unsigned char w29 __attribute__((vector_size(16)));	\
	unsigned char w30 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_31()	\
	unsigned char w31 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_32()	\
	unsigned char w32 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_33_36()	\
	unsigned char w33 __attribute__((vector_size(16)));	\
	unsigned char w34 __attribute__((vector_size(16)));	\
	unsigned char w35 __attribute__((vector_size(16)));	\
	unsigned char w36 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_37_38()	\
	unsigned char w37 __attribute__((vector_size(16)));	\
	unsigned char w38 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_ALL()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_10_11()	\
	GEN_X_DEFINE_12_15()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_18_21()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_24_27()	\
	GEN_X_DEFINE_28_30()	\
	GEN_X_DEFINE_31()	\
	GEN_X_DEFINE_32()	\
	GEN_X_DEFINE_33_36()	\
	GEN_X_DEFINE_37_38()
#endif