/*
 * module/zfs/vdev_raidz_math_aarch64_neon_common.h
 * Common NEON macro definitions for the aarch64 RAID-Z math implementations.
 */
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (C) 2016 Romain Dolbeau. All rights reserved.
23 */
24
25 #include <sys/types.h>
26 #include <linux/simd_aarch64.h>
27
28 #define __asm __asm__ __volatile__
29
/*
 * REG_CNT(r...) expands to the number of arguments (1..8): the shifted
 * descending list 8..1 lands the correct count in _REG_CNT's N slot.
 */
#define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
#define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
32
/*
 * VRn_(...) selects the (n+1)-th argument as a register number REG and
 * expands to the asm operand reference string "%[wREG]" for it.
 */
#define VR0_(REG, ...) "%[w"#REG"]"
#define VR1_(_1, REG, ...) "%[w"#REG"]"
#define VR2_(_1, _2, REG, ...) "%[w"#REG"]"
#define VR3_(_1, _2, _3, REG, ...) "%[w"#REG"]"
#define VR4_(_1, _2, _3, _4, REG, ...) "%[w"#REG"]"
#define VR5_(_1, _2, _3, _4, _5, REG, ...) "%[w"#REG"]"
#define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "%[w"#REG"]"
#define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "%[w"#REG"]"
41
/*
 * Here we need registers that are not used otherwise.  They appear in
 * the unused ASM variants for the case with more registers than
 * actually required... but GCC still has to verify that the operand
 * constraints are correct, and duplicate constraints are illegal
 * ... so we use the "register" number itself as the operand name.
 */
50
/*
 * VRn(r...) picks the n-th register from the caller's list.  The
 * trailing dummy register numbers (36..30) pad short argument lists so
 * the VRn_ helpers always receive enough arguments (see comment above).
 */
#define VR0(r...) VR0_(r)
#define VR1(r...) VR1_(r)
#define VR2(r...) VR2_(r, 36)
#define VR3(r...) VR3_(r, 36, 35)
#define VR4(r...) VR4_(r, 36, 35, 34, 33)
#define VR5(r...) VR5_(r, 36, 35, 34, 33, 32)
#define VR6(r...) VR6_(r, 36, 35, 34, 33, 32, 31)
#define VR7(r...) VR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* Asm operand reference for one specific register number X. */
#define VR(X) "%[w"#X"]"
61
/*
 * RVRn(r...): read-only input operand constraint ("w" = NEON register)
 * for the n-th register in the list; same dummy-padding scheme as VRn.
 */
#define RVR0_(REG, ...) [w##REG] "w" (w##REG)
#define RVR1_(_1, REG, ...) [w##REG] "w" (w##REG)
#define RVR2_(_1, _2, REG, ...) [w##REG] "w" (w##REG)
#define RVR3_(_1, _2, _3, REG, ...) [w##REG] "w" (w##REG)
#define RVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "w" (w##REG)
#define RVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "w" (w##REG)
#define RVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "w" (w##REG)
#define RVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "w" (w##REG)

#define RVR0(r...) RVR0_(r)
#define RVR1(r...) RVR1_(r)
#define RVR2(r...) RVR2_(r, 36)
#define RVR3(r...) RVR3_(r, 36, 35)
#define RVR4(r...) RVR4_(r, 36, 35, 34, 33)
#define RVR5(r...) RVR5_(r, 36, 35, 34, 33, 32)
#define RVR6(r...) RVR6_(r, 36, 35, 34, 33, 32, 31)
#define RVR7(r...) RVR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* Read-only input constraint for a specific register number X. */
#define RVR(X) [w##X] "w" (w##X)
81
/*
 * WVRn(r...): write-only output operand constraint ("=w") for the n-th
 * register in the list; same dummy-padding scheme as VRn.
 */
#define WVR0_(REG, ...) [w##REG] "=w" (w##REG)
#define WVR1_(_1, REG, ...) [w##REG] "=w" (w##REG)
#define WVR2_(_1, _2, REG, ...) [w##REG] "=w" (w##REG)
#define WVR3_(_1, _2, _3, REG, ...) [w##REG] "=w" (w##REG)
#define WVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "=w" (w##REG)
#define WVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "=w" (w##REG)
#define WVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "=w" (w##REG)
#define WVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "=w" (w##REG)

#define WVR0(r...) WVR0_(r)
#define WVR1(r...) WVR1_(r)
#define WVR2(r...) WVR2_(r, 36)
#define WVR3(r...) WVR3_(r, 36, 35)
#define WVR4(r...) WVR4_(r, 36, 35, 34, 33)
#define WVR5(r...) WVR5_(r, 36, 35, 34, 33, 32)
#define WVR6(r...) WVR6_(r, 36, 35, 34, 33, 32, 31)
#define WVR7(r...) WVR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* Write-only output constraint for a specific register number X. */
#define WVR(X) [w##X] "=w" (w##X)
101
/*
 * UVRn(r...): read-write, early-clobber operand constraint ("+&w") for
 * the n-th register in the list; same dummy-padding scheme as VRn.
 */
#define UVR0_(REG, ...) [w##REG] "+&w" (w##REG)
#define UVR1_(_1, REG, ...) [w##REG] "+&w" (w##REG)
#define UVR2_(_1, _2, REG, ...) [w##REG] "+&w" (w##REG)
#define UVR3_(_1, _2, _3, REG, ...) [w##REG] "+&w" (w##REG)
#define UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+&w" (w##REG)
#define UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+&w" (w##REG)
#define UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+&w" (w##REG)
#define UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+&w" (w##REG)

#define UVR0(r...) UVR0_(r)
#define UVR1(r...) UVR1_(r)
#define UVR2(r...) UVR2_(r, 36)
#define UVR3(r...) UVR3_(r, 36, 35)
#define UVR4(r...) UVR4_(r, 36, 35, 34, 33)
#define UVR5(r...) UVR5_(r, 36, 35, 34, 33, 32)
#define UVR6(r...) UVR6_(r, 36, 35, 34, 33, 32, 31)
#define UVR7(r...) UVR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* Read-write early-clobber constraint for a specific register number X. */
#define UVR(X) [w##X] "+&w" (w##X)
121
/* Split a 4-register list into its first pair / second pair. */
#define R_01(REG1, REG2, ...) REG1, REG2
#define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3
#define R_23(REG...) _R_23(REG, 1, 2, 3)
125
126 #define ZFS_ASM_BUG() ASSERT(0)
127
128 #define OFFSET(ptr, val) (((unsigned char *)(ptr))+val)
129
/*
 * Pre-computed GF(2^8) carry-less-multiply lookup tables: four 16-byte
 * tables per multiplier constant (indexed as gf_clmul_mod_lt[4*c+i]).
 */
extern const uint8_t gf_clmul_mod_lt[4*256][16];

/* One SIMD element is 16 bytes (a full 128-bit NEON register). */
#define ELEM_SIZE 16

/* A single 16-byte, 16-byte-aligned data element. */
typedef struct v {
	uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
} v_t;
137
/*
 * XOR_ACC(src, r...): XOR-accumulate consecutive 16-byte chunks of
 * memory at 'src' into the listed registers: r[i] ^= src[16*i].
 * Supports 2, 4, or 8 registers; v18-v21 are scratch for the loads.
 */
#define XOR_ACC(src, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		"ld1 { v21.4s },%[SRC0]\n" \
		"ld1 { v20.4s },%[SRC1]\n" \
		"ld1 { v19.4s },%[SRC2]\n" \
		"ld1 { v18.4s },%[SRC3]\n" \
		"eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n" \
		"eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n" \
		"eor " VR2(r) ".16b," VR2(r) ".16b,v19.16b\n" \
		"eor " VR3(r) ".16b," VR3(r) ".16b,v18.16b\n" \
		"ld1 { v21.4s },%[SRC4]\n" \
		"ld1 { v20.4s },%[SRC5]\n" \
		"ld1 { v19.4s },%[SRC6]\n" \
		"ld1 { v18.4s },%[SRC7]\n" \
		"eor " VR4(r) ".16b," VR4(r) ".16b,v21.16b\n" \
		"eor " VR5(r) ".16b," VR5(r) ".16b,v20.16b\n" \
		"eor " VR6(r) ".16b," VR6(r) ".16b,v19.16b\n" \
		"eor " VR7(r) ".16b," VR7(r) ".16b,v18.16b\n" \
		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r), \
			UVR4(r), UVR5(r), UVR6(r), UVR7(r) \
		:	[SRC0] "Q" (*(OFFSET(src, 0))), \
			[SRC1] "Q" (*(OFFSET(src, 16))), \
			[SRC2] "Q" (*(OFFSET(src, 32))), \
			[SRC3] "Q" (*(OFFSET(src, 48))), \
			[SRC4] "Q" (*(OFFSET(src, 64))), \
			[SRC5] "Q" (*(OFFSET(src, 80))), \
			[SRC6] "Q" (*(OFFSET(src, 96))), \
			[SRC7] "Q" (*(OFFSET(src, 112))) \
		:	"v18", "v19", "v20", "v21"); \
		break; \
	case 4: \
		__asm( \
		"ld1 { v21.4s },%[SRC0]\n" \
		"ld1 { v20.4s },%[SRC1]\n" \
		"ld1 { v19.4s },%[SRC2]\n" \
		"ld1 { v18.4s },%[SRC3]\n" \
		"eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n" \
		"eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n" \
		"eor " VR2(r) ".16b," VR2(r) ".16b,v19.16b\n" \
		"eor " VR3(r) ".16b," VR3(r) ".16b,v18.16b\n" \
		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r) \
		:	[SRC0] "Q" (*(OFFSET(src, 0))), \
			[SRC1] "Q" (*(OFFSET(src, 16))), \
			[SRC2] "Q" (*(OFFSET(src, 32))), \
			[SRC3] "Q" (*(OFFSET(src, 48))) \
		:	"v18", "v19", "v20", "v21"); \
		break; \
	case 2: \
		__asm( \
		"ld1 { v21.4s },%[SRC0]\n" \
		"ld1 { v20.4s },%[SRC1]\n" \
		"eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n" \
		"eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n" \
		:	UVR0(r), UVR1(r) \
		:	[SRC0] "Q" (*(OFFSET(src, 0))), \
			[SRC1] "Q" (*(OFFSET(src, 16))) \
		:	"v20", "v21"); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
203
/*
 * XOR(r...): XOR the first half of the register list into the second
 * half: for 2N registers, r[N+i] ^= r[i].  Supports 4 or 8 registers.
 */
#define XOR(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		"eor " VR4(r) ".16b," VR4(r) ".16b," VR0(r) ".16b\n" \
		"eor " VR5(r) ".16b," VR5(r) ".16b," VR1(r) ".16b\n" \
		"eor " VR6(r) ".16b," VR6(r) ".16b," VR2(r) ".16b\n" \
		"eor " VR7(r) ".16b," VR7(r) ".16b," VR3(r) ".16b\n" \
		:	UVR4(r), UVR5(r), UVR6(r), UVR7(r) \
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r)); \
		break; \
	case 4: \
		__asm( \
		"eor " VR2(r) ".16b," VR2(r) ".16b," VR0(r) ".16b\n" \
		"eor " VR3(r) ".16b," VR3(r) ".16b," VR1(r) ".16b\n" \
		:	UVR2(r), UVR3(r) \
		:	RVR0(r), RVR1(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
227
/*
 * ZERO(r...): clear every listed register by XORing it with itself.
 * Supports 2, 4, or 8 registers.
 */
#define ZERO(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		"eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n" \
		"eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n" \
		"eor " VR2(r) ".16b," VR2(r) ".16b," VR2(r) ".16b\n" \
		"eor " VR3(r) ".16b," VR3(r) ".16b," VR3(r) ".16b\n" \
		"eor " VR4(r) ".16b," VR4(r) ".16b," VR4(r) ".16b\n" \
		"eor " VR5(r) ".16b," VR5(r) ".16b," VR5(r) ".16b\n" \
		"eor " VR6(r) ".16b," VR6(r) ".16b," VR6(r) ".16b\n" \
		"eor " VR7(r) ".16b," VR7(r) ".16b," VR7(r) ".16b\n" \
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r), \
			WVR4(r), WVR5(r), WVR6(r), WVR7(r)); \
		break; \
	case 4: \
		__asm( \
		"eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n" \
		"eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n" \
		"eor " VR2(r) ".16b," VR2(r) ".16b," VR2(r) ".16b\n" \
		"eor " VR3(r) ".16b," VR3(r) ".16b," VR3(r) ".16b\n" \
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r)); \
		break; \
	case 2: \
		__asm( \
		"eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n" \
		"eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n" \
		:	WVR0(r), WVR1(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
262
/*
 * COPY(r...): copy the first half of the register list into the second
 * half: for 2N registers, r[N+i] = r[i].  Supports 4 or 8 registers.
 */
#define COPY(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		"mov " VR4(r) ".16b," VR0(r) ".16b\n" \
		"mov " VR5(r) ".16b," VR1(r) ".16b\n" \
		"mov " VR6(r) ".16b," VR2(r) ".16b\n" \
		"mov " VR7(r) ".16b," VR3(r) ".16b\n" \
		:	WVR4(r), WVR5(r), WVR6(r), WVR7(r) \
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r)); \
		break; \
	case 4: \
		__asm( \
		"mov " VR2(r) ".16b," VR0(r) ".16b\n" \
		"mov " VR3(r) ".16b," VR1(r) ".16b\n" \
		:	WVR2(r), WVR3(r) \
		:	RVR0(r), RVR1(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
286
/*
 * LOAD(src, r...): load consecutive 16-byte chunks of memory at 'src'
 * into the listed registers: r[i] = src[16*i].  Supports 2, 4, or 8.
 */
#define LOAD(src, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		"ld1 { " VR0(r) ".4s },%[SRC0]\n" \
		"ld1 { " VR1(r) ".4s },%[SRC1]\n" \
		"ld1 { " VR2(r) ".4s },%[SRC2]\n" \
		"ld1 { " VR3(r) ".4s },%[SRC3]\n" \
		"ld1 { " VR4(r) ".4s },%[SRC4]\n" \
		"ld1 { " VR5(r) ".4s },%[SRC5]\n" \
		"ld1 { " VR6(r) ".4s },%[SRC6]\n" \
		"ld1 { " VR7(r) ".4s },%[SRC7]\n" \
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r), \
			WVR4(r), WVR5(r), WVR6(r), WVR7(r) \
		:	[SRC0] "Q" (*(OFFSET(src, 0))), \
			[SRC1] "Q" (*(OFFSET(src, 16))), \
			[SRC2] "Q" (*(OFFSET(src, 32))), \
			[SRC3] "Q" (*(OFFSET(src, 48))), \
			[SRC4] "Q" (*(OFFSET(src, 64))), \
			[SRC5] "Q" (*(OFFSET(src, 80))), \
			[SRC6] "Q" (*(OFFSET(src, 96))), \
			[SRC7] "Q" (*(OFFSET(src, 112)))); \
		break; \
	case 4: \
		__asm( \
		"ld1 { " VR0(r) ".4s },%[SRC0]\n" \
		"ld1 { " VR1(r) ".4s },%[SRC1]\n" \
		"ld1 { " VR2(r) ".4s },%[SRC2]\n" \
		"ld1 { " VR3(r) ".4s },%[SRC3]\n" \
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r) \
		:	[SRC0] "Q" (*(OFFSET(src, 0))), \
			[SRC1] "Q" (*(OFFSET(src, 16))), \
			[SRC2] "Q" (*(OFFSET(src, 32))), \
			[SRC3] "Q" (*(OFFSET(src, 48)))); \
		break; \
	case 2: \
		__asm( \
		"ld1 { " VR0(r) ".4s },%[SRC0]\n" \
		"ld1 { " VR1(r) ".4s },%[SRC1]\n" \
		:	WVR0(r), WVR1(r) \
		:	[SRC0] "Q" (*(OFFSET(src, 0))), \
			[SRC1] "Q" (*(OFFSET(src, 16)))); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
335
/*
 * STORE(dst, r...): store the listed registers to consecutive 16-byte
 * chunks of memory at 'dst': dst[16*i] = r[i].  Supports 2, 4, or 8.
 */
#define STORE(dst, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		"st1 { " VR0(r) ".4s },%[DST0]\n" \
		"st1 { " VR1(r) ".4s },%[DST1]\n" \
		"st1 { " VR2(r) ".4s },%[DST2]\n" \
		"st1 { " VR3(r) ".4s },%[DST3]\n" \
		"st1 { " VR4(r) ".4s },%[DST4]\n" \
		"st1 { " VR5(r) ".4s },%[DST5]\n" \
		"st1 { " VR6(r) ".4s },%[DST6]\n" \
		"st1 { " VR7(r) ".4s },%[DST7]\n" \
		:	[DST0] "=Q" (*(OFFSET(dst, 0))), \
			[DST1] "=Q" (*(OFFSET(dst, 16))), \
			[DST2] "=Q" (*(OFFSET(dst, 32))), \
			[DST3] "=Q" (*(OFFSET(dst, 48))), \
			[DST4] "=Q" (*(OFFSET(dst, 64))), \
			[DST5] "=Q" (*(OFFSET(dst, 80))), \
			[DST6] "=Q" (*(OFFSET(dst, 96))), \
			[DST7] "=Q" (*(OFFSET(dst, 112))) \
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r), \
			RVR4(r), RVR5(r), RVR6(r), RVR7(r)); \
		break; \
	case 4: \
		__asm( \
		"st1 { " VR0(r) ".4s },%[DST0]\n" \
		"st1 { " VR1(r) ".4s },%[DST1]\n" \
		"st1 { " VR2(r) ".4s },%[DST2]\n" \
		"st1 { " VR3(r) ".4s },%[DST3]\n" \
		:	[DST0] "=Q" (*(OFFSET(dst, 0))), \
			[DST1] "=Q" (*(OFFSET(dst, 16))), \
			[DST2] "=Q" (*(OFFSET(dst, 32))), \
			[DST3] "=Q" (*(OFFSET(dst, 48))) \
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r)); \
		break; \
	case 2: \
		__asm( \
		"st1 { " VR0(r) ".4s },%[DST0]\n" \
		"st1 { " VR1(r) ".4s },%[DST1]\n" \
		:	[DST0] "=Q" (*(OFFSET(dst, 0))), \
			[DST1] "=Q" (*(OFFSET(dst, 16))) \
		:	RVR0(r), RVR1(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
384
/*
 * Unfortunately cannot use the macro, because GCC
 * will try to use the macro name and not value
 * later on...
 * Kept as a reference to what a numbered variable is:
 * v17 holds all-zeroes, v16 holds the 0x1d reduction constant, and
 * v18/v19 are the temporaries used by MUL2/MUL4 below.
 */
#define _00 "v17"
#define _1d "v16"
#define _temp0 "v19"
#define _temp1 "v18"
395
/*
 * MUL2_SETUP(): prepare the constants MUL2/MUL4 rely on:
 * v17 = 0 (all-zero compare operand) and v16 = 0x1d in every byte
 * (the GF(2^8) reduction constant).
 */
#define MUL2_SETUP() \
{ \
	__asm( \
	"eor " VR(17) ".16b," VR(17) ".16b," VR(17) ".16b\n" \
	"movi " VR(16) ".16b,#0x1d\n" \
	:	WVR(16), WVR(17)); \
}
403
/*
 * MUL2(r...): multiply each listed register by 2 in GF(2^8).
 * The signed compare against zero (cmgt v17 > x) yields an all-ones
 * byte mask where the top bit is set; that mask selects the 0x1d
 * reduction constant (v16), which is XORed in after the left shift.
 * Requires MUL2_SETUP(); supports 2 or 4 registers; v18-v21 scratch.
 */
#define MUL2(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		__asm( \
		"cmgt v19.16b," VR(17) ".16b," VR0(r) ".16b\n" \
		"cmgt v18.16b," VR(17) ".16b," VR1(r) ".16b\n" \
		"cmgt v21.16b," VR(17) ".16b," VR2(r) ".16b\n" \
		"cmgt v20.16b," VR(17) ".16b," VR3(r) ".16b\n" \
		"and v19.16b,v19.16b," VR(16) ".16b\n" \
		"and v18.16b,v18.16b," VR(16) ".16b\n" \
		"and v21.16b,v21.16b," VR(16) ".16b\n" \
		"and v20.16b,v20.16b," VR(16) ".16b\n" \
		"shl " VR0(r) ".16b," VR0(r) ".16b,#1\n" \
		"shl " VR1(r) ".16b," VR1(r) ".16b,#1\n" \
		"shl " VR2(r) ".16b," VR2(r) ".16b,#1\n" \
		"shl " VR3(r) ".16b," VR3(r) ".16b,#1\n" \
		"eor " VR0(r) ".16b,v19.16b," VR0(r) ".16b\n" \
		"eor " VR1(r) ".16b,v18.16b," VR1(r) ".16b\n" \
		"eor " VR2(r) ".16b,v21.16b," VR2(r) ".16b\n" \
		"eor " VR3(r) ".16b,v20.16b," VR3(r) ".16b\n" \
		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r) \
		:	RVR(17), RVR(16) \
		:	"v18", "v19", "v20", "v21"); \
		break; \
	case 2: \
		__asm( \
		"cmgt v19.16b," VR(17) ".16b," VR0(r) ".16b\n" \
		"cmgt v18.16b," VR(17) ".16b," VR1(r) ".16b\n" \
		"and v19.16b,v19.16b," VR(16) ".16b\n" \
		"and v18.16b,v18.16b," VR(16) ".16b\n" \
		"shl " VR0(r) ".16b," VR0(r) ".16b,#1\n" \
		"shl " VR1(r) ".16b," VR1(r) ".16b,#1\n" \
		"eor " VR0(r) ".16b,v19.16b," VR0(r) ".16b\n" \
		"eor " VR1(r) ".16b,v18.16b," VR1(r) ".16b\n" \
		:	UVR0(r), UVR1(r) \
		:	RVR(17), RVR(16) \
		:	"v18", "v19"); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
447
/* MUL4(r...): multiply by 4 in GF(2^8), i.e. two doublings. */
#define MUL4(r...) \
{ \
	MUL2(r); \
	MUL2(r); \
}
453
/*
 * Unfortunately cannot use the macro, because GCC
 * will try to use the macro name and not value
 * later on...
 * Kept as a reference to what a register is in _MULx2 below
 * (here we're using actual registers for the clobbered ones).
 * Note v15 is reused for both _0f and _lt_clmul_b: the 0x0f mask is
 * no longer needed by the time the second lookup table is loaded.
 */
#define _0f "v15"
#define _a_save "v14"
#define _b_save "v13"
#define _lt_mod_a "v12"
#define _lt_clmul_a "v11"
#define _lt_mod_b "v10"
#define _lt_clmul_b "v15"
469
/*
 * _MULx2(c, r...): multiply exactly two registers by the constant 'c'
 * in GF(2^8), using TBL lookups into the four 16-byte nibble tables
 * gf_clmul_mod_lt[4*c .. 4*c+3].  Each byte is split into a low nibble
 * (masked with 0x0f) and a high nibble (arithmetic shift then mask);
 * the partial products from the two halves are XORed together.
 * Clobbers v10-v15.
 */
#define _MULx2(c, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 2: \
		__asm( \
		/* lts for upper part */ \
		"movi v15.16b,#0x0f\n" \
		"ld1 { v10.4s },%[lt0]\n" \
		"ld1 { v11.4s },%[lt1]\n" \
		/* upper part */ \
		"and v14.16b," VR0(r) ".16b,v15.16b\n" \
		"and v13.16b," VR1(r) ".16b,v15.16b\n" \
		"sshr " VR0(r) ".8h," VR0(r) ".8h,#4\n" \
		"sshr " VR1(r) ".8h," VR1(r) ".8h,#4\n" \
		"and " VR0(r) ".16b," VR0(r) ".16b,v15.16b\n" \
		"and " VR1(r) ".16b," VR1(r) ".16b,v15.16b\n" \
		\
		"tbl v12.16b,{v10.16b}," VR0(r) ".16b\n" \
		"tbl v10.16b,{v10.16b}," VR1(r) ".16b\n" \
		"tbl v15.16b,{v11.16b}," VR0(r) ".16b\n" \
		"tbl v11.16b,{v11.16b}," VR1(r) ".16b\n" \
		\
		"eor " VR0(r) ".16b,v15.16b,v12.16b\n" \
		"eor " VR1(r) ".16b,v11.16b,v10.16b\n" \
		/* lts for lower part */ \
		"ld1 { v10.4s },%[lt2]\n" \
		"ld1 { v15.4s },%[lt3]\n" \
		/* lower part */ \
		"tbl v12.16b,{v10.16b},v14.16b\n" \
		"tbl v10.16b,{v10.16b},v13.16b\n" \
		"tbl v11.16b,{v15.16b},v14.16b\n" \
		"tbl v15.16b,{v15.16b},v13.16b\n" \
		\
		"eor " VR0(r) ".16b," VR0(r) ".16b,v12.16b\n" \
		"eor " VR1(r) ".16b," VR1(r) ".16b,v10.16b\n" \
		"eor " VR0(r) ".16b," VR0(r) ".16b,v11.16b\n" \
		"eor " VR1(r) ".16b," VR1(r) ".16b,v15.16b\n" \
		:	UVR0(r), UVR1(r) \
		:	[lt0] "Q" ((gf_clmul_mod_lt[4*(c)+0][0])), \
			[lt1] "Q" ((gf_clmul_mod_lt[4*(c)+1][0])), \
			[lt2] "Q" ((gf_clmul_mod_lt[4*(c)+2][0])), \
			[lt3] "Q" ((gf_clmul_mod_lt[4*(c)+3][0])) \
		:	"v10", "v11", "v12", "v13", "v14", "v15"); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
518
/*
 * MUL(c, r...): multiply 2 or 4 registers by the constant 'c' in
 * GF(2^8) by dispatching pairs of registers to _MULx2.
 */
#define MUL(c, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		_MULx2(c, R_23(r)); \
		_MULx2(c, R_01(r)); \
		break; \
	case 2: \
		_MULx2(c, R_01(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
533
/* Bracket RAID-Z math with kernel FPU/SIMD enable/disable. */
#define raidz_math_begin() kfpu_begin()
#define raidz_math_end() kfpu_end()
536
537 /* Overkill... */
538 #if defined(_KERNEL)
/*
 * In-kernel build: pin each wN variable to a specific NEON register so
 * the hand-written asm operand constraints above map to real registers.
 * w32..w38 all alias v31 — they only ever appear as dummy operands that
 * satisfy the unused constraints of the wider macro variants.
 */
#define GEN_X_DEFINE_0_3() \
	register unsigned char w0 asm("v0") __attribute__((vector_size(16))); \
	register unsigned char w1 asm("v1") __attribute__((vector_size(16))); \
	register unsigned char w2 asm("v2") __attribute__((vector_size(16))); \
	register unsigned char w3 asm("v3") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_4_5() \
	register unsigned char w4 asm("v4") __attribute__((vector_size(16))); \
	register unsigned char w5 asm("v5") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_6_7() \
	register unsigned char w6 asm("v6") __attribute__((vector_size(16))); \
	register unsigned char w7 asm("v7") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_8_9() \
	register unsigned char w8 asm("v8") __attribute__((vector_size(16))); \
	register unsigned char w9 asm("v9") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_10_11() \
	register unsigned char w10 asm("v10") __attribute__((vector_size(16))); \
	register unsigned char w11 asm("v11") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_12_15() \
	register unsigned char w12 asm("v12") __attribute__((vector_size(16))); \
	register unsigned char w13 asm("v13") __attribute__((vector_size(16))); \
	register unsigned char w14 asm("v14") __attribute__((vector_size(16))); \
	register unsigned char w15 asm("v15") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_16() \
	register unsigned char w16 asm("v16") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_17() \
	register unsigned char w17 asm("v17") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_18_21() \
	register unsigned char w18 asm("v18") __attribute__((vector_size(16))); \
	register unsigned char w19 asm("v19") __attribute__((vector_size(16))); \
	register unsigned char w20 asm("v20") __attribute__((vector_size(16))); \
	register unsigned char w21 asm("v21") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_22_23() \
	register unsigned char w22 asm("v22") __attribute__((vector_size(16))); \
	register unsigned char w23 asm("v23") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_24_27() \
	register unsigned char w24 asm("v24") __attribute__((vector_size(16))); \
	register unsigned char w25 asm("v25") __attribute__((vector_size(16))); \
	register unsigned char w26 asm("v26") __attribute__((vector_size(16))); \
	register unsigned char w27 asm("v27") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_28_30() \
	register unsigned char w28 asm("v28") __attribute__((vector_size(16))); \
	register unsigned char w29 asm("v29") __attribute__((vector_size(16))); \
	register unsigned char w30 asm("v30") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_31() \
	register unsigned char w31 asm("v31") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_32() \
	register unsigned char w32 asm("v31") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_33_36() \
	register unsigned char w33 asm("v31") __attribute__((vector_size(16))); \
	register unsigned char w34 asm("v31") __attribute__((vector_size(16))); \
	register unsigned char w35 asm("v31") __attribute__((vector_size(16))); \
	register unsigned char w36 asm("v31") __attribute__((vector_size(16)));
#define GEN_X_DEFINE_37_38() \
	register unsigned char w37 asm("v31") __attribute__((vector_size(16))); \
	register unsigned char w38 asm("v31") __attribute__((vector_size(16)));
/* Convenience: declare the full w0..w38 register set at once. */
#define GEN_X_DEFINE_ALL() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_8_9() \
	GEN_X_DEFINE_10_11() \
	GEN_X_DEFINE_12_15() \
	GEN_X_DEFINE_16() \
	GEN_X_DEFINE_17() \
	GEN_X_DEFINE_18_21() \
	GEN_X_DEFINE_22_23() \
	GEN_X_DEFINE_24_27() \
	GEN_X_DEFINE_28_30() \
	GEN_X_DEFINE_31() \
	GEN_X_DEFINE_32() \
	GEN_X_DEFINE_33_36() \
	GEN_X_DEFINE_37_38()
611 #else
/*
 * Userland build: plain vector variables — the compiler allocates
 * registers itself, so no explicit asm register binding is needed.
 */
#define GEN_X_DEFINE_0_3() \
	unsigned char w0 __attribute__((vector_size(16))); \
	unsigned char w1 __attribute__((vector_size(16))); \
	unsigned char w2 __attribute__((vector_size(16))); \
	unsigned char w3 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_4_5() \
	unsigned char w4 __attribute__((vector_size(16))); \
	unsigned char w5 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_6_7() \
	unsigned char w6 __attribute__((vector_size(16))); \
	unsigned char w7 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_8_9() \
	unsigned char w8 __attribute__((vector_size(16))); \
	unsigned char w9 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_10_11() \
	unsigned char w10 __attribute__((vector_size(16))); \
	unsigned char w11 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_12_15() \
	unsigned char w12 __attribute__((vector_size(16))); \
	unsigned char w13 __attribute__((vector_size(16))); \
	unsigned char w14 __attribute__((vector_size(16))); \
	unsigned char w15 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_16() \
	unsigned char w16 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_17() \
	unsigned char w17 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_18_21() \
	unsigned char w18 __attribute__((vector_size(16))); \
	unsigned char w19 __attribute__((vector_size(16))); \
	unsigned char w20 __attribute__((vector_size(16))); \
	unsigned char w21 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_22_23() \
	unsigned char w22 __attribute__((vector_size(16))); \
	unsigned char w23 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_24_27() \
	unsigned char w24 __attribute__((vector_size(16))); \
	unsigned char w25 __attribute__((vector_size(16))); \
	unsigned char w26 __attribute__((vector_size(16))); \
	unsigned char w27 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_28_30() \
	unsigned char w28 __attribute__((vector_size(16))); \
	unsigned char w29 __attribute__((vector_size(16))); \
	unsigned char w30 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_31() \
	unsigned char w31 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_32() \
	unsigned char w32 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_33_36() \
	unsigned char w33 __attribute__((vector_size(16))); \
	unsigned char w34 __attribute__((vector_size(16))); \
	unsigned char w35 __attribute__((vector_size(16))); \
	unsigned char w36 __attribute__((vector_size(16)));
#define GEN_X_DEFINE_37_38() \
	unsigned char w37 __attribute__((vector_size(16))); \
	unsigned char w38 __attribute__((vector_size(16)));
/* Convenience: declare the full w0..w38 variable set at once. */
#define GEN_X_DEFINE_ALL() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_8_9() \
	GEN_X_DEFINE_10_11() \
	GEN_X_DEFINE_12_15() \
	GEN_X_DEFINE_16() \
	GEN_X_DEFINE_17() \
	GEN_X_DEFINE_18_21() \
	GEN_X_DEFINE_22_23() \
	GEN_X_DEFINE_24_27() \
	GEN_X_DEFINE_28_30() \
	GEN_X_DEFINE_31() \
	GEN_X_DEFINE_32() \
	GEN_X_DEFINE_33_36() \
	GEN_X_DEFINE_37_38()
684 #endif