]>
Commit | Line | Data |
---|---|---|
35b07497 RD |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
35b07497 RD |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright (C) 2019 Romain Dolbeau. All rights reserved. | |
23 | * <romain.dolbeau@european-processor-initiative.eu> | |
24 | */ | |
25 | ||
26 | #include <sys/types.h> | |
27 | #include <sys/simd.h> | |
28 | ||
35b07497 RD |
/*
 * REG_CNT(...) expands to the number of arguments it was given (1 to 8).
 * The arguments are followed by the constants 8 .. 1 and _REG_CNT picks
 * the ninth entry of that combined list.
 */
#define _REG_CNT(a0, a1, a2, a3, a4, a5, a6, a7, CNT, ...) CNT
#define REG_CNT(...) _REG_CNT(__VA_ARGS__, 8, 7, 6, 5, 4, 3, 2, 1)
31 | ||
/*
 * VRn_(...) picks argument number n (counting from 0) and expands to the
 * named asm operand reference "%[w<arg>]" for it.
 */
#define VR0_(REG, ...) "%[w" #REG "]"
#define VR1_(p0, REG, ...) "%[w" #REG "]"
#define VR2_(p0, p1, REG, ...) "%[w" #REG "]"
#define VR3_(p0, p1, p2, REG, ...) "%[w" #REG "]"
#define VR4_(p0, p1, p2, p3, REG, ...) "%[w" #REG "]"
#define VR5_(p0, p1, p2, p3, p4, REG, ...) "%[w" #REG "]"
#define VR6_(p0, p1, p2, p3, p4, p5, REG, ...) "%[w" #REG "]"
#define VR7_(p0, p1, p2, p3, p4, p5, p6, REG, ...) "%[w" #REG "]"

/*
 * VRn(r...) is the operand reference for the n-th register of list r.
 *
 * The asm for a larger register count than the one actually passed is
 * never executed, but the compiler still checks its operands, and listing
 * the same operand twice is not allowed.  The list is therefore padded
 * with the otherwise unused numbers 36, 35, ... 30, which serve purely as
 * operand names.
 */
#define VR0(r...) VR0_(r)
#define VR1(r...) VR1_(r)
#define VR2(r...) VR2_(r, 36)
#define VR3(r...) VR3_(r, 36, 35)
#define VR4(r...) VR4_(r, 36, 35, 34, 33)
#define VR5(r...) VR5_(r, 36, 35, 34, 33, 32)
#define VR6(r...) VR6_(r, 36, 35, 34, 33, 32, 31)
#define VR7(r...) VR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* VR(X) is the operand reference for the vector variable wX. */
#define VR(X) "%[w" #X "]"
60 | ||
/*
 * RVRn_(...) picks argument number n (counting from 0) and expands to a
 * read-only asm operand named w<arg>, bound to the variable w<arg> with
 * the "v" constraint.
 */
#define RVR0_(REG, ...) [w##REG] "v" (w##REG)
#define RVR1_(p0, REG, ...) [w##REG] "v" (w##REG)
#define RVR2_(p0, p1, REG, ...) [w##REG] "v" (w##REG)
#define RVR3_(p0, p1, p2, REG, ...) [w##REG] "v" (w##REG)
#define RVR4_(p0, p1, p2, p3, REG, ...) [w##REG] "v" (w##REG)
#define RVR5_(p0, p1, p2, p3, p4, REG, ...) [w##REG] "v" (w##REG)
#define RVR6_(p0, p1, p2, p3, p4, p5, REG, ...) [w##REG] "v" (w##REG)
#define RVR7_(p0, p1, p2, p3, p4, p5, p6, REG, ...) [w##REG] "v" (w##REG)

/*
 * RVRn(r...) is the input operand for the n-th register of list r; the
 * list is padded with the placeholder numbers 36 .. 30 when it is shorter
 * than n + 1 entries.
 */
#define RVR0(r...) RVR0_(r)
#define RVR1(r...) RVR1_(r)
#define RVR2(r...) RVR2_(r, 36)
#define RVR3(r...) RVR3_(r, 36, 35)
#define RVR4(r...) RVR4_(r, 36, 35, 34, 33)
#define RVR5(r...) RVR5_(r, 36, 35, 34, 33, 32)
#define RVR6(r...) RVR6_(r, 36, 35, 34, 33, 32, 31)
#define RVR7(r...) RVR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* RVR(X) is the input operand for the vector variable wX. */
#define RVR(X) [w##X] "v" (w##X)
80 | ||
/*
 * WVRn_(...) picks argument number n (counting from 0) and expands to a
 * write-only asm operand named w<arg>, bound to the variable w<arg>.
 *
 * The operand is marked early-clobber ("&").  The asm bodies that use it
 * consist of several instructions, and some of them (COPY) write their
 * first destination before the remaining sources have been read.  Without
 * the marker the compiler is allowed to place an output in the same
 * register as an input, which would corrupt that input.
 */
#define WVR0_(REG, ...) [w##REG] "=&v" (w##REG)
#define WVR1_(p0, REG, ...) [w##REG] "=&v" (w##REG)
#define WVR2_(p0, p1, REG, ...) [w##REG] "=&v" (w##REG)
#define WVR3_(p0, p1, p2, REG, ...) [w##REG] "=&v" (w##REG)
#define WVR4_(p0, p1, p2, p3, REG, ...) [w##REG] "=&v" (w##REG)
#define WVR5_(p0, p1, p2, p3, p4, REG, ...) [w##REG] "=&v" (w##REG)
#define WVR6_(p0, p1, p2, p3, p4, p5, REG, ...) [w##REG] "=&v" (w##REG)
#define WVR7_(p0, p1, p2, p3, p4, p5, p6, REG, ...) [w##REG] "=&v" (w##REG)

/*
 * WVRn(r...) is the output operand for the n-th register of list r; the
 * list is padded with the placeholder numbers 36 .. 30 when it is shorter
 * than n + 1 entries.
 */
#define WVR0(r...) WVR0_(r)
#define WVR1(r...) WVR1_(r)
#define WVR2(r...) WVR2_(r, 36)
#define WVR3(r...) WVR3_(r, 36, 35)
#define WVR4(r...) WVR4_(r, 36, 35, 34, 33)
#define WVR5(r...) WVR5_(r, 36, 35, 34, 33, 32)
#define WVR6(r...) WVR6_(r, 36, 35, 34, 33, 32, 31)
#define WVR7(r...) WVR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* WVR(X) is the early-clobber output operand for the vector variable wX. */
#define WVR(X) [w##X] "=&v" (w##X)
100 | ||
/*
 * UVRn_(...) picks argument number n (counting from 0) and expands to a
 * read-write, early-clobber asm operand named w<arg>, bound to the
 * variable w<arg>.
 */
#define UVR0_(REG, ...) [w##REG] "+&v" (w##REG)
#define UVR1_(p0, REG, ...) [w##REG] "+&v" (w##REG)
#define UVR2_(p0, p1, REG, ...) [w##REG] "+&v" (w##REG)
#define UVR3_(p0, p1, p2, REG, ...) [w##REG] "+&v" (w##REG)
#define UVR4_(p0, p1, p2, p3, REG, ...) [w##REG] "+&v" (w##REG)
#define UVR5_(p0, p1, p2, p3, p4, REG, ...) [w##REG] "+&v" (w##REG)
#define UVR6_(p0, p1, p2, p3, p4, p5, REG, ...) [w##REG] "+&v" (w##REG)
#define UVR7_(p0, p1, p2, p3, p4, p5, p6, REG, ...) [w##REG] "+&v" (w##REG)

/*
 * UVRn(r...) is the update operand for the n-th register of list r; the
 * list is padded with the placeholder numbers 36 .. 30 when it is shorter
 * than n + 1 entries.
 */
#define UVR0(r...) UVR0_(r)
#define UVR1(r...) UVR1_(r)
#define UVR2(r...) UVR2_(r, 36)
#define UVR3(r...) UVR3_(r, 36, 35)
#define UVR4(r...) UVR4_(r, 36, 35, 34, 33)
#define UVR5(r...) UVR5_(r, 36, 35, 34, 33, 32)
#define UVR6(r...) UVR6_(r, 36, 35, 34, 33, 32, 31)
#define UVR7(r...) UVR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* UVR(X) is the update operand for the vector variable wX. */
#define UVR(X) [w##X] "+&v" (w##X)
120 | ||
/* R_01(...) keeps the first two entries of a register list. */
#define R_01(FIRST, SECOND, ...) FIRST, SECOND

/*
 * R_23(...) keeps the third and fourth entries of a register list.  The
 * list is padded with 1, 2, 3 so that _R_23 always receives enough
 * arguments, even for a list shorter than four entries.
 */
#define _R_23(p0, p1, THIRD, FOURTH, ...) THIRD, FOURTH
#define R_23(...) _R_23(__VA_ARGS__, 1, 2, 3)
124 | ||
/*
 * Used as the default case of the register-count switches in this file,
 * i.e. for a register count the macro has no asm for.
 */
#define ZFS_ASM_BUG() ASSERT(0)
126 | ||
/*
 * OFFSET(ptr, val): address val bytes past ptr, as an unsigned char pointer.
 * Both arguments are parenthesized so that any expression can be passed.
 */
#define OFFSET(ptr, val) (((unsigned char *)(ptr)) + (val))
128 | ||
/*
 * Lookup tables read by _MULx2(): for a constant c it loads the four
 * 16-byte rows 4*c+0 .. 4*c+3.
 */
extern const uint8_t gf_clmul_mod_lt[4 * 256][16];

/* Size in bytes of one vector element. */
#define ELEM_SIZE 16

/* One ELEM_SIZE-byte block of data, aligned to ELEM_SIZE bytes. */
typedef struct v {
	uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
} v_t;
136 | ||
/*
 * XOR_ACC(src, r...): XOR the 16-byte blocks at src, src + 16, ... into
 * the vector variables named by r (2, 4 or 8 of them), one block per
 * variable.
 *
 * Vector registers 18-21 receive the loaded blocks and are declared as
 * clobbered.  The "memory" clobber is required because lvx reads memory
 * that no operand describes (only the addresses are passed in), so the
 * compiler must not assume the asm is independent of earlier stores.
 * Any other register count ends up in ZFS_ASM_BUG().
 */
#define XOR_ACC(src, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm__ __volatile__( \
		    "lvx 21,0,%[SRC0]\n" \
		    "lvx 20,0,%[SRC1]\n" \
		    "lvx 19,0,%[SRC2]\n" \
		    "lvx 18,0,%[SRC3]\n" \
		    "vxor " VR0(r) "," VR0(r) ",21\n" \
		    "vxor " VR1(r) "," VR1(r) ",20\n" \
		    "vxor " VR2(r) "," VR2(r) ",19\n" \
		    "vxor " VR3(r) "," VR3(r) ",18\n" \
		    "lvx 21,0,%[SRC4]\n" \
		    "lvx 20,0,%[SRC5]\n" \
		    "lvx 19,0,%[SRC6]\n" \
		    "lvx 18,0,%[SRC7]\n" \
		    "vxor " VR4(r) "," VR4(r) ",21\n" \
		    "vxor " VR5(r) "," VR5(r) ",20\n" \
		    "vxor " VR6(r) "," VR6(r) ",19\n" \
		    "vxor " VR7(r) "," VR7(r) ",18\n" \
		    : UVR0(r), UVR1(r), UVR2(r), UVR3(r), \
		    UVR4(r), UVR5(r), UVR6(r), UVR7(r) \
		    : [SRC0] "r" ((OFFSET(src, 0))), \
		    [SRC1] "r" ((OFFSET(src, 16))), \
		    [SRC2] "r" ((OFFSET(src, 32))), \
		    [SRC3] "r" ((OFFSET(src, 48))), \
		    [SRC4] "r" ((OFFSET(src, 64))), \
		    [SRC5] "r" ((OFFSET(src, 80))), \
		    [SRC6] "r" ((OFFSET(src, 96))), \
		    [SRC7] "r" ((OFFSET(src, 112))) \
		    : "v18", "v19", "v20", "v21", "memory"); \
		break; \
	case 4: \
		__asm__ __volatile__( \
		    "lvx 21,0,%[SRC0]\n" \
		    "lvx 20,0,%[SRC1]\n" \
		    "lvx 19,0,%[SRC2]\n" \
		    "lvx 18,0,%[SRC3]\n" \
		    "vxor " VR0(r) "," VR0(r) ",21\n" \
		    "vxor " VR1(r) "," VR1(r) ",20\n" \
		    "vxor " VR2(r) "," VR2(r) ",19\n" \
		    "vxor " VR3(r) "," VR3(r) ",18\n" \
		    : UVR0(r), UVR1(r), UVR2(r), UVR3(r) \
		    : [SRC0] "r" ((OFFSET(src, 0))), \
		    [SRC1] "r" ((OFFSET(src, 16))), \
		    [SRC2] "r" ((OFFSET(src, 32))), \
		    [SRC3] "r" ((OFFSET(src, 48))) \
		    : "v18", "v19", "v20", "v21", "memory"); \
		break; \
	case 2: \
		__asm__ __volatile__( \
		    "lvx 21,0,%[SRC0]\n" \
		    "lvx 20,0,%[SRC1]\n" \
		    "vxor " VR0(r) "," VR0(r) ",21\n" \
		    "vxor " VR1(r) "," VR1(r) ",20\n" \
		    : UVR0(r), UVR1(r) \
		    : [SRC0] "r" ((OFFSET(src, 0))), \
		    [SRC1] "r" ((OFFSET(src, 16))) \
		    : "v20", "v21", "memory"); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
202 | ||
/*
 * XOR(r...): XOR the first half of the register list into the second
 * half.  With 8 names the variables 4-7 of the list are updated from
 * variables 0-3; with 4 names variables 2-3 are updated from variables
 * 0-1.  Any other register count ends up in ZFS_ASM_BUG().
 */
#define XOR(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm__ __volatile__( \
		    "vxor " VR4(r) "," VR4(r) "," VR0(r) "\n" \
		    "vxor " VR5(r) "," VR5(r) "," VR1(r) "\n" \
		    "vxor " VR6(r) "," VR6(r) "," VR2(r) "\n" \
		    "vxor " VR7(r) "," VR7(r) "," VR3(r) "\n" \
		    : UVR4(r), UVR5(r), UVR6(r), UVR7(r) \
		    : RVR0(r), RVR1(r), RVR2(r), RVR3(r)); \
		break; \
	case 4: \
		__asm__ __volatile__( \
		    "vxor " VR2(r) "," VR2(r) "," VR0(r) "\n" \
		    "vxor " VR3(r) "," VR3(r) "," VR1(r) "\n" \
		    : UVR2(r), UVR3(r) \
		    : RVR0(r), RVR1(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
226 | ||
/*
 * ZERO(r...): clear the 2, 4 or 8 vector variables named by r by XORing
 * each one with itself.  Any other register count ends up in
 * ZFS_ASM_BUG().
 */
#define ZERO(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm__ __volatile__( \
		    "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n" \
		    "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n" \
		    "vxor " VR2(r) "," VR2(r) "," VR2(r) "\n" \
		    "vxor " VR3(r) "," VR3(r) "," VR3(r) "\n" \
		    "vxor " VR4(r) "," VR4(r) "," VR4(r) "\n" \
		    "vxor " VR5(r) "," VR5(r) "," VR5(r) "\n" \
		    "vxor " VR6(r) "," VR6(r) "," VR6(r) "\n" \
		    "vxor " VR7(r) "," VR7(r) "," VR7(r) "\n" \
		    : WVR0(r), WVR1(r), WVR2(r), WVR3(r), \
		    WVR4(r), WVR5(r), WVR6(r), WVR7(r)); \
		break; \
	case 4: \
		__asm__ __volatile__( \
		    "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n" \
		    "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n" \
		    "vxor " VR2(r) "," VR2(r) "," VR2(r) "\n" \
		    "vxor " VR3(r) "," VR3(r) "," VR3(r) "\n" \
		    : WVR0(r), WVR1(r), WVR2(r), WVR3(r)); \
		break; \
	case 2: \
		__asm__ __volatile__( \
		    "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n" \
		    "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n" \
		    : WVR0(r), WVR1(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
261 | ||
/*
 * COPY(r...): copy the first half of the register list into the second
 * half ("vor dst,src,src").  With 8 names variables 0-3 are copied to
 * variables 4-7; with 4 names variables 0-1 are copied to variables 2-3.
 * Any other register count ends up in ZFS_ASM_BUG().
 */
#define COPY(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm__ __volatile__( \
		    "vor " VR4(r) "," VR0(r) "," VR0(r) "\n" \
		    "vor " VR5(r) "," VR1(r) "," VR1(r) "\n" \
		    "vor " VR6(r) "," VR2(r) "," VR2(r) "\n" \
		    "vor " VR7(r) "," VR3(r) "," VR3(r) "\n" \
		    : WVR4(r), WVR5(r), WVR6(r), WVR7(r) \
		    : RVR0(r), RVR1(r), RVR2(r), RVR3(r)); \
		break; \
	case 4: \
		__asm__ __volatile__( \
		    "vor " VR2(r) "," VR0(r) "," VR0(r) "\n" \
		    "vor " VR3(r) "," VR1(r) "," VR1(r) "\n" \
		    : WVR2(r), WVR3(r) \
		    : RVR0(r), RVR1(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
285 | ||
/*
 * LOAD(src, r...): load the 16-byte blocks at src, src + 16, ... into the
 * 2, 4 or 8 vector variables named by r, one block per variable.
 *
 * The "memory" clobber is required because lvx reads memory that no
 * operand describes (only the addresses are passed in), so the compiler
 * must not assume the asm is independent of earlier stores.  Any other
 * register count ends up in ZFS_ASM_BUG().
 */
#define LOAD(src, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm__ __volatile__( \
		    "lvx " VR0(r) " ,0,%[SRC0]\n" \
		    "lvx " VR1(r) " ,0,%[SRC1]\n" \
		    "lvx " VR2(r) " ,0,%[SRC2]\n" \
		    "lvx " VR3(r) " ,0,%[SRC3]\n" \
		    "lvx " VR4(r) " ,0,%[SRC4]\n" \
		    "lvx " VR5(r) " ,0,%[SRC5]\n" \
		    "lvx " VR6(r) " ,0,%[SRC6]\n" \
		    "lvx " VR7(r) " ,0,%[SRC7]\n" \
		    : WVR0(r), WVR1(r), WVR2(r), WVR3(r), \
		    WVR4(r), WVR5(r), WVR6(r), WVR7(r) \
		    : [SRC0] "r" ((OFFSET(src, 0))), \
		    [SRC1] "r" ((OFFSET(src, 16))), \
		    [SRC2] "r" ((OFFSET(src, 32))), \
		    [SRC3] "r" ((OFFSET(src, 48))), \
		    [SRC4] "r" ((OFFSET(src, 64))), \
		    [SRC5] "r" ((OFFSET(src, 80))), \
		    [SRC6] "r" ((OFFSET(src, 96))), \
		    [SRC7] "r" ((OFFSET(src, 112))) \
		    : "memory"); \
		break; \
	case 4: \
		__asm__ __volatile__( \
		    "lvx " VR0(r) " ,0,%[SRC0]\n" \
		    "lvx " VR1(r) " ,0,%[SRC1]\n" \
		    "lvx " VR2(r) " ,0,%[SRC2]\n" \
		    "lvx " VR3(r) " ,0,%[SRC3]\n" \
		    : WVR0(r), WVR1(r), WVR2(r), WVR3(r) \
		    : [SRC0] "r" ((OFFSET(src, 0))), \
		    [SRC1] "r" ((OFFSET(src, 16))), \
		    [SRC2] "r" ((OFFSET(src, 32))), \
		    [SRC3] "r" ((OFFSET(src, 48))) \
		    : "memory"); \
		break; \
	case 2: \
		__asm__ __volatile__( \
		    "lvx " VR0(r) " ,0,%[SRC0]\n" \
		    "lvx " VR1(r) " ,0,%[SRC1]\n" \
		    : WVR0(r), WVR1(r) \
		    : [SRC0] "r" ((OFFSET(src, 0))), \
		    [SRC1] "r" ((OFFSET(src, 16))) \
		    : "memory"); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
334 | ||
/*
 * STORE(dst, r...): store the 2, 4 or 8 vector variables named by r to
 * the 16-byte blocks at dst, dst + 16, ..., one variable per block.  The
 * asm declares a "memory" clobber for the stores it performs.  Any other
 * register count ends up in ZFS_ASM_BUG().
 */
#define STORE(dst, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm__ __volatile__( \
		    "stvx " VR0(r) " ,0,%[DST0]\n" \
		    "stvx " VR1(r) " ,0,%[DST1]\n" \
		    "stvx " VR2(r) " ,0,%[DST2]\n" \
		    "stvx " VR3(r) " ,0,%[DST3]\n" \
		    "stvx " VR4(r) " ,0,%[DST4]\n" \
		    "stvx " VR5(r) " ,0,%[DST5]\n" \
		    "stvx " VR6(r) " ,0,%[DST6]\n" \
		    "stvx " VR7(r) " ,0,%[DST7]\n" \
		    : \
		    : [DST0] "r" ((OFFSET(dst, 0))), \
		    [DST1] "r" ((OFFSET(dst, 16))), \
		    [DST2] "r" ((OFFSET(dst, 32))), \
		    [DST3] "r" ((OFFSET(dst, 48))), \
		    [DST4] "r" ((OFFSET(dst, 64))), \
		    [DST5] "r" ((OFFSET(dst, 80))), \
		    [DST6] "r" ((OFFSET(dst, 96))), \
		    [DST7] "r" ((OFFSET(dst, 112))), \
		    RVR0(r), RVR1(r), RVR2(r), RVR3(r), \
		    RVR4(r), RVR5(r), RVR6(r), RVR7(r) \
		    : "memory"); \
		break; \
	case 4: \
		__asm__ __volatile__( \
		    "stvx " VR0(r) " ,0,%[DST0]\n" \
		    "stvx " VR1(r) " ,0,%[DST1]\n" \
		    "stvx " VR2(r) " ,0,%[DST2]\n" \
		    "stvx " VR3(r) " ,0,%[DST3]\n" \
		    : \
		    : [DST0] "r" ((OFFSET(dst, 0))), \
		    [DST1] "r" ((OFFSET(dst, 16))), \
		    [DST2] "r" ((OFFSET(dst, 32))), \
		    [DST3] "r" ((OFFSET(dst, 48))), \
		    RVR0(r), RVR1(r), RVR2(r), RVR3(r) \
		    : "memory"); \
		break; \
	case 2: \
		__asm__ __volatile__( \
		    "stvx " VR0(r) " ,0,%[DST0]\n" \
		    "stvx " VR1(r) " ,0,%[DST1]\n" \
		    : \
		    : [DST0] "r" ((OFFSET(dst, 0))), \
		    [DST1] "r" ((OFFSET(dst, 16))), \
		    RVR0(r), RVR1(r) \
		    : "memory"); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
385 | ||
/*
 * Reference only: these names document which numbers MUL2_SETUP() and
 * MUL2() use.  The asm itself spells the numbers out, because GCC would
 * otherwise use the macro name rather than its value.
 *
 *   _00     variable w17, cleared by MUL2_SETUP()
 *   _1d     variable w16, set to 14 + 15 = 0x1d in every byte
 *   _temp0  scratch register 19 in MUL2()
 *   _temp1  scratch register 18 in MUL2()
 */
#define _00 "17"
#define _1d "16"
#define _temp0 "19"
#define _temp1 "18"
396 | ||
/*
 * MUL2_SETUP(): prepare the two constants MUL2() takes as inputs.
 * w16 is built as splat(14) + splat(15), i.e. 0x1d in every byte, and
 * w17 is then cleared by XORing it with itself.
 */
#define MUL2_SETUP() \
{ \
	__asm__ __volatile__( \
	    "vspltisb " VR(16) ",14\n" \
	    "vspltisb " VR(17) ",15\n" \
	    "vaddubm " VR(16) "," VR(17) "," VR(16) "\n" \
	    "vxor " VR(17) "," VR(17) "," VR(17) "\n" \
	    : WVR(16), WVR(17)); \
}
406 | ||
/*
 * MUL2(r...): update each of the 2 or 4 vector variables named by r, byte
 * by byte, as follows:
 *
 *   mask = (w17 > x) as a signed compare, all-ones where true
 *   mask = mask & w16
 *   x    = x + x (modulo 256)
 *   x    = x ^ mask
 *
 * w16 and w17 are inputs and must have been set by MUL2_SETUP() (0x1d and
 * 0 in every byte), which makes this look like a multiply-by-2 in GF(2^8)
 * with reduction constant 0x1d.  Vector registers 18-21 are scratch
 * (18-19 only for two registers).  Any other register count ends up in
 * ZFS_ASM_BUG().
 */
#define MUL2(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		__asm__ __volatile__( \
		    "vcmpgtsb 19," VR(17) "," VR0(r) "\n" \
		    "vcmpgtsb 18," VR(17) "," VR1(r) "\n" \
		    "vcmpgtsb 21," VR(17) "," VR2(r) "\n" \
		    "vcmpgtsb 20," VR(17) "," VR3(r) "\n" \
		    "vand 19,19," VR(16) "\n" \
		    "vand 18,18," VR(16) "\n" \
		    "vand 21,21," VR(16) "\n" \
		    "vand 20,20," VR(16) "\n" \
		    "vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n" \
		    "vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n" \
		    "vaddubm " VR2(r) "," VR2(r) "," VR2(r) "\n" \
		    "vaddubm " VR3(r) "," VR3(r) "," VR3(r) "\n" \
		    "vxor " VR0(r) ",19," VR0(r) "\n" \
		    "vxor " VR1(r) ",18," VR1(r) "\n" \
		    "vxor " VR2(r) ",21," VR2(r) "\n" \
		    "vxor " VR3(r) ",20," VR3(r) "\n" \
		    : UVR0(r), UVR1(r), UVR2(r), UVR3(r) \
		    : RVR(17), RVR(16) \
		    : "v18", "v19", "v20", "v21"); \
		break; \
	case 2: \
		__asm__ __volatile__( \
		    "vcmpgtsb 19," VR(17) "," VR0(r) "\n" \
		    "vcmpgtsb 18," VR(17) "," VR1(r) "\n" \
		    "vand 19,19," VR(16) "\n" \
		    "vand 18,18," VR(16) "\n" \
		    "vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n" \
		    "vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n" \
		    "vxor " VR0(r) ",19," VR0(r) "\n" \
		    "vxor " VR1(r) ",18," VR1(r) "\n" \
		    : UVR0(r), UVR1(r) \
		    : RVR(17), RVR(16) \
		    : "v18", "v19"); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
450 | ||
/* MUL4(r...): apply MUL2() to the registers of r twice in a row. */
#define MUL4(r...) \
{ \
	MUL2(r); \
	MUL2(r); \
}
456 | ||
/*
 * Reference only: these names document the fixed vector registers that
 * _MULx2() uses as scratch and lists as clobbered.  The asm itself spells
 * the numbers out, because GCC would otherwise use the macro name rather
 * than its value.  Register 15 appears twice since _MULx2() reuses it.
 */
#define _0f "15"
#define _a_save "14"
#define _b_save "13"
#define _lt_mod_a "12"
#define _lt_clmul_a "11"
#define _lt_mod_b "10"
#define _lt_clmul_b "15"
472 | ||
/*
 * _MULx2(c, r...): transform the two vector variables named by r using
 * the four 16-byte tables gf_clmul_mod_lt[4*c+0 .. 4*c+3].
 *
 * Each byte is split into its low four bits (kept in registers 14 and 13)
 * and the result of shifting it right by four.  Tables 0 and 1 are
 * indexed (vperm) with the shifted value, tables 2 and 3 with the low
 * bits, and the four lookup results are XORed together into the
 * variable.  Vector registers 10-15 are scratch.  Only a register count
 * of two is supported; anything else ends up in ZFS_ASM_BUG().
 */
#define _MULx2(c, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 2: \
		__asm__ __volatile__( \
		    /* mask constant and tables for the shifted half */ \
		    "vspltisb 15,15\n" \
		    "lvx 10,0,%[lt0]\n" \
		    "lvx 11,0,%[lt1]\n" \
		    /* save the low bits, then shift right by four */ \
		    "vand 14," VR0(r) ",15\n" \
		    "vand 13," VR1(r) ",15\n" \
		    "vspltisb 15,4\n" \
		    "vsrab " VR0(r) "," VR0(r) ",15\n" \
		    "vsrab " VR1(r) "," VR1(r) ",15\n" \
		    /* look the shifted values up in tables 0 and 1 */ \
		    "vperm 12,10,10," VR0(r) "\n" \
		    "vperm 10,10,10," VR1(r) "\n" \
		    "vperm 15,11,11," VR0(r) "\n" \
		    "vperm 11,11,11," VR1(r) "\n" \
		    "vxor " VR0(r) ",15,12\n" \
		    "vxor " VR1(r) ",11,10\n" \
		    /* tables for the low bits */ \
		    "lvx 10,0,%[lt2]\n" \
		    "lvx 15,0,%[lt3]\n" \
		    /* look the saved low bits up in tables 2 and 3 */ \
		    "vperm 12,10,10,14\n" \
		    "vperm 10,10,10,13\n" \
		    "vperm 11,15,15,14\n" \
		    "vperm 15,15,15,13\n" \
		    "vxor " VR0(r) "," VR0(r) ",12\n" \
		    "vxor " VR1(r) "," VR1(r) ",10\n" \
		    "vxor " VR0(r) "," VR0(r) ",11\n" \
		    "vxor " VR1(r) "," VR1(r) ",15\n" \
		    : UVR0(r), UVR1(r) \
		    : [lt0] "r" (&(gf_clmul_mod_lt[4*(c)+0][0])), \
		    [lt1] "r" (&(gf_clmul_mod_lt[4*(c)+1][0])), \
		    [lt2] "r" (&(gf_clmul_mod_lt[4*(c)+2][0])), \
		    [lt3] "r" (&(gf_clmul_mod_lt[4*(c)+3][0])) \
		    : "v10", "v11", "v12", "v13", "v14", "v15"); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
520 | ||
/*
 * MUL(c, r...): run _MULx2(c, ...) over the 2 or 4 registers named by r,
 * two registers at a time.  With four registers the second pair (R_23) is
 * processed before the first pair (R_01).  Any other register count ends
 * up in ZFS_ASM_BUG().
 */
#define MUL(c, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		_MULx2(c, R_23(r)); \
		_MULx2(c, R_01(r)); \
		break; \
	case 2: \
		_MULx2(c, R_01(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}
535 | ||
/* Bracket a block of vector math with kfpu_begin() / kfpu_end(). */
#define raidz_math_begin() kfpu_begin()
#define raidz_math_end() kfpu_end()
538 | ||
/*
 * Declarations of the vector variables w0 .. w38 that the asm operand
 * macros in this file refer to by name.  Each GEN_X_DEFINE_a_b() declares
 * the variables wa .. wb and GEN_X_DEFINE_ALL() declares all of them.
 *
 * The variables are plain automatic 16-byte vectors; they are deliberately
 * not pinned to fixed registers, so the compiler picks the register that
 * backs each one.  Every declaration carries its own terminating
 * semicolon, so the macros are used without a trailing one.
 */
#define GEN_X_DEFINE_VEC(n) \
	unsigned char w##n __attribute__((vector_size(16)));

#define GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_VEC(0) GEN_X_DEFINE_VEC(1) \
	GEN_X_DEFINE_VEC(2) GEN_X_DEFINE_VEC(3)
#define GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_VEC(4) GEN_X_DEFINE_VEC(5)
#define GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_VEC(6) GEN_X_DEFINE_VEC(7)
#define GEN_X_DEFINE_8_9() \
	GEN_X_DEFINE_VEC(8) GEN_X_DEFINE_VEC(9)
#define GEN_X_DEFINE_10_11() \
	GEN_X_DEFINE_VEC(10) GEN_X_DEFINE_VEC(11)
#define GEN_X_DEFINE_12_15() \
	GEN_X_DEFINE_VEC(12) GEN_X_DEFINE_VEC(13) \
	GEN_X_DEFINE_VEC(14) GEN_X_DEFINE_VEC(15)
#define GEN_X_DEFINE_16() \
	GEN_X_DEFINE_VEC(16)
#define GEN_X_DEFINE_17() \
	GEN_X_DEFINE_VEC(17)
#define GEN_X_DEFINE_18_21() \
	GEN_X_DEFINE_VEC(18) GEN_X_DEFINE_VEC(19) \
	GEN_X_DEFINE_VEC(20) GEN_X_DEFINE_VEC(21)
#define GEN_X_DEFINE_22_23() \
	GEN_X_DEFINE_VEC(22) GEN_X_DEFINE_VEC(23)
#define GEN_X_DEFINE_24_27() \
	GEN_X_DEFINE_VEC(24) GEN_X_DEFINE_VEC(25) \
	GEN_X_DEFINE_VEC(26) GEN_X_DEFINE_VEC(27)
#define GEN_X_DEFINE_28_30() \
	GEN_X_DEFINE_VEC(28) GEN_X_DEFINE_VEC(29) \
	GEN_X_DEFINE_VEC(30)
#define GEN_X_DEFINE_31() \
	GEN_X_DEFINE_VEC(31)
#define GEN_X_DEFINE_32() \
	GEN_X_DEFINE_VEC(32)
#define GEN_X_DEFINE_33_36() \
	GEN_X_DEFINE_VEC(33) GEN_X_DEFINE_VEC(34) \
	GEN_X_DEFINE_VEC(35) GEN_X_DEFINE_VEC(36)
#define GEN_X_DEFINE_37_38() \
	GEN_X_DEFINE_VEC(37) GEN_X_DEFINE_VEC(38)

#define GEN_X_DEFINE_ALL() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_8_9() \
	GEN_X_DEFINE_10_11() \
	GEN_X_DEFINE_12_15() \
	GEN_X_DEFINE_16() \
	GEN_X_DEFINE_17() \
	GEN_X_DEFINE_18_21() \
	GEN_X_DEFINE_22_23() \
	GEN_X_DEFINE_24_27() \
	GEN_X_DEFINE_28_30() \
	GEN_X_DEFINE_31() \
	GEN_X_DEFINE_32() \
	GEN_X_DEFINE_33_36() \
	GEN_X_DEFINE_37_38()