]>
Commit | Line | Data |
---|---|---|
ae25d222 GN |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
ae25d222 GN |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright (C) 2016 Gvozden Nešković. All rights reserved. | |
23 | */ | |
24 | ||
25 | #include <sys/isa_defs.h> | |
26 | ||
27 | #if defined(__x86_64) && defined(HAVE_SSE2) | |
28 | ||
29 | #include <sys/types.h> | |
006e9a40 | 30 | #include <sys/simd.h> |
c5858ff9 | 31 | #include <sys/debug.h> |
ae25d222 | 32 | |
3df0c2fa | 33 | #ifdef __linux__ |
ae25d222 | 34 | #define __asm __asm__ __volatile__ |
3df0c2fa | 35 | #endif |
ae25d222 GN |
36 | |
/*
 * Count the entries of a register list at preprocessing time.
 * _REG_CNT yields its ninth argument. REG_CNT appends 8, 7, ..., 1 to the
 * caller's list, so for a list of one to eight entries the ninth argument
 * is the number of entries that were passed.
 */
37 | #define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N | |
38 | #define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1) | |
39 | ||
/*
 * VRn_ picks argument number n (counting from zero) and stringifies it
 * behind the prefix "xmm", giving a register name such as "xmm4" that can
 * be concatenated into an asm template string.
 */
40 | #define VR0_(REG, ...) "xmm"#REG | |
41 | #define VR1_(_1, REG, ...) "xmm"#REG | |
42 | #define VR2_(_1, _2, REG, ...) "xmm"#REG | |
43 | #define VR3_(_1, _2, _3, REG, ...) "xmm"#REG | |
44 | #define VR4_(_1, _2, _3, _4, REG, ...) "xmm"#REG | |
45 | #define VR5_(_1, _2, _3, _4, _5, REG, ...) "xmm"#REG | |
46 | #define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "xmm"#REG | |
47 | #define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "xmm"#REG | |
48 | ||
/*
 * VRn(r) is the entry point used by the operation macros: it forwards the
 * register list followed by the six filler values 1..6. When the list is
 * shorter than n + 1 entries, the selector lands on one of the fillers.
 */
49 | #define VR0(r...) VR0_(r, 1, 2, 3, 4, 5, 6) | |
50 | #define VR1(r...) VR1_(r, 1, 2, 3, 4, 5, 6) | |
51 | #define VR2(r...) VR2_(r, 1, 2, 3, 4, 5, 6) | |
52 | #define VR3(r...) VR3_(r, 1, 2, 3, 4, 5, 6) | |
53 | #define VR4(r...) VR4_(r, 1, 2, 3, 4, 5, 6) | |
54 | #define VR5(r...) VR5_(r, 1, 2, 3, 4, 5, 6) | |
55 | #define VR6(r...) VR6_(r, 1, 2, 3, 4, 5, 6) | |
56 | #define VR7(r...) VR7_(r, 1, 2, 3, 4, 5, 6) | |
57 | ||
/* Size in bytes of one vector element; the load/store macros step by 0x10. */
58 | #define ELEM_SIZE 16 | |
59 | ||
/* One vector element: ELEM_SIZE bytes, aligned to ELEM_SIZE. */
60 | typedef struct v { | |
61 | uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE))); | |
62 | } v_t; | |
63 | ||
/*
 * XOR_ACC(src, r...): XOR consecutive 16-byte chunks read from memory at
 * src (offsets 0x00, 0x10, 0x20, 0x30) into the listed xmm registers; the
 * registers are the destination of each pxor.
 *
 * Lists of 4, 2 or 1 registers are handled. There is no default branch,
 * so any other count emits no instruction.
 *
 * NOTE(review): the asm statements pass src as their only operand and
 * declare no outputs or clobbers for the xmm registers they modify.
 */
ae25d222 GN |
64 | #define XOR_ACC(src, r...) \ |
65 | { \ | |
66 | switch (REG_CNT(r)) { \ | |
67 | case 4: \ | |
68 | __asm( \ | |
69 | "pxor 0x00(%[SRC]), %%" VR0(r) "\n" \ | |
70 | "pxor 0x10(%[SRC]), %%" VR1(r) "\n" \ | |
71 | "pxor 0x20(%[SRC]), %%" VR2(r) "\n" \ | |
72 | "pxor 0x30(%[SRC]), %%" VR3(r) "\n" \ | |
73 | : : [SRC] "r" (src)); \ | |
74 | break; \ | |
75 | case 2: \ | |
76 | __asm( \ | |
77 | "pxor 0x00(%[SRC]), %%" VR0(r) "\n" \ | |
78 | "pxor 0x10(%[SRC]), %%" VR1(r) "\n" \ | |
79 | : : [SRC] "r" (src)); \ | |
80 | break; \ | |
81 | case 1: \ | |
82 | __asm("pxor 0x00(%[SRC]), %%" VR0(r) "\n" \ | |
83 | : : [SRC] "r" (src)); \ | |
84 | break; \ | |
85 | } \ | |
86 | } | |
87 | ||
/*
 * XOR(r...): treat the register list as two halves and XOR the first half
 * into the second, pairwise. With 8 registers entry 0 is XORed into entry
 * 4, entry 1 into entry 5, and so on; with 4 registers 0 into 2 and 1 into
 * 3; with 2 registers 0 into 1.
 *
 * These asm statements have no operand section and write register names
 * with a single '%', unlike the statements with operands, which use '%%'.
 * There is no default branch, so other list lengths emit no instruction.
 */
88 | #define XOR(r...) \ | |
89 | { \ | |
90 | switch (REG_CNT(r)) { \ | |
91 | case 8: \ | |
92 | __asm( \ | |
93 | "pxor %" VR0(r) ", %" VR4(r) "\n" \ | |
94 | "pxor %" VR1(r) ", %" VR5(r) "\n" \ | |
95 | "pxor %" VR2(r) ", %" VR6(r) "\n" \ | |
96 | "pxor %" VR3(r) ", %" VR7(r)); \ | |
97 | break; \ | |
98 | case 4: \ | |
99 | __asm( \ | |
100 | "pxor %" VR0(r) ", %" VR2(r) "\n" \ | |
101 | "pxor %" VR1(r) ", %" VR3(r)); \ | |
102 | break; \ | |
103 | case 2: \ | |
104 | __asm( \ | |
105 | "pxor %" VR0(r) ", %" VR1(r)); \ | |
106 | break; \ | |
107 | } \ | |
108 | } | |
65d71d42 GN |
109 | |
/*
 * ZERO(r...): clear the listed registers. The list is passed to XOR twice,
 * so each register ends up XORed with itself.
 */
110 | #define ZERO(r...) XOR(r, r) | |
ae25d222 GN |
111 | |
/*
 * COPY(r...): treat the register list as two halves and copy the first
 * half into the second with movdqa, pairwise (entry 0 to entry 4 for an
 * 8-entry list, entry 0 to entry 2 for 4 entries, entry 0 to entry 1 for
 * 2 entries). Any other list length reaches VERIFY(0).
 */
112 | #define COPY(r...) \ | |
113 | { \ | |
114 | switch (REG_CNT(r)) { \ | |
115 | case 8: \ | |
116 | __asm( \ | |
117 | "movdqa %" VR0(r) ", %" VR4(r) "\n" \ | |
118 | "movdqa %" VR1(r) ", %" VR5(r) "\n" \ | |
119 | "movdqa %" VR2(r) ", %" VR6(r) "\n" \ | |
120 | "movdqa %" VR3(r) ", %" VR7(r)); \ | |
121 | break; \ | |
122 | case 4: \ | |
123 | __asm( \ | |
124 | "movdqa %" VR0(r) ", %" VR2(r) "\n" \ | |
125 | "movdqa %" VR1(r) ", %" VR3(r)); \ | |
126 | break; \ | |
127 | case 2: \ | |
128 | __asm( \ | |
129 | "movdqa %" VR0(r) ", %" VR1(r)); \ | |
130 | break; \ | |
c5858ff9 MM |
131 | default: \ |
132 | VERIFY(0); \ | |
ae25d222 GN |
133 | } \ |
134 | } | |
135 | ||
/*
 * LOAD(src, r...): fill the listed xmm registers with consecutive 16-byte
 * chunks read from memory at src (offsets 0x00, 0x10, 0x20, 0x30) using
 * movdqa. Lists of 4, 2 or 1 registers are handled; there is no default
 * branch, so any other count emits no instruction.
 *
 * NOTE(review): src is the only declared operand; the written registers
 * are not listed as outputs or clobbers.
 */
136 | #define LOAD(src, r...) \ | |
137 | { \ | |
138 | switch (REG_CNT(r)) { \ | |
139 | case 4: \ | |
140 | __asm( \ | |
141 | "movdqa 0x00(%[SRC]), %%" VR0(r) "\n" \ | |
142 | "movdqa 0x10(%[SRC]), %%" VR1(r) "\n" \ | |
143 | "movdqa 0x20(%[SRC]), %%" VR2(r) "\n" \ | |
144 | "movdqa 0x30(%[SRC]), %%" VR3(r) "\n" \ | |
145 | : : [SRC] "r" (src)); \ | |
146 | break; \ | |
147 | case 2: \ | |
148 | __asm( \ | |
149 | "movdqa 0x00(%[SRC]), %%" VR0(r) "\n" \ | |
150 | "movdqa 0x10(%[SRC]), %%" VR1(r) "\n" \ | |
151 | : : [SRC] "r" (src)); \ | |
152 | break; \ | |
153 | case 1: \ | |
154 | __asm( \ | |
155 | "movdqa 0x00(%[SRC]), %%" VR0(r) "\n" \ | |
156 | : : [SRC] "r" (src)); \ | |
157 | break; \ | |
158 | } \ | |
159 | } | |
160 | ||
/*
 * STORE(dst, r...): write the listed xmm registers to memory at dst as
 * consecutive 16-byte chunks (offsets 0x00, 0x10, 0x20, 0x30) using
 * movdqa. Lists of 4, 2 or 1 registers are handled; any other count
 * reaches VERIFY(0).
 *
 * NOTE(review): dst is passed as an input register operand and the asm
 * declares no memory clobber for the bytes it writes.
 */
161 | #define STORE(dst, r...) \ | |
162 | { \ | |
163 | switch (REG_CNT(r)) { \ | |
164 | case 4: \ | |
165 | __asm( \ | |
166 | "movdqa %%" VR0(r)", 0x00(%[DST])\n" \ | |
167 | "movdqa %%" VR1(r)", 0x10(%[DST])\n" \ | |
168 | "movdqa %%" VR2(r)", 0x20(%[DST])\n" \ | |
169 | "movdqa %%" VR3(r)", 0x30(%[DST])\n" \ | |
170 | : : [DST] "r" (dst)); \ | |
171 | break; \ | |
172 | case 2: \ | |
173 | __asm( \ | |
174 | "movdqa %%" VR0(r)", 0x00(%[DST])\n" \ | |
175 | "movdqa %%" VR1(r)", 0x10(%[DST])\n" \ | |
176 | : : [DST] "r" (dst)); \ | |
177 | break; \ | |
178 | case 1: \ | |
179 | __asm( \ | |
180 | "movdqa %%" VR0(r)", 0x00(%[DST])\n" \ | |
181 | : : [DST] "r" (dst)); \ | |
182 | break; \ | |
c5858ff9 MM |
183 | default: \ |
184 | VERIFY(0); \ | |
ae25d222 GN |
185 | } \ |
186 | } | |
187 | ||
/*
 * MUL2_SETUP(): load the constant 0x1d1d1d1d into the low 32 bits of xmm15
 * and replicate it with pshufd (selector 0), leaving the byte 0x1d in
 * every lane of xmm15. The _MUL2_x1/_MUL2_x2 macros read xmm15 as their
 * mask.
 */
188 | #define MUL2_SETUP() \ | |
189 | { \ | |
190 | __asm( \ | |
191 | "movd %[mask], %%xmm15\n" \ | |
192 | "pshufd $0x0, %%xmm15, %%xmm15\n" \ | |
193 | : : [mask] "r" (0x1d1d1d1d)); \ | |
194 | } | |
195 | ||
/*
 * _MUL2_x1(a0): double every byte of register a0 in place, where a0 is a
 * register-name string such as the result of VR0().
 *
 * Steps: clear xmm14; compare it byte-wise (signed greater-than) against
 * a0, which sets a lane to all ones where the a0 byte has its top bit set;
 * AND that with the mask in xmm15; add a0 to itself byte-wise; XOR the
 * masked value into a0.
 *
 * xmm14 is overwritten as scratch. xmm15 is only read here; it is
 * presumably expected to hold the 0x1d bytes loaded by MUL2_SETUP
 * (confirm against callers).
 */
196 | #define _MUL2_x1(a0) \ | |
197 | { \ | |
198 | __asm( \ | |
199 | "pxor %xmm14, %xmm14\n" \ | |
200 | "pcmpgtb %" a0", %xmm14\n" \ | |
201 | "pand %xmm15, %xmm14\n" \ | |
202 | "paddb %" a0", %" a0 "\n" \ | |
203 | "pxor %xmm14, %" a0); \ | |
204 | } | |
205 | ||
/*
 * _MUL2_x2(a0, a1): the same doubling sequence as _MUL2_x1, applied to two
 * registers with the instructions for a0 and a1 interleaved. xmm14 is the
 * scratch register for a0 and xmm13 for a1; xmm15 is read as the mask.
 */
206 | #define _MUL2_x2(a0, a1) \ | |
207 | { \ | |
208 | __asm( \ | |
209 | "pxor %xmm14, %xmm14\n" \ | |
210 | "pxor %xmm13, %xmm13\n" \ | |
211 | "pcmpgtb %" a0", %xmm14\n" \ | |
212 | "pcmpgtb %" a1", %xmm13\n" \ | |
213 | "pand %xmm15, %xmm14\n" \ | |
214 | "pand %xmm15, %xmm13\n" \ | |
215 | "paddb %" a0", %" a0 "\n" \ | |
216 | "paddb %" a1", %" a1 "\n" \ | |
217 | "pxor %xmm14, %" a0 "\n" \ | |
218 | "pxor %xmm13, %" a1); \ | |
219 | } | |
220 | ||
/*
 * MUL2(r...): double every listed register in place. Four registers are
 * processed as two _MUL2_x2 pairs, two registers as one _MUL2_x2, and a
 * single register with _MUL2_x1. There is no default branch, so other
 * list lengths emit no instruction.
 */
221 | #define MUL2(r...) \ | |
222 | { \ | |
223 | switch (REG_CNT(r)) { \ | |
cbf484f8 GN |
224 | case 4: \ |
225 | _MUL2_x2(VR0(r), VR1(r)); \ | |
226 | _MUL2_x2(VR2(r), VR3(r)); \ | |
227 | break; \ | |
ae25d222 GN |
228 | case 2: \ |
229 | _MUL2_x2(VR0(r), VR1(r)); \ | |
230 | break; \ | |
231 | case 1: \ | |
232 | _MUL2_x1(VR0(r)); \ | |
233 | break; \ | |
234 | } \ | |
235 | } | |
236 | ||
/* MUL4(r...): multiply the listed registers by four by applying MUL2 twice. */
237 | #define MUL4(r...) \ | |
238 | { \ | |
239 | MUL2(r); \ | |
240 | MUL2(r); \ | |
241 | } | |
242 | ||
243 | /* General multiplication by adding powers of two */ | |
244 | ||
/*
 * _MUL_PARAM(x, in, acc): multiply the registers in `in` by the constant x
 * and leave the product in `acc`, one bit of x at a time.
 *
 * Bit 0 selects the starting value of acc: a copy of in when set, zero
 * otherwise. For each higher bit k, `in` is doubled with MUL2 as long as x
 * still has a bit set at position k or above, and is XORed into acc when
 * bit k itself is set. The `in` registers are overwritten along the way.
 *
 * Every condition tests only x, so with a literal x (as in the
 * MUL_x1_DEFINE/MUL_x2_DEFINE instantiations) each test is a constant
 * expression.
 */
245 | #define _MUL_PARAM(x, in, acc) \ | |
246 | { \ | |
65d71d42 | 247 | if (x & 0x01) { COPY(in, acc); } else { ZERO(acc); } \ |
ae25d222 GN |
248 | if (x & 0xfe) { MUL2(in); } \ |
249 | if (x & 0x02) { XOR(in, acc); } \ | |
250 | if (x & 0xfc) { MUL2(in); } \ | |
251 | if (x & 0x04) { XOR(in, acc); } \ | |
252 | if (x & 0xf8) { MUL2(in); } \ | |
253 | if (x & 0x08) { XOR(in, acc); } \ | |
254 | if (x & 0xf0) { MUL2(in); } \ | |
255 | if (x & 0x10) { XOR(in, acc); } \ | |
256 | if (x & 0xe0) { MUL2(in); } \ | |
257 | if (x & 0x20) { XOR(in, acc); } \ | |
258 | if (x & 0xc0) { MUL2(in); } \ | |
259 | if (x & 0x40) { XOR(in, acc); } \ | |
260 | if (x & 0x80) { MUL2(in); XOR(in, acc); } \ | |
261 | } | |
262 | ||
/*
 * Fixed register assignments for the generated multiply functions.
 * Single-register variant: input in xmm11, result in xmm12.
 */
cbf484f8 GN |
263 | #define _mul_x1_in 11 |
264 | #define _mul_x1_acc 12 | |
ae25d222 GN |
265 | |
/*
 * MUL_x1_DEFINE(x) emits static function mul_x1_<x>, which takes no
 * arguments and runs _MUL_PARAM for the constant x on the registers above.
 */
266 | #define MUL_x1_DEFINE(x) \ | |
267 | static void \ | |
268 | mul_x1_ ## x(void) { _MUL_PARAM(x, _mul_x1_in, _mul_x1_acc); } | |
269 | ||
/* Two-register variant: inputs in xmm9/xmm10, results in xmm11/xmm12. */
270 | #define _mul_x2_in 9, 10 | |
271 | #define _mul_x2_acc 11, 12 | |
272 | ||
/* MUL_x2_DEFINE(x) emits mul_x2_<x>, the two-register counterpart. */
273 | #define MUL_x2_DEFINE(x) \ | |
274 | static void \ | |
275 | mul_x2_ ## x(void) { _MUL_PARAM(x, _mul_x2_in, _mul_x2_acc); } | |
276 | ||
/* Generate mul_x1_0 through mul_x1_255, one function per constant. */
277 | MUL_x1_DEFINE(0); MUL_x1_DEFINE(1); MUL_x1_DEFINE(2); MUL_x1_DEFINE(3); | |
278 | MUL_x1_DEFINE(4); MUL_x1_DEFINE(5); MUL_x1_DEFINE(6); MUL_x1_DEFINE(7); | |
279 | MUL_x1_DEFINE(8); MUL_x1_DEFINE(9); MUL_x1_DEFINE(10); MUL_x1_DEFINE(11); | |
280 | MUL_x1_DEFINE(12); MUL_x1_DEFINE(13); MUL_x1_DEFINE(14); MUL_x1_DEFINE(15); | |
281 | MUL_x1_DEFINE(16); MUL_x1_DEFINE(17); MUL_x1_DEFINE(18); MUL_x1_DEFINE(19); | |
282 | MUL_x1_DEFINE(20); MUL_x1_DEFINE(21); MUL_x1_DEFINE(22); MUL_x1_DEFINE(23); | |
283 | MUL_x1_DEFINE(24); MUL_x1_DEFINE(25); MUL_x1_DEFINE(26); MUL_x1_DEFINE(27); | |
284 | MUL_x1_DEFINE(28); MUL_x1_DEFINE(29); MUL_x1_DEFINE(30); MUL_x1_DEFINE(31); | |
285 | MUL_x1_DEFINE(32); MUL_x1_DEFINE(33); MUL_x1_DEFINE(34); MUL_x1_DEFINE(35); | |
286 | MUL_x1_DEFINE(36); MUL_x1_DEFINE(37); MUL_x1_DEFINE(38); MUL_x1_DEFINE(39); | |
287 | MUL_x1_DEFINE(40); MUL_x1_DEFINE(41); MUL_x1_DEFINE(42); MUL_x1_DEFINE(43); | |
288 | MUL_x1_DEFINE(44); MUL_x1_DEFINE(45); MUL_x1_DEFINE(46); MUL_x1_DEFINE(47); | |
289 | MUL_x1_DEFINE(48); MUL_x1_DEFINE(49); MUL_x1_DEFINE(50); MUL_x1_DEFINE(51); | |
290 | MUL_x1_DEFINE(52); MUL_x1_DEFINE(53); MUL_x1_DEFINE(54); MUL_x1_DEFINE(55); | |
291 | MUL_x1_DEFINE(56); MUL_x1_DEFINE(57); MUL_x1_DEFINE(58); MUL_x1_DEFINE(59); | |
292 | MUL_x1_DEFINE(60); MUL_x1_DEFINE(61); MUL_x1_DEFINE(62); MUL_x1_DEFINE(63); | |
293 | MUL_x1_DEFINE(64); MUL_x1_DEFINE(65); MUL_x1_DEFINE(66); MUL_x1_DEFINE(67); | |
294 | MUL_x1_DEFINE(68); MUL_x1_DEFINE(69); MUL_x1_DEFINE(70); MUL_x1_DEFINE(71); | |
295 | MUL_x1_DEFINE(72); MUL_x1_DEFINE(73); MUL_x1_DEFINE(74); MUL_x1_DEFINE(75); | |
296 | MUL_x1_DEFINE(76); MUL_x1_DEFINE(77); MUL_x1_DEFINE(78); MUL_x1_DEFINE(79); | |
297 | MUL_x1_DEFINE(80); MUL_x1_DEFINE(81); MUL_x1_DEFINE(82); MUL_x1_DEFINE(83); | |
298 | MUL_x1_DEFINE(84); MUL_x1_DEFINE(85); MUL_x1_DEFINE(86); MUL_x1_DEFINE(87); | |
299 | MUL_x1_DEFINE(88); MUL_x1_DEFINE(89); MUL_x1_DEFINE(90); MUL_x1_DEFINE(91); | |
300 | MUL_x1_DEFINE(92); MUL_x1_DEFINE(93); MUL_x1_DEFINE(94); MUL_x1_DEFINE(95); | |
301 | MUL_x1_DEFINE(96); MUL_x1_DEFINE(97); MUL_x1_DEFINE(98); MUL_x1_DEFINE(99); | |
302 | MUL_x1_DEFINE(100); MUL_x1_DEFINE(101); MUL_x1_DEFINE(102); MUL_x1_DEFINE(103); | |
303 | MUL_x1_DEFINE(104); MUL_x1_DEFINE(105); MUL_x1_DEFINE(106); MUL_x1_DEFINE(107); | |
304 | MUL_x1_DEFINE(108); MUL_x1_DEFINE(109); MUL_x1_DEFINE(110); MUL_x1_DEFINE(111); | |
305 | MUL_x1_DEFINE(112); MUL_x1_DEFINE(113); MUL_x1_DEFINE(114); MUL_x1_DEFINE(115); | |
306 | MUL_x1_DEFINE(116); MUL_x1_DEFINE(117); MUL_x1_DEFINE(118); MUL_x1_DEFINE(119); | |
307 | MUL_x1_DEFINE(120); MUL_x1_DEFINE(121); MUL_x1_DEFINE(122); MUL_x1_DEFINE(123); | |
308 | MUL_x1_DEFINE(124); MUL_x1_DEFINE(125); MUL_x1_DEFINE(126); MUL_x1_DEFINE(127); | |
309 | MUL_x1_DEFINE(128); MUL_x1_DEFINE(129); MUL_x1_DEFINE(130); MUL_x1_DEFINE(131); | |
310 | MUL_x1_DEFINE(132); MUL_x1_DEFINE(133); MUL_x1_DEFINE(134); MUL_x1_DEFINE(135); | |
311 | MUL_x1_DEFINE(136); MUL_x1_DEFINE(137); MUL_x1_DEFINE(138); MUL_x1_DEFINE(139); | |
312 | MUL_x1_DEFINE(140); MUL_x1_DEFINE(141); MUL_x1_DEFINE(142); MUL_x1_DEFINE(143); | |
313 | MUL_x1_DEFINE(144); MUL_x1_DEFINE(145); MUL_x1_DEFINE(146); MUL_x1_DEFINE(147); | |
314 | MUL_x1_DEFINE(148); MUL_x1_DEFINE(149); MUL_x1_DEFINE(150); MUL_x1_DEFINE(151); | |
315 | MUL_x1_DEFINE(152); MUL_x1_DEFINE(153); MUL_x1_DEFINE(154); MUL_x1_DEFINE(155); | |
316 | MUL_x1_DEFINE(156); MUL_x1_DEFINE(157); MUL_x1_DEFINE(158); MUL_x1_DEFINE(159); | |
317 | MUL_x1_DEFINE(160); MUL_x1_DEFINE(161); MUL_x1_DEFINE(162); MUL_x1_DEFINE(163); | |
318 | MUL_x1_DEFINE(164); MUL_x1_DEFINE(165); MUL_x1_DEFINE(166); MUL_x1_DEFINE(167); | |
319 | MUL_x1_DEFINE(168); MUL_x1_DEFINE(169); MUL_x1_DEFINE(170); MUL_x1_DEFINE(171); | |
320 | MUL_x1_DEFINE(172); MUL_x1_DEFINE(173); MUL_x1_DEFINE(174); MUL_x1_DEFINE(175); | |
321 | MUL_x1_DEFINE(176); MUL_x1_DEFINE(177); MUL_x1_DEFINE(178); MUL_x1_DEFINE(179); | |
322 | MUL_x1_DEFINE(180); MUL_x1_DEFINE(181); MUL_x1_DEFINE(182); MUL_x1_DEFINE(183); | |
323 | MUL_x1_DEFINE(184); MUL_x1_DEFINE(185); MUL_x1_DEFINE(186); MUL_x1_DEFINE(187); | |
324 | MUL_x1_DEFINE(188); MUL_x1_DEFINE(189); MUL_x1_DEFINE(190); MUL_x1_DEFINE(191); | |
325 | MUL_x1_DEFINE(192); MUL_x1_DEFINE(193); MUL_x1_DEFINE(194); MUL_x1_DEFINE(195); | |
326 | MUL_x1_DEFINE(196); MUL_x1_DEFINE(197); MUL_x1_DEFINE(198); MUL_x1_DEFINE(199); | |
327 | MUL_x1_DEFINE(200); MUL_x1_DEFINE(201); MUL_x1_DEFINE(202); MUL_x1_DEFINE(203); | |
328 | MUL_x1_DEFINE(204); MUL_x1_DEFINE(205); MUL_x1_DEFINE(206); MUL_x1_DEFINE(207); | |
329 | MUL_x1_DEFINE(208); MUL_x1_DEFINE(209); MUL_x1_DEFINE(210); MUL_x1_DEFINE(211); | |
330 | MUL_x1_DEFINE(212); MUL_x1_DEFINE(213); MUL_x1_DEFINE(214); MUL_x1_DEFINE(215); | |
331 | MUL_x1_DEFINE(216); MUL_x1_DEFINE(217); MUL_x1_DEFINE(218); MUL_x1_DEFINE(219); | |
332 | MUL_x1_DEFINE(220); MUL_x1_DEFINE(221); MUL_x1_DEFINE(222); MUL_x1_DEFINE(223); | |
333 | MUL_x1_DEFINE(224); MUL_x1_DEFINE(225); MUL_x1_DEFINE(226); MUL_x1_DEFINE(227); | |
334 | MUL_x1_DEFINE(228); MUL_x1_DEFINE(229); MUL_x1_DEFINE(230); MUL_x1_DEFINE(231); | |
335 | MUL_x1_DEFINE(232); MUL_x1_DEFINE(233); MUL_x1_DEFINE(234); MUL_x1_DEFINE(235); | |
336 | MUL_x1_DEFINE(236); MUL_x1_DEFINE(237); MUL_x1_DEFINE(238); MUL_x1_DEFINE(239); | |
337 | MUL_x1_DEFINE(240); MUL_x1_DEFINE(241); MUL_x1_DEFINE(242); MUL_x1_DEFINE(243); | |
338 | MUL_x1_DEFINE(244); MUL_x1_DEFINE(245); MUL_x1_DEFINE(246); MUL_x1_DEFINE(247); | |
339 | MUL_x1_DEFINE(248); MUL_x1_DEFINE(249); MUL_x1_DEFINE(250); MUL_x1_DEFINE(251); | |
340 | MUL_x1_DEFINE(252); MUL_x1_DEFINE(253); MUL_x1_DEFINE(254); MUL_x1_DEFINE(255); | |
341 | ||
/* Generate mul_x2_0 through mul_x2_255, one function per constant. */
342 | MUL_x2_DEFINE(0); MUL_x2_DEFINE(1); MUL_x2_DEFINE(2); MUL_x2_DEFINE(3); | |
343 | MUL_x2_DEFINE(4); MUL_x2_DEFINE(5); MUL_x2_DEFINE(6); MUL_x2_DEFINE(7); | |
344 | MUL_x2_DEFINE(8); MUL_x2_DEFINE(9); MUL_x2_DEFINE(10); MUL_x2_DEFINE(11); | |
345 | MUL_x2_DEFINE(12); MUL_x2_DEFINE(13); MUL_x2_DEFINE(14); MUL_x2_DEFINE(15); | |
346 | MUL_x2_DEFINE(16); MUL_x2_DEFINE(17); MUL_x2_DEFINE(18); MUL_x2_DEFINE(19); | |
347 | MUL_x2_DEFINE(20); MUL_x2_DEFINE(21); MUL_x2_DEFINE(22); MUL_x2_DEFINE(23); | |
348 | MUL_x2_DEFINE(24); MUL_x2_DEFINE(25); MUL_x2_DEFINE(26); MUL_x2_DEFINE(27); | |
349 | MUL_x2_DEFINE(28); MUL_x2_DEFINE(29); MUL_x2_DEFINE(30); MUL_x2_DEFINE(31); | |
350 | MUL_x2_DEFINE(32); MUL_x2_DEFINE(33); MUL_x2_DEFINE(34); MUL_x2_DEFINE(35); | |
351 | MUL_x2_DEFINE(36); MUL_x2_DEFINE(37); MUL_x2_DEFINE(38); MUL_x2_DEFINE(39); | |
352 | MUL_x2_DEFINE(40); MUL_x2_DEFINE(41); MUL_x2_DEFINE(42); MUL_x2_DEFINE(43); | |
353 | MUL_x2_DEFINE(44); MUL_x2_DEFINE(45); MUL_x2_DEFINE(46); MUL_x2_DEFINE(47); | |
354 | MUL_x2_DEFINE(48); MUL_x2_DEFINE(49); MUL_x2_DEFINE(50); MUL_x2_DEFINE(51); | |
355 | MUL_x2_DEFINE(52); MUL_x2_DEFINE(53); MUL_x2_DEFINE(54); MUL_x2_DEFINE(55); | |
356 | MUL_x2_DEFINE(56); MUL_x2_DEFINE(57); MUL_x2_DEFINE(58); MUL_x2_DEFINE(59); | |
357 | MUL_x2_DEFINE(60); MUL_x2_DEFINE(61); MUL_x2_DEFINE(62); MUL_x2_DEFINE(63); | |
358 | MUL_x2_DEFINE(64); MUL_x2_DEFINE(65); MUL_x2_DEFINE(66); MUL_x2_DEFINE(67); | |
359 | MUL_x2_DEFINE(68); MUL_x2_DEFINE(69); MUL_x2_DEFINE(70); MUL_x2_DEFINE(71); | |
360 | MUL_x2_DEFINE(72); MUL_x2_DEFINE(73); MUL_x2_DEFINE(74); MUL_x2_DEFINE(75); | |
361 | MUL_x2_DEFINE(76); MUL_x2_DEFINE(77); MUL_x2_DEFINE(78); MUL_x2_DEFINE(79); | |
362 | MUL_x2_DEFINE(80); MUL_x2_DEFINE(81); MUL_x2_DEFINE(82); MUL_x2_DEFINE(83); | |
363 | MUL_x2_DEFINE(84); MUL_x2_DEFINE(85); MUL_x2_DEFINE(86); MUL_x2_DEFINE(87); | |
364 | MUL_x2_DEFINE(88); MUL_x2_DEFINE(89); MUL_x2_DEFINE(90); MUL_x2_DEFINE(91); | |
365 | MUL_x2_DEFINE(92); MUL_x2_DEFINE(93); MUL_x2_DEFINE(94); MUL_x2_DEFINE(95); | |
366 | MUL_x2_DEFINE(96); MUL_x2_DEFINE(97); MUL_x2_DEFINE(98); MUL_x2_DEFINE(99); | |
367 | MUL_x2_DEFINE(100); MUL_x2_DEFINE(101); MUL_x2_DEFINE(102); MUL_x2_DEFINE(103); | |
368 | MUL_x2_DEFINE(104); MUL_x2_DEFINE(105); MUL_x2_DEFINE(106); MUL_x2_DEFINE(107); | |
369 | MUL_x2_DEFINE(108); MUL_x2_DEFINE(109); MUL_x2_DEFINE(110); MUL_x2_DEFINE(111); | |
370 | MUL_x2_DEFINE(112); MUL_x2_DEFINE(113); MUL_x2_DEFINE(114); MUL_x2_DEFINE(115); | |
371 | MUL_x2_DEFINE(116); MUL_x2_DEFINE(117); MUL_x2_DEFINE(118); MUL_x2_DEFINE(119); | |
372 | MUL_x2_DEFINE(120); MUL_x2_DEFINE(121); MUL_x2_DEFINE(122); MUL_x2_DEFINE(123); | |
373 | MUL_x2_DEFINE(124); MUL_x2_DEFINE(125); MUL_x2_DEFINE(126); MUL_x2_DEFINE(127); | |
374 | MUL_x2_DEFINE(128); MUL_x2_DEFINE(129); MUL_x2_DEFINE(130); MUL_x2_DEFINE(131); | |
375 | MUL_x2_DEFINE(132); MUL_x2_DEFINE(133); MUL_x2_DEFINE(134); MUL_x2_DEFINE(135); | |
376 | MUL_x2_DEFINE(136); MUL_x2_DEFINE(137); MUL_x2_DEFINE(138); MUL_x2_DEFINE(139); | |
377 | MUL_x2_DEFINE(140); MUL_x2_DEFINE(141); MUL_x2_DEFINE(142); MUL_x2_DEFINE(143); | |
378 | MUL_x2_DEFINE(144); MUL_x2_DEFINE(145); MUL_x2_DEFINE(146); MUL_x2_DEFINE(147); | |
379 | MUL_x2_DEFINE(148); MUL_x2_DEFINE(149); MUL_x2_DEFINE(150); MUL_x2_DEFINE(151); | |
380 | MUL_x2_DEFINE(152); MUL_x2_DEFINE(153); MUL_x2_DEFINE(154); MUL_x2_DEFINE(155); | |
381 | MUL_x2_DEFINE(156); MUL_x2_DEFINE(157); MUL_x2_DEFINE(158); MUL_x2_DEFINE(159); | |
382 | MUL_x2_DEFINE(160); MUL_x2_DEFINE(161); MUL_x2_DEFINE(162); MUL_x2_DEFINE(163); | |
383 | MUL_x2_DEFINE(164); MUL_x2_DEFINE(165); MUL_x2_DEFINE(166); MUL_x2_DEFINE(167); | |
384 | MUL_x2_DEFINE(168); MUL_x2_DEFINE(169); MUL_x2_DEFINE(170); MUL_x2_DEFINE(171); | |
385 | MUL_x2_DEFINE(172); MUL_x2_DEFINE(173); MUL_x2_DEFINE(174); MUL_x2_DEFINE(175); | |
386 | MUL_x2_DEFINE(176); MUL_x2_DEFINE(177); MUL_x2_DEFINE(178); MUL_x2_DEFINE(179); | |
387 | MUL_x2_DEFINE(180); MUL_x2_DEFINE(181); MUL_x2_DEFINE(182); MUL_x2_DEFINE(183); | |
388 | MUL_x2_DEFINE(184); MUL_x2_DEFINE(185); MUL_x2_DEFINE(186); MUL_x2_DEFINE(187); | |
389 | MUL_x2_DEFINE(188); MUL_x2_DEFINE(189); MUL_x2_DEFINE(190); MUL_x2_DEFINE(191); | |
390 | MUL_x2_DEFINE(192); MUL_x2_DEFINE(193); MUL_x2_DEFINE(194); MUL_x2_DEFINE(195); | |
391 | MUL_x2_DEFINE(196); MUL_x2_DEFINE(197); MUL_x2_DEFINE(198); MUL_x2_DEFINE(199); | |
392 | MUL_x2_DEFINE(200); MUL_x2_DEFINE(201); MUL_x2_DEFINE(202); MUL_x2_DEFINE(203); | |
393 | MUL_x2_DEFINE(204); MUL_x2_DEFINE(205); MUL_x2_DEFINE(206); MUL_x2_DEFINE(207); | |
394 | MUL_x2_DEFINE(208); MUL_x2_DEFINE(209); MUL_x2_DEFINE(210); MUL_x2_DEFINE(211); | |
395 | MUL_x2_DEFINE(212); MUL_x2_DEFINE(213); MUL_x2_DEFINE(214); MUL_x2_DEFINE(215); | |
396 | MUL_x2_DEFINE(216); MUL_x2_DEFINE(217); MUL_x2_DEFINE(218); MUL_x2_DEFINE(219); | |
397 | MUL_x2_DEFINE(220); MUL_x2_DEFINE(221); MUL_x2_DEFINE(222); MUL_x2_DEFINE(223); | |
398 | MUL_x2_DEFINE(224); MUL_x2_DEFINE(225); MUL_x2_DEFINE(226); MUL_x2_DEFINE(227); | |
399 | MUL_x2_DEFINE(228); MUL_x2_DEFINE(229); MUL_x2_DEFINE(230); MUL_x2_DEFINE(231); | |
400 | MUL_x2_DEFINE(232); MUL_x2_DEFINE(233); MUL_x2_DEFINE(234); MUL_x2_DEFINE(235); | |
401 | MUL_x2_DEFINE(236); MUL_x2_DEFINE(237); MUL_x2_DEFINE(238); MUL_x2_DEFINE(239); | |
402 | MUL_x2_DEFINE(240); MUL_x2_DEFINE(241); MUL_x2_DEFINE(242); MUL_x2_DEFINE(243); | |
403 | MUL_x2_DEFINE(244); MUL_x2_DEFINE(245); MUL_x2_DEFINE(246); MUL_x2_DEFINE(247); | |
404 | MUL_x2_DEFINE(248); MUL_x2_DEFINE(249); MUL_x2_DEFINE(250); MUL_x2_DEFINE(251); | |
405 | MUL_x2_DEFINE(252); MUL_x2_DEFINE(253); MUL_x2_DEFINE(254); MUL_x2_DEFINE(255); | |
406 | ||
407 | ||
408 | ||
/* Signature shared by all generated multiply functions: no arguments, no result. */
409 | typedef void (*mul_fn_ptr_t)(void); | |
410 | ||
/*
 * Lookup table for the single-register multiply functions: entry c is
 * mul_x1_<c>, for c in 0..255. The table is declared with 256-byte
 * alignment.
 */
411 | static const mul_fn_ptr_t __attribute__((aligned(256))) | |
412 | gf_x1_mul_fns[256] = { | |
413 | mul_x1_0, mul_x1_1, mul_x1_2, mul_x1_3, mul_x1_4, mul_x1_5, | |
414 | mul_x1_6, mul_x1_7, mul_x1_8, mul_x1_9, mul_x1_10, mul_x1_11, | |
415 | mul_x1_12, mul_x1_13, mul_x1_14, mul_x1_15, mul_x1_16, mul_x1_17, | |
416 | mul_x1_18, mul_x1_19, mul_x1_20, mul_x1_21, mul_x1_22, mul_x1_23, | |
417 | mul_x1_24, mul_x1_25, mul_x1_26, mul_x1_27, mul_x1_28, mul_x1_29, | |
418 | mul_x1_30, mul_x1_31, mul_x1_32, mul_x1_33, mul_x1_34, mul_x1_35, | |
419 | mul_x1_36, mul_x1_37, mul_x1_38, mul_x1_39, mul_x1_40, mul_x1_41, | |
420 | mul_x1_42, mul_x1_43, mul_x1_44, mul_x1_45, mul_x1_46, mul_x1_47, | |
421 | mul_x1_48, mul_x1_49, mul_x1_50, mul_x1_51, mul_x1_52, mul_x1_53, | |
422 | mul_x1_54, mul_x1_55, mul_x1_56, mul_x1_57, mul_x1_58, mul_x1_59, | |
423 | mul_x1_60, mul_x1_61, mul_x1_62, mul_x1_63, mul_x1_64, mul_x1_65, | |
424 | mul_x1_66, mul_x1_67, mul_x1_68, mul_x1_69, mul_x1_70, mul_x1_71, | |
425 | mul_x1_72, mul_x1_73, mul_x1_74, mul_x1_75, mul_x1_76, mul_x1_77, | |
426 | mul_x1_78, mul_x1_79, mul_x1_80, mul_x1_81, mul_x1_82, mul_x1_83, | |
427 | mul_x1_84, mul_x1_85, mul_x1_86, mul_x1_87, mul_x1_88, mul_x1_89, | |
428 | mul_x1_90, mul_x1_91, mul_x1_92, mul_x1_93, mul_x1_94, mul_x1_95, | |
429 | mul_x1_96, mul_x1_97, mul_x1_98, mul_x1_99, mul_x1_100, mul_x1_101, | |
430 | mul_x1_102, mul_x1_103, mul_x1_104, mul_x1_105, mul_x1_106, mul_x1_107, | |
431 | mul_x1_108, mul_x1_109, mul_x1_110, mul_x1_111, mul_x1_112, mul_x1_113, | |
432 | mul_x1_114, mul_x1_115, mul_x1_116, mul_x1_117, mul_x1_118, mul_x1_119, | |
433 | mul_x1_120, mul_x1_121, mul_x1_122, mul_x1_123, mul_x1_124, mul_x1_125, | |
434 | mul_x1_126, mul_x1_127, mul_x1_128, mul_x1_129, mul_x1_130, mul_x1_131, | |
435 | mul_x1_132, mul_x1_133, mul_x1_134, mul_x1_135, mul_x1_136, mul_x1_137, | |
436 | mul_x1_138, mul_x1_139, mul_x1_140, mul_x1_141, mul_x1_142, mul_x1_143, | |
437 | mul_x1_144, mul_x1_145, mul_x1_146, mul_x1_147, mul_x1_148, mul_x1_149, | |
438 | mul_x1_150, mul_x1_151, mul_x1_152, mul_x1_153, mul_x1_154, mul_x1_155, | |
439 | mul_x1_156, mul_x1_157, mul_x1_158, mul_x1_159, mul_x1_160, mul_x1_161, | |
440 | mul_x1_162, mul_x1_163, mul_x1_164, mul_x1_165, mul_x1_166, mul_x1_167, | |
441 | mul_x1_168, mul_x1_169, mul_x1_170, mul_x1_171, mul_x1_172, mul_x1_173, | |
442 | mul_x1_174, mul_x1_175, mul_x1_176, mul_x1_177, mul_x1_178, mul_x1_179, | |
443 | mul_x1_180, mul_x1_181, mul_x1_182, mul_x1_183, mul_x1_184, mul_x1_185, | |
444 | mul_x1_186, mul_x1_187, mul_x1_188, mul_x1_189, mul_x1_190, mul_x1_191, | |
445 | mul_x1_192, mul_x1_193, mul_x1_194, mul_x1_195, mul_x1_196, mul_x1_197, | |
446 | mul_x1_198, mul_x1_199, mul_x1_200, mul_x1_201, mul_x1_202, mul_x1_203, | |
447 | mul_x1_204, mul_x1_205, mul_x1_206, mul_x1_207, mul_x1_208, mul_x1_209, | |
448 | mul_x1_210, mul_x1_211, mul_x1_212, mul_x1_213, mul_x1_214, mul_x1_215, | |
449 | mul_x1_216, mul_x1_217, mul_x1_218, mul_x1_219, mul_x1_220, mul_x1_221, | |
450 | mul_x1_222, mul_x1_223, mul_x1_224, mul_x1_225, mul_x1_226, mul_x1_227, | |
451 | mul_x1_228, mul_x1_229, mul_x1_230, mul_x1_231, mul_x1_232, mul_x1_233, | |
452 | mul_x1_234, mul_x1_235, mul_x1_236, mul_x1_237, mul_x1_238, mul_x1_239, | |
453 | mul_x1_240, mul_x1_241, mul_x1_242, mul_x1_243, mul_x1_244, mul_x1_245, | |
454 | mul_x1_246, mul_x1_247, mul_x1_248, mul_x1_249, mul_x1_250, mul_x1_251, | |
455 | mul_x1_252, mul_x1_253, mul_x1_254, mul_x1_255 | |
456 | }; | |
457 | ||
/*
 * Lookup table for the two-register multiply functions: entry c is
 * mul_x2_<c>, for c in 0..255. The table is declared with 256-byte
 * alignment.
 */
458 | static const mul_fn_ptr_t __attribute__((aligned(256))) | |
459 | gf_x2_mul_fns[256] = { | |
460 | mul_x2_0, mul_x2_1, mul_x2_2, mul_x2_3, mul_x2_4, mul_x2_5, | |
461 | mul_x2_6, mul_x2_7, mul_x2_8, mul_x2_9, mul_x2_10, mul_x2_11, | |
462 | mul_x2_12, mul_x2_13, mul_x2_14, mul_x2_15, mul_x2_16, mul_x2_17, | |
463 | mul_x2_18, mul_x2_19, mul_x2_20, mul_x2_21, mul_x2_22, mul_x2_23, | |
464 | mul_x2_24, mul_x2_25, mul_x2_26, mul_x2_27, mul_x2_28, mul_x2_29, | |
465 | mul_x2_30, mul_x2_31, mul_x2_32, mul_x2_33, mul_x2_34, mul_x2_35, | |
466 | mul_x2_36, mul_x2_37, mul_x2_38, mul_x2_39, mul_x2_40, mul_x2_41, | |
467 | mul_x2_42, mul_x2_43, mul_x2_44, mul_x2_45, mul_x2_46, mul_x2_47, | |
468 | mul_x2_48, mul_x2_49, mul_x2_50, mul_x2_51, mul_x2_52, mul_x2_53, | |
469 | mul_x2_54, mul_x2_55, mul_x2_56, mul_x2_57, mul_x2_58, mul_x2_59, | |
470 | mul_x2_60, mul_x2_61, mul_x2_62, mul_x2_63, mul_x2_64, mul_x2_65, | |
471 | mul_x2_66, mul_x2_67, mul_x2_68, mul_x2_69, mul_x2_70, mul_x2_71, | |
472 | mul_x2_72, mul_x2_73, mul_x2_74, mul_x2_75, mul_x2_76, mul_x2_77, | |
473 | mul_x2_78, mul_x2_79, mul_x2_80, mul_x2_81, mul_x2_82, mul_x2_83, | |
474 | mul_x2_84, mul_x2_85, mul_x2_86, mul_x2_87, mul_x2_88, mul_x2_89, | |
475 | mul_x2_90, mul_x2_91, mul_x2_92, mul_x2_93, mul_x2_94, mul_x2_95, | |
476 | mul_x2_96, mul_x2_97, mul_x2_98, mul_x2_99, mul_x2_100, mul_x2_101, | |
477 | mul_x2_102, mul_x2_103, mul_x2_104, mul_x2_105, mul_x2_106, mul_x2_107, | |
478 | mul_x2_108, mul_x2_109, mul_x2_110, mul_x2_111, mul_x2_112, mul_x2_113, | |
479 | mul_x2_114, mul_x2_115, mul_x2_116, mul_x2_117, mul_x2_118, mul_x2_119, | |
480 | mul_x2_120, mul_x2_121, mul_x2_122, mul_x2_123, mul_x2_124, mul_x2_125, | |
481 | mul_x2_126, mul_x2_127, mul_x2_128, mul_x2_129, mul_x2_130, mul_x2_131, | |
482 | mul_x2_132, mul_x2_133, mul_x2_134, mul_x2_135, mul_x2_136, mul_x2_137, | |
483 | mul_x2_138, mul_x2_139, mul_x2_140, mul_x2_141, mul_x2_142, mul_x2_143, | |
484 | mul_x2_144, mul_x2_145, mul_x2_146, mul_x2_147, mul_x2_148, mul_x2_149, | |
485 | mul_x2_150, mul_x2_151, mul_x2_152, mul_x2_153, mul_x2_154, mul_x2_155, | |
486 | mul_x2_156, mul_x2_157, mul_x2_158, mul_x2_159, mul_x2_160, mul_x2_161, | |
487 | mul_x2_162, mul_x2_163, mul_x2_164, mul_x2_165, mul_x2_166, mul_x2_167, | |
488 | mul_x2_168, mul_x2_169, mul_x2_170, mul_x2_171, mul_x2_172, mul_x2_173, | |
489 | mul_x2_174, mul_x2_175, mul_x2_176, mul_x2_177, mul_x2_178, mul_x2_179, | |
490 | mul_x2_180, mul_x2_181, mul_x2_182, mul_x2_183, mul_x2_184, mul_x2_185, | |
491 | mul_x2_186, mul_x2_187, mul_x2_188, mul_x2_189, mul_x2_190, mul_x2_191, | |
492 | mul_x2_192, mul_x2_193, mul_x2_194, mul_x2_195, mul_x2_196, mul_x2_197, | |
493 | mul_x2_198, mul_x2_199, mul_x2_200, mul_x2_201, mul_x2_202, mul_x2_203, | |
494 | mul_x2_204, mul_x2_205, mul_x2_206, mul_x2_207, mul_x2_208, mul_x2_209, | |
495 | mul_x2_210, mul_x2_211, mul_x2_212, mul_x2_213, mul_x2_214, mul_x2_215, | |
496 | mul_x2_216, mul_x2_217, mul_x2_218, mul_x2_219, mul_x2_220, mul_x2_221, | |
497 | mul_x2_222, mul_x2_223, mul_x2_224, mul_x2_225, mul_x2_226, mul_x2_227, | |
498 | mul_x2_228, mul_x2_229, mul_x2_230, mul_x2_231, mul_x2_232, mul_x2_233, | |
499 | mul_x2_234, mul_x2_235, mul_x2_236, mul_x2_237, mul_x2_238, mul_x2_239, | |
500 | mul_x2_240, mul_x2_241, mul_x2_242, mul_x2_243, mul_x2_244, mul_x2_245, | |
501 | mul_x2_246, mul_x2_247, mul_x2_248, mul_x2_249, mul_x2_250, mul_x2_251, | |
502 | mul_x2_252, mul_x2_253, mul_x2_254, mul_x2_255 | |
503 | }; | |
504 | ||
/*
 * MUL(c, r...): multiply the listed registers by the constant c in place.
 *
 * The registers are copied into the fixed input registers of the matching
 * generated function, the function is looked up by c in gf_x2_mul_fns or
 * gf_x1_mul_fns and called, and the fixed accumulator registers are
 * copied back over r. Two- and one-register lists are supported; any
 * other length reaches VERIFY(0).
 *
 * c indexes a 256-entry table without a range check here.
 * NOTE(review): the sequence depends on the xmm registers keeping their
 * contents across the indirect call; nothing in this macro expresses that
 * to the compiler.
 */
505 | #define MUL(c, r...) \ | |
506 | { \ | |
507 | switch (REG_CNT(r)) { \ | |
508 | case 2: \ | |
509 | COPY(r, _mul_x2_in); \ | |
510 | gf_x2_mul_fns[c](); \ | |
511 | COPY(_mul_x2_acc, r); \ | |
512 | break; \ | |
513 | case 1: \ | |
514 | COPY(r, _mul_x1_in); \ | |
515 | gf_x1_mul_fns[c](); \ | |
516 | COPY(_mul_x1_acc, r); \ | |
517 | break; \ | |
c5858ff9 MM |
518 | default: \ |
519 | VERIFY(0); \ | |
ae25d222 GN |
520 | } \ |
521 | } | |
522 | ||
523 | ||
/*
 * Bracket a block of vector math: raidz_math_begin() maps to kfpu_begin()
 * and raidz_math_end() to kfpu_end().
 */
524 | #define raidz_math_begin() kfpu_begin() | |
525 | #define raidz_math_end() kfpu_end() | |
526 | ||
/*
 * Per-operation configuration for this implementation. Each operation
 * prefix (ZERO, COPY, ADD, MUL, GEN_*, SYN_*, REC_*) may provide:
 *   <OP>_STRIDE    a small integer constant (4, 2 or 1 below);
 *   <OP>_DEFINE()  a setup hook, either empty ({}) or MUL2_SETUP();
 *   <OP>_<ROLE>    a list of xmm register indices (e.g. 0, 1, 2, 3).
 * The SYN_* operations have no stride of their own here; a single
 * SYN_STRIDE is defined instead.
 *
 * None of these names are used elsewhere in this part of the file; they
 * are presumably consumed by vdev_raidz_math_impl.h, which is included
 * right after them (confirm there).
 */
cbf484f8 GN |
527 | #define SYN_STRIDE 4 |
528 | ||
529 | #define ZERO_STRIDE 4 | |
530 | #define ZERO_DEFINE() {} | |
531 | #define ZERO_D 0, 1, 2, 3 | |
532 | ||
533 | #define COPY_STRIDE 4 | |
534 | #define COPY_DEFINE() {} | |
535 | #define COPY_D 0, 1, 2, 3 | |
536 | ||
537 | #define ADD_STRIDE 4 | |
538 | #define ADD_DEFINE() {} | |
539 | #define ADD_D 0, 1, 2, 3 | |
540 | ||
541 | #define MUL_STRIDE 2 | |
65d71d42 | 542 | #define MUL_DEFINE() MUL2_SETUP() |
cbf484f8 GN |
543 | #define MUL_D 0, 1 |
544 | ||
ae25d222 | 545 | #define GEN_P_STRIDE 4 |
cbf484f8 | 546 | #define GEN_P_DEFINE() {} |
ae25d222 GN |
547 | #define GEN_P_P 0, 1, 2, 3 |
548 | ||
cbf484f8 | 549 | #define GEN_PQ_STRIDE 4 |
ae25d222 | 550 | #define GEN_PQ_DEFINE() {} |
cbf484f8 GN |
551 | #define GEN_PQ_D 0, 1, 2, 3 |
552 | #define GEN_PQ_C 4, 5, 6, 7 | |
ae25d222 | 553 | |
cbf484f8 | 554 | #define GEN_PQR_STRIDE 4 |
ae25d222 | 555 | #define GEN_PQR_DEFINE() {} |
cbf484f8 GN |
556 | #define GEN_PQR_D 0, 1, 2, 3 |
557 | #define GEN_PQR_C 4, 5, 6, 7 | |
ae25d222 | 558 | |
cbf484f8 GN |
559 | #define SYN_Q_DEFINE() {} |
560 | #define SYN_Q_D 0, 1, 2, 3 | |
561 | #define SYN_Q_X 4, 5, 6, 7 | |
ae25d222 | 562 | |
cbf484f8 GN |
563 | #define SYN_R_DEFINE() {} |
564 | #define SYN_R_D 0, 1, 2, 3 | |
565 | #define SYN_R_X 4, 5, 6, 7 | |
ae25d222 | 566 | |
cbf484f8 GN |
567 | #define SYN_PQ_DEFINE() {} |
568 | #define SYN_PQ_D 0, 1, 2, 3 | |
569 | #define SYN_PQ_X 4, 5, 6, 7 | |
ae25d222 | 570 | |
ae25d222 | 571 | #define REC_PQ_STRIDE 2 |
65d71d42 | 572 | #define REC_PQ_DEFINE() MUL2_SETUP() |
ae25d222 GN |
573 | #define REC_PQ_X 0, 1 |
574 | #define REC_PQ_Y 2, 3 | |
cbf484f8 GN |
575 | #define REC_PQ_T 4, 5 |
576 | ||
577 | #define SYN_PR_DEFINE() {} | |
578 | #define SYN_PR_D 0, 1, 2, 3 | |
579 | #define SYN_PR_X 4, 5, 6, 7 | |
ae25d222 | 580 | |
ae25d222 | 581 | #define REC_PR_STRIDE 2 |
65d71d42 | 582 | #define REC_PR_DEFINE() MUL2_SETUP() |
ae25d222 GN |
583 | #define REC_PR_X 0, 1 |
584 | #define REC_PR_Y 2, 3 | |
cbf484f8 GN |
585 | #define REC_PR_T 4, 5 |
586 | ||
587 | #define SYN_QR_DEFINE() {} | |
588 | #define SYN_QR_D 0, 1, 2, 3 | |
589 | #define SYN_QR_X 4, 5, 6, 7 | |
ae25d222 | 590 | |
ae25d222 | 591 | #define REC_QR_STRIDE 2 |
65d71d42 | 592 | #define REC_QR_DEFINE() MUL2_SETUP() |
ae25d222 GN |
593 | #define REC_QR_X 0, 1 |
594 | #define REC_QR_Y 2, 3 | |
cbf484f8 GN |
595 | #define REC_QR_T 4, 5 |
596 | ||
597 | #define SYN_PQR_DEFINE() {} | |
598 | #define SYN_PQR_D 0, 1, 2, 3 | |
599 | #define SYN_PQR_X 4, 5, 6, 7 | |
ae25d222 | 600 | |
ae25d222 | 601 | #define REC_PQR_STRIDE 1 |
65d71d42 | 602 | #define REC_PQR_DEFINE() MUL2_SETUP() |
ae25d222 GN |
603 | #define REC_PQR_X 0 |
604 | #define REC_PQR_Y 1 | |
605 | #define REC_PQR_Z 2 | |
cbf484f8 GN |
606 | #define REC_PQR_XS 3 |
607 | #define REC_PQR_YS 4 | |
ae25d222 GN |
608 | |
609 | ||
610 | #include <sys/vdev_raidz_impl.h> | |
611 | #include "vdev_raidz_math_impl.h" | |
612 | ||
/*
 * Instantiate the generation and reconstruction methods under the "sse2"
 * suffix. DEFINE_GEN_METHODS and DEFINE_REC_METHODS are not defined in
 * this file; presumably they come from one of the two headers included
 * just above (confirm).
 */
613 | DEFINE_GEN_METHODS(sse2); | |
614 | DEFINE_REC_METHODS(sse2); | |
615 | ||
/*
 * Support probe for this implementation: returns true only when
 * kfpu_allowed(), zfs_sse_available() and zfs_sse2_available() all return
 * true. The checks short-circuit in that order.
 */
616 | static boolean_t | |
617 | raidz_will_sse2_work(void) | |
618 | { | |
e5db3134 | 619 | return (kfpu_allowed() && zfs_sse_available() && zfs_sse2_available()); |
ae25d222 GN |
620 | } |
621 | ||
/*
 * Operations table exported for the "sse2" implementation: no init or
 * fini hooks, method lists expanded from RAIDZ_GEN_METHODS(sse2) and
 * RAIDZ_REC_METHODS(sse2), and raidz_will_sse2_work as the is_supported
 * probe.
 */
622 | const raidz_impl_ops_t vdev_raidz_sse2_impl = { | |
623 | .init = NULL, | |
624 | .fini = NULL, | |
625 | .gen = RAIDZ_GEN_METHODS(sse2), | |
626 | .rec = RAIDZ_REC_METHODS(sse2), | |
627 | .is_supported = &raidz_will_sse2_work, | |
628 | .name = "sse2" | |
629 | }; | |
630 | ||
631 | #endif /* defined(__x86_64) && defined(HAVE_SSE2) */ |