]>
Commit | Line | Data |
---|---|---|
db432672 RH |
1 | /* |
2 | * Generic vectorized operation runtime | |
3 | * | |
4 | * Copyright (c) 2018 Linaro | |
5 | * | |
6 | * This library is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
fb0343d5 | 9 | * version 2.1 of the License, or (at your option) any later version. |
db432672 RH |
10 | * |
11 | * This library is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. | |
18 | */ | |
19 | ||
20 | #include "qemu/osdep.h" | |
21 | #include "qemu/host-utils.h" | |
22 | #include "cpu.h" | |
23 | #include "exec/helper-proto.h" | |
dcb32f1d | 24 | #include "tcg/tcg-gvec-desc.h" |
db432672 RH |
25 | |
26 | ||
db432672 RH |
/*
 * Zero the part of the destination that lies between the operation size
 * and the maximum size encoded in the descriptor.
 *
 * d:     base of the destination vector
 * oprsz: number of bytes already written by the caller
 * desc:  descriptor; simd_maxsz(desc) gives the upper bound of the clear
 *
 * The clear is done in uint64_t-sized stores starting at offset oprsz.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    intptr_t total = simd_maxsz(desc);
    intptr_t ofs;

    if (unlikely(total > oprsz)) {
        /* total > oprsz guarantees at least one store. */
        ofs = oprsz;
        do {
            *(uint64_t *)(d + ofs) = 0;
            ofs += sizeof(uint64_t);
        } while (ofs < total);
    }
}
38 | ||
39 | void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) | |
40 | { | |
41 | intptr_t oprsz = simd_oprsz(desc); | |
42 | intptr_t i; | |
43 | ||
6c7ab301 RH |
44 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { |
45 | *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); | |
db432672 RH |
46 | } |
47 | clear_high(d, oprsz, desc); | |
48 | } | |
49 | ||
50 | void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) | |
51 | { | |
52 | intptr_t oprsz = simd_oprsz(desc); | |
53 | intptr_t i; | |
54 | ||
6c7ab301 RH |
55 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { |
56 | *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); | |
db432672 RH |
57 | } |
58 | clear_high(d, oprsz, desc); | |
59 | } | |
60 | ||
61 | void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) | |
62 | { | |
63 | intptr_t oprsz = simd_oprsz(desc); | |
64 | intptr_t i; | |
65 | ||
6c7ab301 RH |
66 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { |
67 | *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i); | |
db432672 RH |
68 | } |
69 | clear_high(d, oprsz, desc); | |
70 | } | |
71 | ||
72 | void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) | |
73 | { | |
74 | intptr_t oprsz = simd_oprsz(desc); | |
75 | intptr_t i; | |
76 | ||
6c7ab301 RH |
77 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
78 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i); | |
db432672 RH |
79 | } |
80 | clear_high(d, oprsz, desc); | |
81 | } | |
82 | ||
22fc3527 RH |
83 | void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc) |
84 | { | |
85 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
86 | intptr_t i; |
87 | ||
6c7ab301 | 88 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { |
0a83e43a | 89 | *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b; |
22fc3527 RH |
90 | } |
91 | clear_high(d, oprsz, desc); | |
92 | } | |
93 | ||
94 | void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc) | |
95 | { | |
96 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
97 | intptr_t i; |
98 | ||
6c7ab301 | 99 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { |
0a83e43a | 100 | *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b; |
22fc3527 RH |
101 | } |
102 | clear_high(d, oprsz, desc); | |
103 | } | |
104 | ||
105 | void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc) | |
106 | { | |
107 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
108 | intptr_t i; |
109 | ||
6c7ab301 | 110 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { |
0a83e43a | 111 | *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b; |
22fc3527 RH |
112 | } |
113 | clear_high(d, oprsz, desc); | |
114 | } | |
115 | ||
116 | void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc) | |
117 | { | |
118 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
119 | intptr_t i; |
120 | ||
6c7ab301 | 121 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
0a83e43a | 122 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b; |
22fc3527 RH |
123 | } |
124 | clear_high(d, oprsz, desc); | |
125 | } | |
126 | ||
db432672 RH |
127 | void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) |
128 | { | |
129 | intptr_t oprsz = simd_oprsz(desc); | |
130 | intptr_t i; | |
131 | ||
6c7ab301 RH |
132 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { |
133 | *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); | |
db432672 RH |
134 | } |
135 | clear_high(d, oprsz, desc); | |
136 | } | |
137 | ||
138 | void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) | |
139 | { | |
140 | intptr_t oprsz = simd_oprsz(desc); | |
141 | intptr_t i; | |
142 | ||
6c7ab301 RH |
143 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { |
144 | *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); | |
db432672 RH |
145 | } |
146 | clear_high(d, oprsz, desc); | |
147 | } | |
148 | ||
149 | void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) | |
150 | { | |
151 | intptr_t oprsz = simd_oprsz(desc); | |
152 | intptr_t i; | |
153 | ||
6c7ab301 RH |
154 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { |
155 | *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i); | |
db432672 RH |
156 | } |
157 | clear_high(d, oprsz, desc); | |
158 | } | |
159 | ||
160 | void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) | |
161 | { | |
162 | intptr_t oprsz = simd_oprsz(desc); | |
163 | intptr_t i; | |
164 | ||
6c7ab301 RH |
165 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
166 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i); | |
db432672 RH |
167 | } |
168 | clear_high(d, oprsz, desc); | |
169 | } | |
170 | ||
22fc3527 RH |
171 | void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) |
172 | { | |
173 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
174 | intptr_t i; |
175 | ||
6c7ab301 | 176 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { |
0a83e43a | 177 | *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b; |
22fc3527 RH |
178 | } |
179 | clear_high(d, oprsz, desc); | |
180 | } | |
181 | ||
182 | void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc) | |
183 | { | |
184 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
185 | intptr_t i; |
186 | ||
6c7ab301 | 187 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { |
0a83e43a | 188 | *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b; |
22fc3527 RH |
189 | } |
190 | clear_high(d, oprsz, desc); | |
191 | } | |
192 | ||
193 | void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc) | |
194 | { | |
195 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
196 | intptr_t i; |
197 | ||
6c7ab301 | 198 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { |
0a83e43a | 199 | *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b; |
22fc3527 RH |
200 | } |
201 | clear_high(d, oprsz, desc); | |
202 | } | |
203 | ||
204 | void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc) | |
205 | { | |
206 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
207 | intptr_t i; |
208 | ||
6c7ab301 | 209 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
0a83e43a | 210 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b; |
22fc3527 RH |
211 | } |
212 | clear_high(d, oprsz, desc); | |
213 | } | |
214 | ||
3774030a RH |
215 | void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) |
216 | { | |
217 | intptr_t oprsz = simd_oprsz(desc); | |
218 | intptr_t i; | |
219 | ||
6c7ab301 RH |
220 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { |
221 | *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i); | |
3774030a RH |
222 | } |
223 | clear_high(d, oprsz, desc); | |
224 | } | |
225 | ||
226 | void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) | |
227 | { | |
228 | intptr_t oprsz = simd_oprsz(desc); | |
229 | intptr_t i; | |
230 | ||
6c7ab301 RH |
231 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { |
232 | *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i); | |
3774030a RH |
233 | } |
234 | clear_high(d, oprsz, desc); | |
235 | } | |
236 | ||
237 | void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) | |
238 | { | |
239 | intptr_t oprsz = simd_oprsz(desc); | |
240 | intptr_t i; | |
241 | ||
6c7ab301 RH |
242 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { |
243 | *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i); | |
3774030a RH |
244 | } |
245 | clear_high(d, oprsz, desc); | |
246 | } | |
247 | ||
248 | void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) | |
249 | { | |
250 | intptr_t oprsz = simd_oprsz(desc); | |
251 | intptr_t i; | |
252 | ||
6c7ab301 RH |
253 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
254 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i); | |
3774030a RH |
255 | } |
256 | clear_high(d, oprsz, desc); | |
257 | } | |
258 | ||
22fc3527 RH |
259 | void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc) |
260 | { | |
261 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
262 | intptr_t i; |
263 | ||
6c7ab301 | 264 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { |
0a83e43a | 265 | *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b; |
22fc3527 RH |
266 | } |
267 | clear_high(d, oprsz, desc); | |
268 | } | |
269 | ||
270 | void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc) | |
271 | { | |
272 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
273 | intptr_t i; |
274 | ||
6c7ab301 | 275 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { |
0a83e43a | 276 | *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b; |
22fc3527 RH |
277 | } |
278 | clear_high(d, oprsz, desc); | |
279 | } | |
280 | ||
281 | void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) | |
282 | { | |
283 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
284 | intptr_t i; |
285 | ||
6c7ab301 | 286 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { |
0a83e43a | 287 | *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b; |
22fc3527 RH |
288 | } |
289 | clear_high(d, oprsz, desc); | |
290 | } | |
291 | ||
292 | void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) | |
293 | { | |
294 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
295 | intptr_t i; |
296 | ||
6c7ab301 | 297 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
0a83e43a | 298 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b; |
22fc3527 RH |
299 | } |
300 | clear_high(d, oprsz, desc); | |
301 | } | |
302 | ||
db432672 RH |
303 | void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) |
304 | { | |
305 | intptr_t oprsz = simd_oprsz(desc); | |
306 | intptr_t i; | |
307 | ||
6c7ab301 RH |
308 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { |
309 | *(uint8_t *)(d + i) = -*(uint8_t *)(a + i); | |
db432672 RH |
310 | } |
311 | clear_high(d, oprsz, desc); | |
312 | } | |
313 | ||
314 | void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) | |
315 | { | |
316 | intptr_t oprsz = simd_oprsz(desc); | |
317 | intptr_t i; | |
318 | ||
6c7ab301 RH |
319 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { |
320 | *(uint16_t *)(d + i) = -*(uint16_t *)(a + i); | |
db432672 RH |
321 | } |
322 | clear_high(d, oprsz, desc); | |
323 | } | |
324 | ||
325 | void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) | |
326 | { | |
327 | intptr_t oprsz = simd_oprsz(desc); | |
328 | intptr_t i; | |
329 | ||
6c7ab301 RH |
330 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { |
331 | *(uint32_t *)(d + i) = -*(uint32_t *)(a + i); | |
db432672 RH |
332 | } |
333 | clear_high(d, oprsz, desc); | |
334 | } | |
335 | ||
336 | void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) | |
337 | { | |
338 | intptr_t oprsz = simd_oprsz(desc); | |
339 | intptr_t i; | |
340 | ||
6c7ab301 RH |
341 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
342 | *(uint64_t *)(d + i) = -*(uint64_t *)(a + i); | |
db432672 RH |
343 | } |
344 | clear_high(d, oprsz, desc); | |
345 | } | |
346 | ||
bcefc902 RH |
347 | void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc) |
348 | { | |
349 | intptr_t oprsz = simd_oprsz(desc); | |
350 | intptr_t i; | |
351 | ||
352 | for (i = 0; i < oprsz; i += sizeof(int8_t)) { | |
353 | int8_t aa = *(int8_t *)(a + i); | |
354 | *(int8_t *)(d + i) = aa < 0 ? -aa : aa; | |
355 | } | |
356 | clear_high(d, oprsz, desc); | |
357 | } | |
358 | ||
359 | void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc) | |
360 | { | |
361 | intptr_t oprsz = simd_oprsz(desc); | |
362 | intptr_t i; | |
363 | ||
364 | for (i = 0; i < oprsz; i += sizeof(int16_t)) { | |
365 | int16_t aa = *(int16_t *)(a + i); | |
366 | *(int16_t *)(d + i) = aa < 0 ? -aa : aa; | |
367 | } | |
368 | clear_high(d, oprsz, desc); | |
369 | } | |
370 | ||
371 | void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc) | |
372 | { | |
373 | intptr_t oprsz = simd_oprsz(desc); | |
374 | intptr_t i; | |
375 | ||
376 | for (i = 0; i < oprsz; i += sizeof(int32_t)) { | |
377 | int32_t aa = *(int32_t *)(a + i); | |
378 | *(int32_t *)(d + i) = aa < 0 ? -aa : aa; | |
379 | } | |
380 | clear_high(d, oprsz, desc); | |
381 | } | |
382 | ||
383 | void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc) | |
384 | { | |
385 | intptr_t oprsz = simd_oprsz(desc); | |
386 | intptr_t i; | |
387 | ||
388 | for (i = 0; i < oprsz; i += sizeof(int64_t)) { | |
389 | int64_t aa = *(int64_t *)(a + i); | |
390 | *(int64_t *)(d + i) = aa < 0 ? -aa : aa; | |
391 | } | |
392 | clear_high(d, oprsz, desc); | |
393 | } | |
394 | ||
db432672 RH |
395 | void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) |
396 | { | |
397 | intptr_t oprsz = simd_oprsz(desc); | |
398 | ||
399 | memcpy(d, a, oprsz); | |
400 | clear_high(d, oprsz, desc); | |
401 | } | |
402 | ||
403 | void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) | |
404 | { | |
405 | intptr_t oprsz = simd_oprsz(desc); | |
406 | intptr_t i; | |
407 | ||
408 | if (c == 0) { | |
409 | oprsz = 0; | |
410 | } else { | |
411 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { | |
412 | *(uint64_t *)(d + i) = c; | |
413 | } | |
414 | } | |
415 | clear_high(d, oprsz, desc); | |
416 | } | |
417 | ||
418 | void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) | |
419 | { | |
420 | intptr_t oprsz = simd_oprsz(desc); | |
421 | intptr_t i; | |
422 | ||
423 | if (c == 0) { | |
424 | oprsz = 0; | |
425 | } else { | |
426 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { | |
427 | *(uint32_t *)(d + i) = c; | |
428 | } | |
429 | } | |
430 | clear_high(d, oprsz, desc); | |
431 | } | |
432 | ||
433 | void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) | |
434 | { | |
435 | HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); | |
436 | } | |
437 | ||
438 | void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) | |
439 | { | |
440 | HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); | |
441 | } | |
442 | ||
443 | void HELPER(gvec_not)(void *d, void *a, uint32_t desc) | |
444 | { | |
445 | intptr_t oprsz = simd_oprsz(desc); | |
446 | intptr_t i; | |
447 | ||
6c7ab301 RH |
448 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
449 | *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i); | |
db432672 RH |
450 | } |
451 | clear_high(d, oprsz, desc); | |
452 | } | |
453 | ||
454 | void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) | |
455 | { | |
456 | intptr_t oprsz = simd_oprsz(desc); | |
457 | intptr_t i; | |
458 | ||
6c7ab301 RH |
459 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
460 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i); | |
db432672 RH |
461 | } |
462 | clear_high(d, oprsz, desc); | |
463 | } | |
464 | ||
465 | void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) | |
466 | { | |
467 | intptr_t oprsz = simd_oprsz(desc); | |
468 | intptr_t i; | |
469 | ||
6c7ab301 RH |
470 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
471 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i); | |
db432672 RH |
472 | } |
473 | clear_high(d, oprsz, desc); | |
474 | } | |
475 | ||
476 | void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) | |
477 | { | |
478 | intptr_t oprsz = simd_oprsz(desc); | |
479 | intptr_t i; | |
480 | ||
6c7ab301 RH |
481 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
482 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i); | |
db432672 RH |
483 | } |
484 | clear_high(d, oprsz, desc); | |
485 | } | |
486 | ||
487 | void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) | |
488 | { | |
489 | intptr_t oprsz = simd_oprsz(desc); | |
490 | intptr_t i; | |
491 | ||
6c7ab301 RH |
492 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
493 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i); | |
db432672 RH |
494 | } |
495 | clear_high(d, oprsz, desc); | |
496 | } | |
497 | ||
498 | void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) | |
499 | { | |
500 | intptr_t oprsz = simd_oprsz(desc); | |
501 | intptr_t i; | |
502 | ||
6c7ab301 RH |
503 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
504 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i); | |
db432672 RH |
505 | } |
506 | clear_high(d, oprsz, desc); | |
507 | } | |
d0ec9796 | 508 | |
f550805d RH |
509 | void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) |
510 | { | |
511 | intptr_t oprsz = simd_oprsz(desc); | |
512 | intptr_t i; | |
513 | ||
6c7ab301 RH |
514 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
515 | *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i)); | |
f550805d RH |
516 | } |
517 | clear_high(d, oprsz, desc); | |
518 | } | |
519 | ||
520 | void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) | |
521 | { | |
522 | intptr_t oprsz = simd_oprsz(desc); | |
523 | intptr_t i; | |
524 | ||
6c7ab301 RH |
525 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
526 | *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i)); | |
f550805d RH |
527 | } |
528 | clear_high(d, oprsz, desc); | |
529 | } | |
530 | ||
531 | void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) | |
532 | { | |
533 | intptr_t oprsz = simd_oprsz(desc); | |
534 | intptr_t i; | |
535 | ||
6c7ab301 RH |
536 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
537 | *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i)); | |
f550805d RH |
538 | } |
539 | clear_high(d, oprsz, desc); | |
540 | } | |
541 | ||
22fc3527 RH |
542 | void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) |
543 | { | |
544 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
545 | intptr_t i; |
546 | ||
6c7ab301 | 547 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
0a83e43a | 548 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b; |
22fc3527 RH |
549 | } |
550 | clear_high(d, oprsz, desc); | |
551 | } | |
552 | ||
553 | void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) | |
554 | { | |
555 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
556 | intptr_t i; |
557 | ||
6c7ab301 | 558 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
0a83e43a | 559 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b; |
22fc3527 RH |
560 | } |
561 | clear_high(d, oprsz, desc); | |
562 | } | |
563 | ||
564 | void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) | |
565 | { | |
566 | intptr_t oprsz = simd_oprsz(desc); | |
22fc3527 RH |
567 | intptr_t i; |
568 | ||
6c7ab301 | 569 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
0a83e43a | 570 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b; |
22fc3527 RH |
571 | } |
572 | clear_high(d, oprsz, desc); | |
573 | } | |
574 | ||
d0ec9796 RH |
575 | void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) |
576 | { | |
577 | intptr_t oprsz = simd_oprsz(desc); | |
578 | int shift = simd_data(desc); | |
579 | intptr_t i; | |
580 | ||
6c7ab301 RH |
581 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { |
582 | *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift; | |
d0ec9796 RH |
583 | } |
584 | clear_high(d, oprsz, desc); | |
585 | } | |
586 | ||
587 | void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) | |
588 | { | |
589 | intptr_t oprsz = simd_oprsz(desc); | |
590 | int shift = simd_data(desc); | |
591 | intptr_t i; | |
592 | ||
6c7ab301 RH |
593 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { |
594 | *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift; | |
d0ec9796 RH |
595 | } |
596 | clear_high(d, oprsz, desc); | |
597 | } | |
598 | ||
599 | void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) | |
600 | { | |
601 | intptr_t oprsz = simd_oprsz(desc); | |
602 | int shift = simd_data(desc); | |
603 | intptr_t i; | |
604 | ||
6c7ab301 RH |
605 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { |
606 | *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift; | |
d0ec9796 RH |
607 | } |
608 | clear_high(d, oprsz, desc); | |
609 | } | |
610 | ||
611 | void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) | |
612 | { | |
613 | intptr_t oprsz = simd_oprsz(desc); | |
614 | int shift = simd_data(desc); | |
615 | intptr_t i; | |
616 | ||
6c7ab301 RH |
617 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
618 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift; | |
d0ec9796 RH |
619 | } |
620 | clear_high(d, oprsz, desc); | |
621 | } | |
622 | ||
623 | void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) | |
624 | { | |
625 | intptr_t oprsz = simd_oprsz(desc); | |
626 | int shift = simd_data(desc); | |
627 | intptr_t i; | |
628 | ||
6c7ab301 RH |
629 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { |
630 | *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift; | |
d0ec9796 RH |
631 | } |
632 | clear_high(d, oprsz, desc); | |
633 | } | |
634 | ||
635 | void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) | |
636 | { | |
637 | intptr_t oprsz = simd_oprsz(desc); | |
638 | int shift = simd_data(desc); | |
639 | intptr_t i; | |
640 | ||
6c7ab301 RH |
641 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { |
642 | *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift; | |
d0ec9796 RH |
643 | } |
644 | clear_high(d, oprsz, desc); | |
645 | } | |
646 | ||
647 | void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) | |
648 | { | |
649 | intptr_t oprsz = simd_oprsz(desc); | |
650 | int shift = simd_data(desc); | |
651 | intptr_t i; | |
652 | ||
6c7ab301 RH |
653 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { |
654 | *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift; | |
d0ec9796 RH |
655 | } |
656 | clear_high(d, oprsz, desc); | |
657 | } | |
658 | ||
659 | void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) | |
660 | { | |
661 | intptr_t oprsz = simd_oprsz(desc); | |
662 | int shift = simd_data(desc); | |
663 | intptr_t i; | |
664 | ||
6c7ab301 RH |
665 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
666 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift; | |
d0ec9796 RH |
667 | } |
668 | clear_high(d, oprsz, desc); | |
669 | } | |
670 | ||
671 | void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) | |
672 | { | |
673 | intptr_t oprsz = simd_oprsz(desc); | |
674 | int shift = simd_data(desc); | |
675 | intptr_t i; | |
676 | ||
6c7ab301 RH |
677 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { |
678 | *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift; | |
d0ec9796 RH |
679 | } |
680 | clear_high(d, oprsz, desc); | |
681 | } | |
682 | ||
683 | void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) | |
684 | { | |
685 | intptr_t oprsz = simd_oprsz(desc); | |
686 | int shift = simd_data(desc); | |
687 | intptr_t i; | |
688 | ||
6c7ab301 RH |
689 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { |
690 | *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift; | |
d0ec9796 RH |
691 | } |
692 | clear_high(d, oprsz, desc); | |
693 | } | |
694 | ||
695 | void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) | |
696 | { | |
697 | intptr_t oprsz = simd_oprsz(desc); | |
698 | int shift = simd_data(desc); | |
699 | intptr_t i; | |
700 | ||
6c7ab301 RH |
701 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { |
702 | *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift; | |
d0ec9796 RH |
703 | } |
704 | clear_high(d, oprsz, desc); | |
705 | } | |
706 | ||
707 | void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) | |
708 | { | |
709 | intptr_t oprsz = simd_oprsz(desc); | |
710 | int shift = simd_data(desc); | |
711 | intptr_t i; | |
712 | ||
6c7ab301 RH |
713 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
714 | *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift; | |
d0ec9796 RH |
715 | } |
716 | clear_high(d, oprsz, desc); | |
717 | } | |
212be173 | 718 | |
b0f7e744 RH |
719 | void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc) |
720 | { | |
721 | intptr_t oprsz = simd_oprsz(desc); | |
722 | int shift = simd_data(desc); | |
723 | intptr_t i; | |
724 | ||
725 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { | |
726 | *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift); | |
727 | } | |
728 | clear_high(d, oprsz, desc); | |
729 | } | |
730 | ||
731 | void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc) | |
732 | { | |
733 | intptr_t oprsz = simd_oprsz(desc); | |
734 | int shift = simd_data(desc); | |
735 | intptr_t i; | |
736 | ||
737 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { | |
738 | *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift); | |
739 | } | |
740 | clear_high(d, oprsz, desc); | |
741 | } | |
742 | ||
743 | void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc) | |
744 | { | |
745 | intptr_t oprsz = simd_oprsz(desc); | |
746 | int shift = simd_data(desc); | |
747 | intptr_t i; | |
748 | ||
749 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { | |
750 | *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift); | |
751 | } | |
752 | clear_high(d, oprsz, desc); | |
753 | } | |
754 | ||
755 | void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc) | |
756 | { | |
757 | intptr_t oprsz = simd_oprsz(desc); | |
758 | int shift = simd_data(desc); | |
759 | intptr_t i; | |
760 | ||
761 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { | |
762 | *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift); | |
763 | } | |
764 | clear_high(d, oprsz, desc); | |
765 | } | |
766 | ||
5ee5c14c RH |
767 | void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) |
768 | { | |
769 | intptr_t oprsz = simd_oprsz(desc); | |
770 | intptr_t i; | |
771 | ||
772 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { | |
773 | uint8_t sh = *(uint8_t *)(b + i) & 7; | |
774 | *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; | |
775 | } | |
776 | clear_high(d, oprsz, desc); | |
777 | } | |
778 | ||
779 | void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) | |
780 | { | |
781 | intptr_t oprsz = simd_oprsz(desc); | |
782 | intptr_t i; | |
783 | ||
784 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { | |
785 | uint8_t sh = *(uint16_t *)(b + i) & 15; | |
786 | *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; | |
787 | } | |
788 | clear_high(d, oprsz, desc); | |
789 | } | |
790 | ||
791 | void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) | |
792 | { | |
793 | intptr_t oprsz = simd_oprsz(desc); | |
794 | intptr_t i; | |
795 | ||
796 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { | |
797 | uint8_t sh = *(uint32_t *)(b + i) & 31; | |
798 | *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; | |
799 | } | |
800 | clear_high(d, oprsz, desc); | |
801 | } | |
802 | ||
803 | void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) | |
804 | { | |
805 | intptr_t oprsz = simd_oprsz(desc); | |
806 | intptr_t i; | |
807 | ||
808 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { | |
809 | uint8_t sh = *(uint64_t *)(b + i) & 63; | |
810 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; | |
811 | } | |
812 | clear_high(d, oprsz, desc); | |
813 | } | |
814 | ||
815 | void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) | |
816 | { | |
817 | intptr_t oprsz = simd_oprsz(desc); | |
818 | intptr_t i; | |
819 | ||
820 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { | |
821 | uint8_t sh = *(uint8_t *)(b + i) & 7; | |
822 | *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; | |
823 | } | |
824 | clear_high(d, oprsz, desc); | |
825 | } | |
826 | ||
827 | void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) | |
828 | { | |
829 | intptr_t oprsz = simd_oprsz(desc); | |
830 | intptr_t i; | |
831 | ||
832 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { | |
833 | uint8_t sh = *(uint16_t *)(b + i) & 15; | |
834 | *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; | |
835 | } | |
836 | clear_high(d, oprsz, desc); | |
837 | } | |
838 | ||
839 | void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) | |
840 | { | |
841 | intptr_t oprsz = simd_oprsz(desc); | |
842 | intptr_t i; | |
843 | ||
844 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { | |
845 | uint8_t sh = *(uint32_t *)(b + i) & 31; | |
846 | *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; | |
847 | } | |
848 | clear_high(d, oprsz, desc); | |
849 | } | |
850 | ||
851 | void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) | |
852 | { | |
853 | intptr_t oprsz = simd_oprsz(desc); | |
854 | intptr_t i; | |
855 | ||
856 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { | |
857 | uint8_t sh = *(uint64_t *)(b + i) & 63; | |
858 | *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; | |
859 | } | |
860 | clear_high(d, oprsz, desc); | |
861 | } | |
862 | ||
863 | void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) | |
864 | { | |
865 | intptr_t oprsz = simd_oprsz(desc); | |
866 | intptr_t i; | |
867 | ||
899f08ad | 868 | for (i = 0; i < oprsz; i += sizeof(int8_t)) { |
5ee5c14c RH |
869 | uint8_t sh = *(uint8_t *)(b + i) & 7; |
870 | *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; | |
871 | } | |
872 | clear_high(d, oprsz, desc); | |
873 | } | |
874 | ||
875 | void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) | |
876 | { | |
877 | intptr_t oprsz = simd_oprsz(desc); | |
878 | intptr_t i; | |
879 | ||
880 | for (i = 0; i < oprsz; i += sizeof(int16_t)) { | |
881 | uint8_t sh = *(uint16_t *)(b + i) & 15; | |
882 | *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; | |
883 | } | |
884 | clear_high(d, oprsz, desc); | |
885 | } | |
886 | ||
887 | void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) | |
888 | { | |
889 | intptr_t oprsz = simd_oprsz(desc); | |
890 | intptr_t i; | |
891 | ||
899f08ad | 892 | for (i = 0; i < oprsz; i += sizeof(int32_t)) { |
5ee5c14c RH |
893 | uint8_t sh = *(uint32_t *)(b + i) & 31; |
894 | *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; | |
895 | } | |
896 | clear_high(d, oprsz, desc); | |
897 | } | |
898 | ||
899 | void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) | |
900 | { | |
901 | intptr_t oprsz = simd_oprsz(desc); | |
902 | intptr_t i; | |
903 | ||
899f08ad | 904 | for (i = 0; i < oprsz; i += sizeof(int64_t)) { |
5ee5c14c RH |
905 | uint8_t sh = *(uint64_t *)(b + i) & 63; |
906 | *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; | |
907 | } | |
908 | clear_high(d, oprsz, desc); | |
909 | } | |
910 | ||
5d0ceda9 RH |
911 | void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc) |
912 | { | |
913 | intptr_t oprsz = simd_oprsz(desc); | |
914 | intptr_t i; | |
915 | ||
916 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { | |
917 | uint8_t sh = *(uint8_t *)(b + i) & 7; | |
918 | *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh); | |
919 | } | |
920 | clear_high(d, oprsz, desc); | |
921 | } | |
922 | ||
923 | void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc) | |
924 | { | |
925 | intptr_t oprsz = simd_oprsz(desc); | |
926 | intptr_t i; | |
927 | ||
928 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { | |
929 | uint8_t sh = *(uint16_t *)(b + i) & 15; | |
930 | *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh); | |
931 | } | |
932 | clear_high(d, oprsz, desc); | |
933 | } | |
934 | ||
935 | void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc) | |
936 | { | |
937 | intptr_t oprsz = simd_oprsz(desc); | |
938 | intptr_t i; | |
939 | ||
940 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { | |
941 | uint8_t sh = *(uint32_t *)(b + i) & 31; | |
942 | *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh); | |
943 | } | |
944 | clear_high(d, oprsz, desc); | |
945 | } | |
946 | ||
947 | void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc) | |
948 | { | |
949 | intptr_t oprsz = simd_oprsz(desc); | |
950 | intptr_t i; | |
951 | ||
952 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { | |
953 | uint8_t sh = *(uint64_t *)(b + i) & 63; | |
954 | *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh); | |
955 | } | |
956 | clear_high(d, oprsz, desc); | |
957 | } | |
958 | ||
959 | void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc) | |
960 | { | |
961 | intptr_t oprsz = simd_oprsz(desc); | |
962 | intptr_t i; | |
963 | ||
964 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { | |
965 | uint8_t sh = *(uint8_t *)(b + i) & 7; | |
966 | *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh); | |
967 | } | |
968 | clear_high(d, oprsz, desc); | |
969 | } | |
970 | ||
971 | void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc) | |
972 | { | |
973 | intptr_t oprsz = simd_oprsz(desc); | |
974 | intptr_t i; | |
975 | ||
976 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { | |
977 | uint8_t sh = *(uint16_t *)(b + i) & 15; | |
978 | *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh); | |
979 | } | |
980 | clear_high(d, oprsz, desc); | |
981 | } | |
982 | ||
983 | void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc) | |
984 | { | |
985 | intptr_t oprsz = simd_oprsz(desc); | |
986 | intptr_t i; | |
987 | ||
988 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { | |
989 | uint8_t sh = *(uint32_t *)(b + i) & 31; | |
990 | *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh); | |
991 | } | |
992 | clear_high(d, oprsz, desc); | |
993 | } | |
994 | ||
995 | void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc) | |
996 | { | |
997 | intptr_t oprsz = simd_oprsz(desc); | |
998 | intptr_t i; | |
999 | ||
1000 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { | |
1001 | uint8_t sh = *(uint64_t *)(b + i) & 63; | |
1002 | *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh); | |
1003 | } | |
1004 | clear_high(d, oprsz, desc); | |
1005 | } | |
1006 | ||
212be173 RH |
1007 | #define DO_CMP1(NAME, TYPE, OP) \ |
1008 | void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ | |
1009 | { \ | |
1010 | intptr_t oprsz = simd_oprsz(desc); \ | |
1011 | intptr_t i; \ | |
6cb1d3b8 | 1012 | for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ |
0270bd50 | 1013 | *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ |
212be173 RH |
1014 | } \ |
1015 | clear_high(d, oprsz, desc); \ | |
1016 | } | |
1017 | ||
1018 | #define DO_CMP2(SZ) \ | |
6c7ab301 RH |
1019 | DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \ |
1020 | DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \ | |
1021 | DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \ | |
1022 | DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \ | |
1023 | DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \ | |
1024 | DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=) | |
212be173 RH |
1025 | |
1026 | DO_CMP2(8) | |
1027 | DO_CMP2(16) | |
1028 | DO_CMP2(32) | |
1029 | DO_CMP2(64) | |
1030 | ||
212be173 RH |
1031 | #undef DO_CMP1 |
1032 | #undef DO_CMP2 | |
f49b12c6 RH |
1033 | |
1034 | void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) | |
1035 | { | |
1036 | intptr_t oprsz = simd_oprsz(desc); | |
1037 | intptr_t i; | |
1038 | ||
1039 | for (i = 0; i < oprsz; i += sizeof(int8_t)) { | |
1040 | int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); | |
1041 | if (r > INT8_MAX) { | |
1042 | r = INT8_MAX; | |
1043 | } else if (r < INT8_MIN) { | |
1044 | r = INT8_MIN; | |
1045 | } | |
1046 | *(int8_t *)(d + i) = r; | |
1047 | } | |
1048 | clear_high(d, oprsz, desc); | |
1049 | } | |
1050 | ||
1051 | void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) | |
1052 | { | |
1053 | intptr_t oprsz = simd_oprsz(desc); | |
1054 | intptr_t i; | |
1055 | ||
1056 | for (i = 0; i < oprsz; i += sizeof(int16_t)) { | |
1057 | int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); | |
1058 | if (r > INT16_MAX) { | |
1059 | r = INT16_MAX; | |
1060 | } else if (r < INT16_MIN) { | |
1061 | r = INT16_MIN; | |
1062 | } | |
1063 | *(int16_t *)(d + i) = r; | |
1064 | } | |
1065 | clear_high(d, oprsz, desc); | |
1066 | } | |
1067 | ||
1068 | void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) | |
1069 | { | |
1070 | intptr_t oprsz = simd_oprsz(desc); | |
1071 | intptr_t i; | |
1072 | ||
1073 | for (i = 0; i < oprsz; i += sizeof(int32_t)) { | |
1074 | int32_t ai = *(int32_t *)(a + i); | |
1075 | int32_t bi = *(int32_t *)(b + i); | |
7702a855 RH |
1076 | int32_t di; |
1077 | if (sadd32_overflow(ai, bi, &di)) { | |
f49b12c6 RH |
1078 | di = (di < 0 ? INT32_MAX : INT32_MIN); |
1079 | } | |
1080 | *(int32_t *)(d + i) = di; | |
1081 | } | |
1082 | clear_high(d, oprsz, desc); | |
1083 | } | |
1084 | ||
1085 | void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) | |
1086 | { | |
1087 | intptr_t oprsz = simd_oprsz(desc); | |
1088 | intptr_t i; | |
1089 | ||
1090 | for (i = 0; i < oprsz; i += sizeof(int64_t)) { | |
1091 | int64_t ai = *(int64_t *)(a + i); | |
1092 | int64_t bi = *(int64_t *)(b + i); | |
7702a855 RH |
1093 | int64_t di; |
1094 | if (sadd64_overflow(ai, bi, &di)) { | |
f49b12c6 RH |
1095 | di = (di < 0 ? INT64_MAX : INT64_MIN); |
1096 | } | |
1097 | *(int64_t *)(d + i) = di; | |
1098 | } | |
1099 | clear_high(d, oprsz, desc); | |
1100 | } | |
1101 | ||
1102 | void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) | |
1103 | { | |
1104 | intptr_t oprsz = simd_oprsz(desc); | |
1105 | intptr_t i; | |
1106 | ||
1107 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { | |
1108 | int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); | |
1109 | if (r > INT8_MAX) { | |
1110 | r = INT8_MAX; | |
1111 | } else if (r < INT8_MIN) { | |
1112 | r = INT8_MIN; | |
1113 | } | |
1114 | *(uint8_t *)(d + i) = r; | |
1115 | } | |
1116 | clear_high(d, oprsz, desc); | |
1117 | } | |
1118 | ||
1119 | void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) | |
1120 | { | |
1121 | intptr_t oprsz = simd_oprsz(desc); | |
1122 | intptr_t i; | |
1123 | ||
1124 | for (i = 0; i < oprsz; i += sizeof(int16_t)) { | |
1125 | int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); | |
1126 | if (r > INT16_MAX) { | |
1127 | r = INT16_MAX; | |
1128 | } else if (r < INT16_MIN) { | |
1129 | r = INT16_MIN; | |
1130 | } | |
1131 | *(int16_t *)(d + i) = r; | |
1132 | } | |
1133 | clear_high(d, oprsz, desc); | |
1134 | } | |
1135 | ||
1136 | void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) | |
1137 | { | |
1138 | intptr_t oprsz = simd_oprsz(desc); | |
1139 | intptr_t i; | |
1140 | ||
1141 | for (i = 0; i < oprsz; i += sizeof(int32_t)) { | |
1142 | int32_t ai = *(int32_t *)(a + i); | |
1143 | int32_t bi = *(int32_t *)(b + i); | |
7702a855 RH |
1144 | int32_t di; |
1145 | if (ssub32_overflow(ai, bi, &di)) { | |
f49b12c6 RH |
1146 | di = (di < 0 ? INT32_MAX : INT32_MIN); |
1147 | } | |
1148 | *(int32_t *)(d + i) = di; | |
1149 | } | |
1150 | clear_high(d, oprsz, desc); | |
1151 | } | |
1152 | ||
1153 | void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) | |
1154 | { | |
1155 | intptr_t oprsz = simd_oprsz(desc); | |
1156 | intptr_t i; | |
1157 | ||
1158 | for (i = 0; i < oprsz; i += sizeof(int64_t)) { | |
1159 | int64_t ai = *(int64_t *)(a + i); | |
1160 | int64_t bi = *(int64_t *)(b + i); | |
7702a855 RH |
1161 | int64_t di; |
1162 | if (ssub64_overflow(ai, bi, &di)) { | |
f49b12c6 RH |
1163 | di = (di < 0 ? INT64_MAX : INT64_MIN); |
1164 | } | |
1165 | *(int64_t *)(d + i) = di; | |
1166 | } | |
1167 | clear_high(d, oprsz, desc); | |
1168 | } | |
1169 | ||
1170 | void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc) | |
1171 | { | |
1172 | intptr_t oprsz = simd_oprsz(desc); | |
1173 | intptr_t i; | |
1174 | ||
1175 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { | |
1176 | unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); | |
1177 | if (r > UINT8_MAX) { | |
1178 | r = UINT8_MAX; | |
1179 | } | |
1180 | *(uint8_t *)(d + i) = r; | |
1181 | } | |
1182 | clear_high(d, oprsz, desc); | |
1183 | } | |
1184 | ||
1185 | void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc) | |
1186 | { | |
1187 | intptr_t oprsz = simd_oprsz(desc); | |
1188 | intptr_t i; | |
1189 | ||
1190 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { | |
1191 | unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); | |
1192 | if (r > UINT16_MAX) { | |
1193 | r = UINT16_MAX; | |
1194 | } | |
1195 | *(uint16_t *)(d + i) = r; | |
1196 | } | |
1197 | clear_high(d, oprsz, desc); | |
1198 | } | |
1199 | ||
1200 | void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) | |
1201 | { | |
1202 | intptr_t oprsz = simd_oprsz(desc); | |
1203 | intptr_t i; | |
1204 | ||
1205 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { | |
1206 | uint32_t ai = *(uint32_t *)(a + i); | |
1207 | uint32_t bi = *(uint32_t *)(b + i); | |
7702a855 RH |
1208 | uint32_t di; |
1209 | if (uadd32_overflow(ai, bi, &di)) { | |
f49b12c6 RH |
1210 | di = UINT32_MAX; |
1211 | } | |
1212 | *(uint32_t *)(d + i) = di; | |
1213 | } | |
1214 | clear_high(d, oprsz, desc); | |
1215 | } | |
1216 | ||
1217 | void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) | |
1218 | { | |
1219 | intptr_t oprsz = simd_oprsz(desc); | |
1220 | intptr_t i; | |
1221 | ||
1222 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { | |
1223 | uint64_t ai = *(uint64_t *)(a + i); | |
1224 | uint64_t bi = *(uint64_t *)(b + i); | |
7702a855 RH |
1225 | uint64_t di; |
1226 | if (uadd64_overflow(ai, bi, &di)) { | |
f49b12c6 RH |
1227 | di = UINT64_MAX; |
1228 | } | |
1229 | *(uint64_t *)(d + i) = di; | |
1230 | } | |
1231 | clear_high(d, oprsz, desc); | |
1232 | } | |
1233 | ||
1234 | void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc) | |
1235 | { | |
1236 | intptr_t oprsz = simd_oprsz(desc); | |
1237 | intptr_t i; | |
1238 | ||
1239 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { | |
1240 | int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); | |
1241 | if (r < 0) { | |
1242 | r = 0; | |
1243 | } | |
1244 | *(uint8_t *)(d + i) = r; | |
1245 | } | |
1246 | clear_high(d, oprsz, desc); | |
1247 | } | |
1248 | ||
1249 | void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc) | |
1250 | { | |
1251 | intptr_t oprsz = simd_oprsz(desc); | |
1252 | intptr_t i; | |
1253 | ||
1254 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { | |
1255 | int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); | |
1256 | if (r < 0) { | |
1257 | r = 0; | |
1258 | } | |
1259 | *(uint16_t *)(d + i) = r; | |
1260 | } | |
1261 | clear_high(d, oprsz, desc); | |
1262 | } | |
1263 | ||
1264 | void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) | |
1265 | { | |
1266 | intptr_t oprsz = simd_oprsz(desc); | |
1267 | intptr_t i; | |
1268 | ||
1269 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { | |
1270 | uint32_t ai = *(uint32_t *)(a + i); | |
1271 | uint32_t bi = *(uint32_t *)(b + i); | |
7702a855 RH |
1272 | uint32_t di; |
1273 | if (usub32_overflow(ai, bi, &di)) { | |
f49b12c6 RH |
1274 | di = 0; |
1275 | } | |
1276 | *(uint32_t *)(d + i) = di; | |
1277 | } | |
1278 | clear_high(d, oprsz, desc); | |
1279 | } | |
1280 | ||
1281 | void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) | |
1282 | { | |
1283 | intptr_t oprsz = simd_oprsz(desc); | |
1284 | intptr_t i; | |
1285 | ||
1286 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { | |
1287 | uint64_t ai = *(uint64_t *)(a + i); | |
1288 | uint64_t bi = *(uint64_t *)(b + i); | |
7702a855 RH |
1289 | uint64_t di; |
1290 | if (usub64_overflow(ai, bi, &di)) { | |
f49b12c6 RH |
1291 | di = 0; |
1292 | } | |
1293 | *(uint64_t *)(d + i) = di; | |
1294 | } | |
1295 | clear_high(d, oprsz, desc); | |
1296 | } | |
dd0a0fcd RH |
1297 | |
1298 | void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc) | |
1299 | { | |
1300 | intptr_t oprsz = simd_oprsz(desc); | |
1301 | intptr_t i; | |
1302 | ||
1303 | for (i = 0; i < oprsz; i += sizeof(int8_t)) { | |
1304 | int8_t aa = *(int8_t *)(a + i); | |
1305 | int8_t bb = *(int8_t *)(b + i); | |
1306 | int8_t dd = aa < bb ? aa : bb; | |
1307 | *(int8_t *)(d + i) = dd; | |
1308 | } | |
1309 | clear_high(d, oprsz, desc); | |
1310 | } | |
1311 | ||
1312 | void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) | |
1313 | { | |
1314 | intptr_t oprsz = simd_oprsz(desc); | |
1315 | intptr_t i; | |
1316 | ||
1317 | for (i = 0; i < oprsz; i += sizeof(int16_t)) { | |
1318 | int16_t aa = *(int16_t *)(a + i); | |
1319 | int16_t bb = *(int16_t *)(b + i); | |
1320 | int16_t dd = aa < bb ? aa : bb; | |
1321 | *(int16_t *)(d + i) = dd; | |
1322 | } | |
1323 | clear_high(d, oprsz, desc); | |
1324 | } | |
1325 | ||
1326 | void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) | |
1327 | { | |
1328 | intptr_t oprsz = simd_oprsz(desc); | |
1329 | intptr_t i; | |
1330 | ||
1331 | for (i = 0; i < oprsz; i += sizeof(int32_t)) { | |
1332 | int32_t aa = *(int32_t *)(a + i); | |
1333 | int32_t bb = *(int32_t *)(b + i); | |
1334 | int32_t dd = aa < bb ? aa : bb; | |
1335 | *(int32_t *)(d + i) = dd; | |
1336 | } | |
1337 | clear_high(d, oprsz, desc); | |
1338 | } | |
1339 | ||
1340 | void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) | |
1341 | { | |
1342 | intptr_t oprsz = simd_oprsz(desc); | |
1343 | intptr_t i; | |
1344 | ||
1345 | for (i = 0; i < oprsz; i += sizeof(int64_t)) { | |
1346 | int64_t aa = *(int64_t *)(a + i); | |
1347 | int64_t bb = *(int64_t *)(b + i); | |
1348 | int64_t dd = aa < bb ? aa : bb; | |
1349 | *(int64_t *)(d + i) = dd; | |
1350 | } | |
1351 | clear_high(d, oprsz, desc); | |
1352 | } | |
1353 | ||
1354 | void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) | |
1355 | { | |
1356 | intptr_t oprsz = simd_oprsz(desc); | |
1357 | intptr_t i; | |
1358 | ||
1359 | for (i = 0; i < oprsz; i += sizeof(int8_t)) { | |
1360 | int8_t aa = *(int8_t *)(a + i); | |
1361 | int8_t bb = *(int8_t *)(b + i); | |
1362 | int8_t dd = aa > bb ? aa : bb; | |
1363 | *(int8_t *)(d + i) = dd; | |
1364 | } | |
1365 | clear_high(d, oprsz, desc); | |
1366 | } | |
1367 | ||
1368 | void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) | |
1369 | { | |
1370 | intptr_t oprsz = simd_oprsz(desc); | |
1371 | intptr_t i; | |
1372 | ||
1373 | for (i = 0; i < oprsz; i += sizeof(int16_t)) { | |
1374 | int16_t aa = *(int16_t *)(a + i); | |
1375 | int16_t bb = *(int16_t *)(b + i); | |
1376 | int16_t dd = aa > bb ? aa : bb; | |
1377 | *(int16_t *)(d + i) = dd; | |
1378 | } | |
1379 | clear_high(d, oprsz, desc); | |
1380 | } | |
1381 | ||
1382 | void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) | |
1383 | { | |
1384 | intptr_t oprsz = simd_oprsz(desc); | |
1385 | intptr_t i; | |
1386 | ||
1387 | for (i = 0; i < oprsz; i += sizeof(int32_t)) { | |
1388 | int32_t aa = *(int32_t *)(a + i); | |
1389 | int32_t bb = *(int32_t *)(b + i); | |
1390 | int32_t dd = aa > bb ? aa : bb; | |
1391 | *(int32_t *)(d + i) = dd; | |
1392 | } | |
1393 | clear_high(d, oprsz, desc); | |
1394 | } | |
1395 | ||
1396 | void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) | |
1397 | { | |
1398 | intptr_t oprsz = simd_oprsz(desc); | |
1399 | intptr_t i; | |
1400 | ||
1401 | for (i = 0; i < oprsz; i += sizeof(int64_t)) { | |
1402 | int64_t aa = *(int64_t *)(a + i); | |
1403 | int64_t bb = *(int64_t *)(b + i); | |
1404 | int64_t dd = aa > bb ? aa : bb; | |
1405 | *(int64_t *)(d + i) = dd; | |
1406 | } | |
1407 | clear_high(d, oprsz, desc); | |
1408 | } | |
1409 | ||
1410 | void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) | |
1411 | { | |
1412 | intptr_t oprsz = simd_oprsz(desc); | |
1413 | intptr_t i; | |
1414 | ||
1415 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { | |
1416 | uint8_t aa = *(uint8_t *)(a + i); | |
1417 | uint8_t bb = *(uint8_t *)(b + i); | |
1418 | uint8_t dd = aa < bb ? aa : bb; | |
1419 | *(uint8_t *)(d + i) = dd; | |
1420 | } | |
1421 | clear_high(d, oprsz, desc); | |
1422 | } | |
1423 | ||
1424 | void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) | |
1425 | { | |
1426 | intptr_t oprsz = simd_oprsz(desc); | |
1427 | intptr_t i; | |
1428 | ||
1429 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { | |
1430 | uint16_t aa = *(uint16_t *)(a + i); | |
1431 | uint16_t bb = *(uint16_t *)(b + i); | |
1432 | uint16_t dd = aa < bb ? aa : bb; | |
1433 | *(uint16_t *)(d + i) = dd; | |
1434 | } | |
1435 | clear_high(d, oprsz, desc); | |
1436 | } | |
1437 | ||
1438 | void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) | |
1439 | { | |
1440 | intptr_t oprsz = simd_oprsz(desc); | |
1441 | intptr_t i; | |
1442 | ||
1443 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { | |
1444 | uint32_t aa = *(uint32_t *)(a + i); | |
1445 | uint32_t bb = *(uint32_t *)(b + i); | |
1446 | uint32_t dd = aa < bb ? aa : bb; | |
1447 | *(uint32_t *)(d + i) = dd; | |
1448 | } | |
1449 | clear_high(d, oprsz, desc); | |
1450 | } | |
1451 | ||
1452 | void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) | |
1453 | { | |
1454 | intptr_t oprsz = simd_oprsz(desc); | |
1455 | intptr_t i; | |
1456 | ||
1457 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { | |
1458 | uint64_t aa = *(uint64_t *)(a + i); | |
1459 | uint64_t bb = *(uint64_t *)(b + i); | |
1460 | uint64_t dd = aa < bb ? aa : bb; | |
1461 | *(uint64_t *)(d + i) = dd; | |
1462 | } | |
1463 | clear_high(d, oprsz, desc); | |
1464 | } | |
1465 | ||
1466 | void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) | |
1467 | { | |
1468 | intptr_t oprsz = simd_oprsz(desc); | |
1469 | intptr_t i; | |
1470 | ||
1471 | for (i = 0; i < oprsz; i += sizeof(uint8_t)) { | |
1472 | uint8_t aa = *(uint8_t *)(a + i); | |
1473 | uint8_t bb = *(uint8_t *)(b + i); | |
1474 | uint8_t dd = aa > bb ? aa : bb; | |
1475 | *(uint8_t *)(d + i) = dd; | |
1476 | } | |
1477 | clear_high(d, oprsz, desc); | |
1478 | } | |
1479 | ||
1480 | void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) | |
1481 | { | |
1482 | intptr_t oprsz = simd_oprsz(desc); | |
1483 | intptr_t i; | |
1484 | ||
1485 | for (i = 0; i < oprsz; i += sizeof(uint16_t)) { | |
1486 | uint16_t aa = *(uint16_t *)(a + i); | |
1487 | uint16_t bb = *(uint16_t *)(b + i); | |
1488 | uint16_t dd = aa > bb ? aa : bb; | |
1489 | *(uint16_t *)(d + i) = dd; | |
1490 | } | |
1491 | clear_high(d, oprsz, desc); | |
1492 | } | |
1493 | ||
1494 | void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) | |
1495 | { | |
1496 | intptr_t oprsz = simd_oprsz(desc); | |
1497 | intptr_t i; | |
1498 | ||
1499 | for (i = 0; i < oprsz; i += sizeof(uint32_t)) { | |
1500 | uint32_t aa = *(uint32_t *)(a + i); | |
1501 | uint32_t bb = *(uint32_t *)(b + i); | |
1502 | uint32_t dd = aa > bb ? aa : bb; | |
1503 | *(uint32_t *)(d + i) = dd; | |
1504 | } | |
1505 | clear_high(d, oprsz, desc); | |
1506 | } | |
1507 | ||
1508 | void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) | |
1509 | { | |
1510 | intptr_t oprsz = simd_oprsz(desc); | |
1511 | intptr_t i; | |
1512 | ||
1513 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { | |
1514 | uint64_t aa = *(uint64_t *)(a + i); | |
1515 | uint64_t bb = *(uint64_t *)(b + i); | |
1516 | uint64_t dd = aa > bb ? aa : bb; | |
1517 | *(uint64_t *)(d + i) = dd; | |
1518 | } | |
1519 | clear_high(d, oprsz, desc); | |
1520 | } | |
38dc1294 RH |
1521 | |
1522 | void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc) | |
1523 | { | |
1524 | intptr_t oprsz = simd_oprsz(desc); | |
1525 | intptr_t i; | |
1526 | ||
6c7ab301 RH |
1527 | for (i = 0; i < oprsz; i += sizeof(uint64_t)) { |
1528 | uint64_t aa = *(uint64_t *)(a + i); | |
1529 | uint64_t bb = *(uint64_t *)(b + i); | |
1530 | uint64_t cc = *(uint64_t *)(c + i); | |
1531 | *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa); | |
38dc1294 RH |
1532 | } |
1533 | clear_high(d, oprsz, desc); | |
1534 | } |