]>
git.proxmox.com Git - mirror_qemu.git/blob - accel/tcg/tcg-runtime-gvec.c
/*
 * Generic vectorized operation runtime
 *
 * Copyright (c) 2018 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
21 #include "qemu/host-utils.h"
23 #include "exec/helper-proto-common.h"
24 #include "tcg/tcg-gvec-desc.h"
/*
 * Zero-fill d from byte offset oprsz up to simd_maxsz(desc), one uint64_t
 * store at a time.  Nothing is written when maxsz does not exceed oprsz.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t limit = simd_maxsz(desc);

    if (unlikely(limit > oprsz)) {
        intptr_t off = oprsz;

        /* The guard above guarantees at least one store is needed. */
        do {
            *(uint64_t *)(d + off) = 0;
            off += sizeof(uint64_t);
        } while (off < limit);
    }
}
39 void HELPER(gvec_add8
)(void *d
, void *a
, void *b
, uint32_t desc
)
41 intptr_t oprsz
= simd_oprsz(desc
);
44 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
45 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) + *(uint8_t *)(b
+ i
);
47 clear_high(d
, oprsz
, desc
);
50 void HELPER(gvec_add16
)(void *d
, void *a
, void *b
, uint32_t desc
)
52 intptr_t oprsz
= simd_oprsz(desc
);
55 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
56 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) + *(uint16_t *)(b
+ i
);
58 clear_high(d
, oprsz
, desc
);
61 void HELPER(gvec_add32
)(void *d
, void *a
, void *b
, uint32_t desc
)
63 intptr_t oprsz
= simd_oprsz(desc
);
66 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
67 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) + *(uint32_t *)(b
+ i
);
69 clear_high(d
, oprsz
, desc
);
72 void HELPER(gvec_add64
)(void *d
, void *a
, void *b
, uint32_t desc
)
74 intptr_t oprsz
= simd_oprsz(desc
);
77 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
78 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) + *(uint64_t *)(b
+ i
);
80 clear_high(d
, oprsz
, desc
);
83 void HELPER(gvec_adds8
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
85 intptr_t oprsz
= simd_oprsz(desc
);
88 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
89 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) + (uint8_t)b
;
91 clear_high(d
, oprsz
, desc
);
94 void HELPER(gvec_adds16
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
96 intptr_t oprsz
= simd_oprsz(desc
);
99 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
100 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) + (uint16_t)b
;
102 clear_high(d
, oprsz
, desc
);
105 void HELPER(gvec_adds32
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
107 intptr_t oprsz
= simd_oprsz(desc
);
110 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
111 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) + (uint32_t)b
;
113 clear_high(d
, oprsz
, desc
);
116 void HELPER(gvec_adds64
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
118 intptr_t oprsz
= simd_oprsz(desc
);
121 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
122 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) + b
;
124 clear_high(d
, oprsz
, desc
);
127 void HELPER(gvec_sub8
)(void *d
, void *a
, void *b
, uint32_t desc
)
129 intptr_t oprsz
= simd_oprsz(desc
);
132 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
133 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) - *(uint8_t *)(b
+ i
);
135 clear_high(d
, oprsz
, desc
);
138 void HELPER(gvec_sub16
)(void *d
, void *a
, void *b
, uint32_t desc
)
140 intptr_t oprsz
= simd_oprsz(desc
);
143 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
144 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) - *(uint16_t *)(b
+ i
);
146 clear_high(d
, oprsz
, desc
);
149 void HELPER(gvec_sub32
)(void *d
, void *a
, void *b
, uint32_t desc
)
151 intptr_t oprsz
= simd_oprsz(desc
);
154 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
155 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) - *(uint32_t *)(b
+ i
);
157 clear_high(d
, oprsz
, desc
);
160 void HELPER(gvec_sub64
)(void *d
, void *a
, void *b
, uint32_t desc
)
162 intptr_t oprsz
= simd_oprsz(desc
);
165 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
166 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) - *(uint64_t *)(b
+ i
);
168 clear_high(d
, oprsz
, desc
);
171 void HELPER(gvec_subs8
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
173 intptr_t oprsz
= simd_oprsz(desc
);
176 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
177 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) - (uint8_t)b
;
179 clear_high(d
, oprsz
, desc
);
182 void HELPER(gvec_subs16
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
184 intptr_t oprsz
= simd_oprsz(desc
);
187 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
188 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) - (uint16_t)b
;
190 clear_high(d
, oprsz
, desc
);
193 void HELPER(gvec_subs32
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
195 intptr_t oprsz
= simd_oprsz(desc
);
198 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
199 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) - (uint32_t)b
;
201 clear_high(d
, oprsz
, desc
);
204 void HELPER(gvec_subs64
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
206 intptr_t oprsz
= simd_oprsz(desc
);
209 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
210 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) - b
;
212 clear_high(d
, oprsz
, desc
);
215 void HELPER(gvec_mul8
)(void *d
, void *a
, void *b
, uint32_t desc
)
217 intptr_t oprsz
= simd_oprsz(desc
);
220 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
221 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) * *(uint8_t *)(b
+ i
);
223 clear_high(d
, oprsz
, desc
);
226 void HELPER(gvec_mul16
)(void *d
, void *a
, void *b
, uint32_t desc
)
228 intptr_t oprsz
= simd_oprsz(desc
);
231 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
232 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) * *(uint16_t *)(b
+ i
);
234 clear_high(d
, oprsz
, desc
);
237 void HELPER(gvec_mul32
)(void *d
, void *a
, void *b
, uint32_t desc
)
239 intptr_t oprsz
= simd_oprsz(desc
);
242 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
243 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) * *(uint32_t *)(b
+ i
);
245 clear_high(d
, oprsz
, desc
);
248 void HELPER(gvec_mul64
)(void *d
, void *a
, void *b
, uint32_t desc
)
250 intptr_t oprsz
= simd_oprsz(desc
);
253 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
254 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) * *(uint64_t *)(b
+ i
);
256 clear_high(d
, oprsz
, desc
);
259 void HELPER(gvec_muls8
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
261 intptr_t oprsz
= simd_oprsz(desc
);
264 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
265 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) * (uint8_t)b
;
267 clear_high(d
, oprsz
, desc
);
270 void HELPER(gvec_muls16
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
272 intptr_t oprsz
= simd_oprsz(desc
);
275 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
276 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) * (uint16_t)b
;
278 clear_high(d
, oprsz
, desc
);
281 void HELPER(gvec_muls32
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
283 intptr_t oprsz
= simd_oprsz(desc
);
286 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
287 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) * (uint32_t)b
;
289 clear_high(d
, oprsz
, desc
);
292 void HELPER(gvec_muls64
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
294 intptr_t oprsz
= simd_oprsz(desc
);
297 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
298 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) * b
;
300 clear_high(d
, oprsz
, desc
);
303 void HELPER(gvec_neg8
)(void *d
, void *a
, uint32_t desc
)
305 intptr_t oprsz
= simd_oprsz(desc
);
308 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
309 *(uint8_t *)(d
+ i
) = -*(uint8_t *)(a
+ i
);
311 clear_high(d
, oprsz
, desc
);
314 void HELPER(gvec_neg16
)(void *d
, void *a
, uint32_t desc
)
316 intptr_t oprsz
= simd_oprsz(desc
);
319 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
320 *(uint16_t *)(d
+ i
) = -*(uint16_t *)(a
+ i
);
322 clear_high(d
, oprsz
, desc
);
325 void HELPER(gvec_neg32
)(void *d
, void *a
, uint32_t desc
)
327 intptr_t oprsz
= simd_oprsz(desc
);
330 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
331 *(uint32_t *)(d
+ i
) = -*(uint32_t *)(a
+ i
);
333 clear_high(d
, oprsz
, desc
);
336 void HELPER(gvec_neg64
)(void *d
, void *a
, uint32_t desc
)
338 intptr_t oprsz
= simd_oprsz(desc
);
341 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
342 *(uint64_t *)(d
+ i
) = -*(uint64_t *)(a
+ i
);
344 clear_high(d
, oprsz
, desc
);
347 void HELPER(gvec_abs8
)(void *d
, void *a
, uint32_t desc
)
349 intptr_t oprsz
= simd_oprsz(desc
);
352 for (i
= 0; i
< oprsz
; i
+= sizeof(int8_t)) {
353 int8_t aa
= *(int8_t *)(a
+ i
);
354 *(int8_t *)(d
+ i
) = aa
< 0 ? -aa
: aa
;
356 clear_high(d
, oprsz
, desc
);
359 void HELPER(gvec_abs16
)(void *d
, void *a
, uint32_t desc
)
361 intptr_t oprsz
= simd_oprsz(desc
);
364 for (i
= 0; i
< oprsz
; i
+= sizeof(int16_t)) {
365 int16_t aa
= *(int16_t *)(a
+ i
);
366 *(int16_t *)(d
+ i
) = aa
< 0 ? -aa
: aa
;
368 clear_high(d
, oprsz
, desc
);
371 void HELPER(gvec_abs32
)(void *d
, void *a
, uint32_t desc
)
373 intptr_t oprsz
= simd_oprsz(desc
);
376 for (i
= 0; i
< oprsz
; i
+= sizeof(int32_t)) {
377 int32_t aa
= *(int32_t *)(a
+ i
);
378 *(int32_t *)(d
+ i
) = aa
< 0 ? -aa
: aa
;
380 clear_high(d
, oprsz
, desc
);
383 void HELPER(gvec_abs64
)(void *d
, void *a
, uint32_t desc
)
385 intptr_t oprsz
= simd_oprsz(desc
);
388 for (i
= 0; i
< oprsz
; i
+= sizeof(int64_t)) {
389 int64_t aa
= *(int64_t *)(a
+ i
);
390 *(int64_t *)(d
+ i
) = aa
< 0 ? -aa
: aa
;
392 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers jump from 397
 * to 400, so at least one statement of this helper is not visible here.
 * What is visible: oprsz is read from simd_oprsz(desc) and clear_high() is
 * called on d.  Presumably the missing statement copies oprsz bytes from a
 * to d -- confirm against the complete file before changing anything.
 */
395 void HELPER(gvec_mov
)(void *d
, void *a
, uint32_t desc
)
397 intptr_t oprsz
= simd_oprsz(desc
);
400 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers jump from 405
 * to 411 and from 412 to 415, so several statements around the loop are not
 * visible here.  What is visible: the loop stores c into every uint64_t
 * slot of d below oprsz, and clear_high() is called afterwards.  Restore
 * the missing lines from the complete file before changing anything.
 */
403 void HELPER(gvec_dup64
)(void *d
, uint32_t desc
, uint64_t c
)
405 intptr_t oprsz
= simd_oprsz(desc
);
411 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
412 *(uint64_t *)(d
+ i
) = c
;
415 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers jump from 420
 * to 426 and from 427 to 430, so several statements around the loop are not
 * visible here.  What is visible: the loop stores c into every uint32_t
 * slot of d below oprsz, and clear_high() is called afterwards.  Restore
 * the missing lines from the complete file before changing anything.
 */
418 void HELPER(gvec_dup32
)(void *d
, uint32_t desc
, uint32_t c
)
420 intptr_t oprsz
= simd_oprsz(desc
);
426 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
427 *(uint32_t *)(d
+ i
) = c
;
430 clear_high(d
, oprsz
, desc
);
433 void HELPER(gvec_dup16
)(void *d
, uint32_t desc
, uint32_t c
)
435 HELPER(gvec_dup32
)(d
, desc
, 0x00010001 * (c
& 0xffff));
438 void HELPER(gvec_dup8
)(void *d
, uint32_t desc
, uint32_t c
)
440 HELPER(gvec_dup32
)(d
, desc
, 0x01010101 * (c
& 0xff));
443 void HELPER(gvec_not
)(void *d
, void *a
, uint32_t desc
)
445 intptr_t oprsz
= simd_oprsz(desc
);
448 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
449 *(uint64_t *)(d
+ i
) = ~*(uint64_t *)(a
+ i
);
451 clear_high(d
, oprsz
, desc
);
454 void HELPER(gvec_and
)(void *d
, void *a
, void *b
, uint32_t desc
)
456 intptr_t oprsz
= simd_oprsz(desc
);
459 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
460 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) & *(uint64_t *)(b
+ i
);
462 clear_high(d
, oprsz
, desc
);
465 void HELPER(gvec_or
)(void *d
, void *a
, void *b
, uint32_t desc
)
467 intptr_t oprsz
= simd_oprsz(desc
);
470 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
471 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) | *(uint64_t *)(b
+ i
);
473 clear_high(d
, oprsz
, desc
);
476 void HELPER(gvec_xor
)(void *d
, void *a
, void *b
, uint32_t desc
)
478 intptr_t oprsz
= simd_oprsz(desc
);
481 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
482 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) ^ *(uint64_t *)(b
+ i
);
484 clear_high(d
, oprsz
, desc
);
487 void HELPER(gvec_andc
)(void *d
, void *a
, void *b
, uint32_t desc
)
489 intptr_t oprsz
= simd_oprsz(desc
);
492 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
493 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) &~ *(uint64_t *)(b
+ i
);
495 clear_high(d
, oprsz
, desc
);
498 void HELPER(gvec_orc
)(void *d
, void *a
, void *b
, uint32_t desc
)
500 intptr_t oprsz
= simd_oprsz(desc
);
503 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
504 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) |~ *(uint64_t *)(b
+ i
);
506 clear_high(d
, oprsz
, desc
);
509 void HELPER(gvec_nand
)(void *d
, void *a
, void *b
, uint32_t desc
)
511 intptr_t oprsz
= simd_oprsz(desc
);
514 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
515 *(uint64_t *)(d
+ i
) = ~(*(uint64_t *)(a
+ i
) & *(uint64_t *)(b
+ i
));
517 clear_high(d
, oprsz
, desc
);
520 void HELPER(gvec_nor
)(void *d
, void *a
, void *b
, uint32_t desc
)
522 intptr_t oprsz
= simd_oprsz(desc
);
525 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
526 *(uint64_t *)(d
+ i
) = ~(*(uint64_t *)(a
+ i
) | *(uint64_t *)(b
+ i
));
528 clear_high(d
, oprsz
, desc
);
531 void HELPER(gvec_eqv
)(void *d
, void *a
, void *b
, uint32_t desc
)
533 intptr_t oprsz
= simd_oprsz(desc
);
536 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
537 *(uint64_t *)(d
+ i
) = ~(*(uint64_t *)(a
+ i
) ^ *(uint64_t *)(b
+ i
));
539 clear_high(d
, oprsz
, desc
);
542 void HELPER(gvec_ands
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
544 intptr_t oprsz
= simd_oprsz(desc
);
547 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
548 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) & b
;
550 clear_high(d
, oprsz
, desc
);
553 void HELPER(gvec_andcs
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
555 intptr_t oprsz
= simd_oprsz(desc
);
558 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
559 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) & ~b
;
561 clear_high(d
, oprsz
, desc
);
564 void HELPER(gvec_xors
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
566 intptr_t oprsz
= simd_oprsz(desc
);
569 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
570 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) ^ b
;
572 clear_high(d
, oprsz
, desc
);
575 void HELPER(gvec_ors
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
577 intptr_t oprsz
= simd_oprsz(desc
);
580 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
581 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) | b
;
583 clear_high(d
, oprsz
, desc
);
586 void HELPER(gvec_shl8i
)(void *d
, void *a
, uint32_t desc
)
588 intptr_t oprsz
= simd_oprsz(desc
);
589 int shift
= simd_data(desc
);
592 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
593 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) << shift
;
595 clear_high(d
, oprsz
, desc
);
598 void HELPER(gvec_shl16i
)(void *d
, void *a
, uint32_t desc
)
600 intptr_t oprsz
= simd_oprsz(desc
);
601 int shift
= simd_data(desc
);
604 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
605 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) << shift
;
607 clear_high(d
, oprsz
, desc
);
610 void HELPER(gvec_shl32i
)(void *d
, void *a
, uint32_t desc
)
612 intptr_t oprsz
= simd_oprsz(desc
);
613 int shift
= simd_data(desc
);
616 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
617 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) << shift
;
619 clear_high(d
, oprsz
, desc
);
622 void HELPER(gvec_shl64i
)(void *d
, void *a
, uint32_t desc
)
624 intptr_t oprsz
= simd_oprsz(desc
);
625 int shift
= simd_data(desc
);
628 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
629 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) << shift
;
631 clear_high(d
, oprsz
, desc
);
634 void HELPER(gvec_shr8i
)(void *d
, void *a
, uint32_t desc
)
636 intptr_t oprsz
= simd_oprsz(desc
);
637 int shift
= simd_data(desc
);
640 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
641 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) >> shift
;
643 clear_high(d
, oprsz
, desc
);
646 void HELPER(gvec_shr16i
)(void *d
, void *a
, uint32_t desc
)
648 intptr_t oprsz
= simd_oprsz(desc
);
649 int shift
= simd_data(desc
);
652 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
653 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) >> shift
;
655 clear_high(d
, oprsz
, desc
);
658 void HELPER(gvec_shr32i
)(void *d
, void *a
, uint32_t desc
)
660 intptr_t oprsz
= simd_oprsz(desc
);
661 int shift
= simd_data(desc
);
664 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
665 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) >> shift
;
667 clear_high(d
, oprsz
, desc
);
670 void HELPER(gvec_shr64i
)(void *d
, void *a
, uint32_t desc
)
672 intptr_t oprsz
= simd_oprsz(desc
);
673 int shift
= simd_data(desc
);
676 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
677 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) >> shift
;
679 clear_high(d
, oprsz
, desc
);
682 void HELPER(gvec_sar8i
)(void *d
, void *a
, uint32_t desc
)
684 intptr_t oprsz
= simd_oprsz(desc
);
685 int shift
= simd_data(desc
);
688 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
689 *(int8_t *)(d
+ i
) = *(int8_t *)(a
+ i
) >> shift
;
691 clear_high(d
, oprsz
, desc
);
694 void HELPER(gvec_sar16i
)(void *d
, void *a
, uint32_t desc
)
696 intptr_t oprsz
= simd_oprsz(desc
);
697 int shift
= simd_data(desc
);
700 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
701 *(int16_t *)(d
+ i
) = *(int16_t *)(a
+ i
) >> shift
;
703 clear_high(d
, oprsz
, desc
);
706 void HELPER(gvec_sar32i
)(void *d
, void *a
, uint32_t desc
)
708 intptr_t oprsz
= simd_oprsz(desc
);
709 int shift
= simd_data(desc
);
712 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
713 *(int32_t *)(d
+ i
) = *(int32_t *)(a
+ i
) >> shift
;
715 clear_high(d
, oprsz
, desc
);
718 void HELPER(gvec_sar64i
)(void *d
, void *a
, uint32_t desc
)
720 intptr_t oprsz
= simd_oprsz(desc
);
721 int shift
= simd_data(desc
);
724 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
725 *(int64_t *)(d
+ i
) = *(int64_t *)(a
+ i
) >> shift
;
727 clear_high(d
, oprsz
, desc
);
730 void HELPER(gvec_rotl8i
)(void *d
, void *a
, uint32_t desc
)
732 intptr_t oprsz
= simd_oprsz(desc
);
733 int shift
= simd_data(desc
);
736 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
737 *(uint8_t *)(d
+ i
) = rol8(*(uint8_t *)(a
+ i
), shift
);
739 clear_high(d
, oprsz
, desc
);
742 void HELPER(gvec_rotl16i
)(void *d
, void *a
, uint32_t desc
)
744 intptr_t oprsz
= simd_oprsz(desc
);
745 int shift
= simd_data(desc
);
748 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
749 *(uint16_t *)(d
+ i
) = rol16(*(uint16_t *)(a
+ i
), shift
);
751 clear_high(d
, oprsz
, desc
);
754 void HELPER(gvec_rotl32i
)(void *d
, void *a
, uint32_t desc
)
756 intptr_t oprsz
= simd_oprsz(desc
);
757 int shift
= simd_data(desc
);
760 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
761 *(uint32_t *)(d
+ i
) = rol32(*(uint32_t *)(a
+ i
), shift
);
763 clear_high(d
, oprsz
, desc
);
766 void HELPER(gvec_rotl64i
)(void *d
, void *a
, uint32_t desc
)
768 intptr_t oprsz
= simd_oprsz(desc
);
769 int shift
= simd_data(desc
);
772 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
773 *(uint64_t *)(d
+ i
) = rol64(*(uint64_t *)(a
+ i
), shift
);
775 clear_high(d
, oprsz
, desc
);
778 void HELPER(gvec_shl8v
)(void *d
, void *a
, void *b
, uint32_t desc
)
780 intptr_t oprsz
= simd_oprsz(desc
);
783 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
784 uint8_t sh
= *(uint8_t *)(b
+ i
) & 7;
785 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) << sh
;
787 clear_high(d
, oprsz
, desc
);
790 void HELPER(gvec_shl16v
)(void *d
, void *a
, void *b
, uint32_t desc
)
792 intptr_t oprsz
= simd_oprsz(desc
);
795 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
796 uint8_t sh
= *(uint16_t *)(b
+ i
) & 15;
797 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) << sh
;
799 clear_high(d
, oprsz
, desc
);
802 void HELPER(gvec_shl32v
)(void *d
, void *a
, void *b
, uint32_t desc
)
804 intptr_t oprsz
= simd_oprsz(desc
);
807 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
808 uint8_t sh
= *(uint32_t *)(b
+ i
) & 31;
809 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) << sh
;
811 clear_high(d
, oprsz
, desc
);
814 void HELPER(gvec_shl64v
)(void *d
, void *a
, void *b
, uint32_t desc
)
816 intptr_t oprsz
= simd_oprsz(desc
);
819 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
820 uint8_t sh
= *(uint64_t *)(b
+ i
) & 63;
821 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) << sh
;
823 clear_high(d
, oprsz
, desc
);
826 void HELPER(gvec_shr8v
)(void *d
, void *a
, void *b
, uint32_t desc
)
828 intptr_t oprsz
= simd_oprsz(desc
);
831 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
832 uint8_t sh
= *(uint8_t *)(b
+ i
) & 7;
833 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) >> sh
;
835 clear_high(d
, oprsz
, desc
);
838 void HELPER(gvec_shr16v
)(void *d
, void *a
, void *b
, uint32_t desc
)
840 intptr_t oprsz
= simd_oprsz(desc
);
843 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
844 uint8_t sh
= *(uint16_t *)(b
+ i
) & 15;
845 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) >> sh
;
847 clear_high(d
, oprsz
, desc
);
850 void HELPER(gvec_shr32v
)(void *d
, void *a
, void *b
, uint32_t desc
)
852 intptr_t oprsz
= simd_oprsz(desc
);
855 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
856 uint8_t sh
= *(uint32_t *)(b
+ i
) & 31;
857 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) >> sh
;
859 clear_high(d
, oprsz
, desc
);
862 void HELPER(gvec_shr64v
)(void *d
, void *a
, void *b
, uint32_t desc
)
864 intptr_t oprsz
= simd_oprsz(desc
);
867 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
868 uint8_t sh
= *(uint64_t *)(b
+ i
) & 63;
869 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) >> sh
;
871 clear_high(d
, oprsz
, desc
);
874 void HELPER(gvec_sar8v
)(void *d
, void *a
, void *b
, uint32_t desc
)
876 intptr_t oprsz
= simd_oprsz(desc
);
879 for (i
= 0; i
< oprsz
; i
+= sizeof(int8_t)) {
880 uint8_t sh
= *(uint8_t *)(b
+ i
) & 7;
881 *(int8_t *)(d
+ i
) = *(int8_t *)(a
+ i
) >> sh
;
883 clear_high(d
, oprsz
, desc
);
886 void HELPER(gvec_sar16v
)(void *d
, void *a
, void *b
, uint32_t desc
)
888 intptr_t oprsz
= simd_oprsz(desc
);
891 for (i
= 0; i
< oprsz
; i
+= sizeof(int16_t)) {
892 uint8_t sh
= *(uint16_t *)(b
+ i
) & 15;
893 *(int16_t *)(d
+ i
) = *(int16_t *)(a
+ i
) >> sh
;
895 clear_high(d
, oprsz
, desc
);
898 void HELPER(gvec_sar32v
)(void *d
, void *a
, void *b
, uint32_t desc
)
900 intptr_t oprsz
= simd_oprsz(desc
);
903 for (i
= 0; i
< oprsz
; i
+= sizeof(int32_t)) {
904 uint8_t sh
= *(uint32_t *)(b
+ i
) & 31;
905 *(int32_t *)(d
+ i
) = *(int32_t *)(a
+ i
) >> sh
;
907 clear_high(d
, oprsz
, desc
);
910 void HELPER(gvec_sar64v
)(void *d
, void *a
, void *b
, uint32_t desc
)
912 intptr_t oprsz
= simd_oprsz(desc
);
915 for (i
= 0; i
< oprsz
; i
+= sizeof(int64_t)) {
916 uint8_t sh
= *(uint64_t *)(b
+ i
) & 63;
917 *(int64_t *)(d
+ i
) = *(int64_t *)(a
+ i
) >> sh
;
919 clear_high(d
, oprsz
, desc
);
922 void HELPER(gvec_rotl8v
)(void *d
, void *a
, void *b
, uint32_t desc
)
924 intptr_t oprsz
= simd_oprsz(desc
);
927 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
928 uint8_t sh
= *(uint8_t *)(b
+ i
) & 7;
929 *(uint8_t *)(d
+ i
) = rol8(*(uint8_t *)(a
+ i
), sh
);
931 clear_high(d
, oprsz
, desc
);
934 void HELPER(gvec_rotl16v
)(void *d
, void *a
, void *b
, uint32_t desc
)
936 intptr_t oprsz
= simd_oprsz(desc
);
939 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
940 uint8_t sh
= *(uint16_t *)(b
+ i
) & 15;
941 *(uint16_t *)(d
+ i
) = rol16(*(uint16_t *)(a
+ i
), sh
);
943 clear_high(d
, oprsz
, desc
);
946 void HELPER(gvec_rotl32v
)(void *d
, void *a
, void *b
, uint32_t desc
)
948 intptr_t oprsz
= simd_oprsz(desc
);
951 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
952 uint8_t sh
= *(uint32_t *)(b
+ i
) & 31;
953 *(uint32_t *)(d
+ i
) = rol32(*(uint32_t *)(a
+ i
), sh
);
955 clear_high(d
, oprsz
, desc
);
958 void HELPER(gvec_rotl64v
)(void *d
, void *a
, void *b
, uint32_t desc
)
960 intptr_t oprsz
= simd_oprsz(desc
);
963 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
964 uint8_t sh
= *(uint64_t *)(b
+ i
) & 63;
965 *(uint64_t *)(d
+ i
) = rol64(*(uint64_t *)(a
+ i
), sh
);
967 clear_high(d
, oprsz
, desc
);
970 void HELPER(gvec_rotr8v
)(void *d
, void *a
, void *b
, uint32_t desc
)
972 intptr_t oprsz
= simd_oprsz(desc
);
975 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
976 uint8_t sh
= *(uint8_t *)(b
+ i
) & 7;
977 *(uint8_t *)(d
+ i
) = ror8(*(uint8_t *)(a
+ i
), sh
);
979 clear_high(d
, oprsz
, desc
);
982 void HELPER(gvec_rotr16v
)(void *d
, void *a
, void *b
, uint32_t desc
)
984 intptr_t oprsz
= simd_oprsz(desc
);
987 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
988 uint8_t sh
= *(uint16_t *)(b
+ i
) & 15;
989 *(uint16_t *)(d
+ i
) = ror16(*(uint16_t *)(a
+ i
), sh
);
991 clear_high(d
, oprsz
, desc
);
994 void HELPER(gvec_rotr32v
)(void *d
, void *a
, void *b
, uint32_t desc
)
996 intptr_t oprsz
= simd_oprsz(desc
);
999 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
1000 uint8_t sh
= *(uint32_t *)(b
+ i
) & 31;
1001 *(uint32_t *)(d
+ i
) = ror32(*(uint32_t *)(a
+ i
), sh
);
1003 clear_high(d
, oprsz
, desc
);
1006 void HELPER(gvec_rotr64v
)(void *d
, void *a
, void *b
, uint32_t desc
)
1008 intptr_t oprsz
= simd_oprsz(desc
);
1011 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
1012 uint8_t sh
= *(uint64_t *)(b
+ i
) & 63;
1013 *(uint64_t *)(d
+ i
) = ror64(*(uint64_t *)(a
+ i
), sh
);
1015 clear_high(d
, oprsz
, desc
);
/*
 * Define comparison helper NAME over lanes of TYPE.  Each result lane is
 * -(a[k] OP b[k]) stored as TYPE, i.e. -1 where the comparison holds and 0
 * where it does not.  clear_high() deals with the bytes past
 * simd_oprsz(desc).
 */
#define DO_CMP1(NAME, TYPE, OP)                                            \
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
{                                                                          \
    const intptr_t nbytes = simd_oprsz(desc);                              \
    intptr_t off = 0;                                                      \
    while (off < nbytes) {                                                 \
        TYPE lhs = *(TYPE *)(a + off);                                     \
        TYPE rhs = *(TYPE *)(b + off);                                     \
        *(TYPE *)(d + off) = -(lhs OP rhs);                                \
        off += sizeof(TYPE);                                               \
    }                                                                      \
    clear_high(d, nbytes, desc);                                           \
}
/*
 * Expand to the six comparison helpers for one lane width SZ (in bits):
 * eq, ne, ltu and leu compare uint<SZ>_t lanes, lt and le compare
 * int<SZ>_t lanes.
 */
#define DO_CMP2(SZ)                             \
    DO_CMP1(gvec_eq##SZ,  uint##SZ##_t, ==)     \
    DO_CMP1(gvec_ne##SZ,  uint##SZ##_t, !=)     \
    DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <)      \
    DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)     \
    DO_CMP1(gvec_lt##SZ,  int##SZ##_t,  <)      \
    DO_CMP1(gvec_le##SZ,  int##SZ##_t,  <=)
/*
 * NOTE(review): lossy extract.  The embedded listing numbers skip 1052-1053
 * and 1055-1056, so the first clamp test and the bodies of both clamp
 * branches are not visible here.  What is visible: r is the int sum of the
 * two int8_t lanes, there is a branch on r < INT8_MIN, and r is stored back
 * as int8_t before clear_high() runs.  Presumably a signed saturating add --
 * confirm against the complete file before changing anything.
 */
1045 void HELPER(gvec_ssadd8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1047 intptr_t oprsz
= simd_oprsz(desc
);
1050 for (i
= 0; i
< oprsz
; i
+= sizeof(int8_t)) {
1051 int r
= *(int8_t *)(a
+ i
) + *(int8_t *)(b
+ i
);
1054 } else if (r
< INT8_MIN
) {
1057 *(int8_t *)(d
+ i
) = r
;
1059 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers skip 1070 and
 * 1072-1073, so the bodies of both clamp branches are not visible here.
 * What is visible: r is the int sum of the two int16_t lanes, it is tested
 * against INT16_MAX and INT16_MIN, and it is stored back as int16_t before
 * clear_high() runs.  Presumably a signed saturating add -- confirm against
 * the complete file before changing anything.
 */
1062 void HELPER(gvec_ssadd16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1064 intptr_t oprsz
= simd_oprsz(desc
);
1067 for (i
= 0; i
< oprsz
; i
+= sizeof(int16_t)) {
1068 int r
= *(int16_t *)(a
+ i
) + *(int16_t *)(b
+ i
);
1069 if (r
> INT16_MAX
) {
1071 } else if (r
< INT16_MIN
) {
1074 *(int16_t *)(d
+ i
) = r
;
1076 clear_high(d
, oprsz
, desc
);
1079 void HELPER(gvec_ssadd32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1081 intptr_t oprsz
= simd_oprsz(desc
);
1084 for (i
= 0; i
< oprsz
; i
+= sizeof(int32_t)) {
1085 int32_t ai
= *(int32_t *)(a
+ i
);
1086 int32_t bi
= *(int32_t *)(b
+ i
);
1088 if (sadd32_overflow(ai
, bi
, &di
)) {
1089 di
= (di
< 0 ? INT32_MAX
: INT32_MIN
);
1091 *(int32_t *)(d
+ i
) = di
;
1093 clear_high(d
, oprsz
, desc
);
1096 void HELPER(gvec_ssadd64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1098 intptr_t oprsz
= simd_oprsz(desc
);
1101 for (i
= 0; i
< oprsz
; i
+= sizeof(int64_t)) {
1102 int64_t ai
= *(int64_t *)(a
+ i
);
1103 int64_t bi
= *(int64_t *)(b
+ i
);
1105 if (sadd64_overflow(ai
, bi
, &di
)) {
1106 di
= (di
< 0 ? INT64_MAX
: INT64_MIN
);
1108 *(int64_t *)(d
+ i
) = di
;
1110 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers skip 1120-1121
 * and 1123-1124, so the first clamp test and the bodies of both clamp
 * branches are not visible here.  What is visible: r is the int difference
 * of the two int8_t lanes, there is a branch on r < INT8_MIN, and r is
 * stored back through a uint8_t pointer before clear_high() runs.
 * Presumably a signed saturating subtract -- confirm against the complete
 * file before changing anything.
 */
1113 void HELPER(gvec_sssub8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1115 intptr_t oprsz
= simd_oprsz(desc
);
1118 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
1119 int r
= *(int8_t *)(a
+ i
) - *(int8_t *)(b
+ i
);
1122 } else if (r
< INT8_MIN
) {
1125 *(uint8_t *)(d
+ i
) = r
;
1127 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers skip 1138 and
 * 1140-1141, so the bodies of both clamp branches are not visible here.
 * What is visible: r is the int difference of the two int16_t lanes, it is
 * tested against INT16_MAX and INT16_MIN, and it is stored back as int16_t
 * before clear_high() runs.  Presumably a signed saturating subtract --
 * confirm against the complete file before changing anything.
 */
1130 void HELPER(gvec_sssub16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1132 intptr_t oprsz
= simd_oprsz(desc
);
1135 for (i
= 0; i
< oprsz
; i
+= sizeof(int16_t)) {
1136 int r
= *(int16_t *)(a
+ i
) - *(int16_t *)(b
+ i
);
1137 if (r
> INT16_MAX
) {
1139 } else if (r
< INT16_MIN
) {
1142 *(int16_t *)(d
+ i
) = r
;
1144 clear_high(d
, oprsz
, desc
);
1147 void HELPER(gvec_sssub32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1149 intptr_t oprsz
= simd_oprsz(desc
);
1152 for (i
= 0; i
< oprsz
; i
+= sizeof(int32_t)) {
1153 int32_t ai
= *(int32_t *)(a
+ i
);
1154 int32_t bi
= *(int32_t *)(b
+ i
);
1156 if (ssub32_overflow(ai
, bi
, &di
)) {
1157 di
= (di
< 0 ? INT32_MAX
: INT32_MIN
);
1159 *(int32_t *)(d
+ i
) = di
;
1161 clear_high(d
, oprsz
, desc
);
1164 void HELPER(gvec_sssub64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1166 intptr_t oprsz
= simd_oprsz(desc
);
1169 for (i
= 0; i
< oprsz
; i
+= sizeof(int64_t)) {
1170 int64_t ai
= *(int64_t *)(a
+ i
);
1171 int64_t bi
= *(int64_t *)(b
+ i
);
1173 if (ssub64_overflow(ai
, bi
, &di
)) {
1174 di
= (di
< 0 ? INT64_MAX
: INT64_MIN
);
1176 *(int64_t *)(d
+ i
) = di
;
1178 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers skip
 * 1189-1190, so the body of the r > UINT8_MAX branch is not visible here.
 * What is visible: r is the unsigned sum of the two uint8_t lanes, it is
 * tested against UINT8_MAX, and it is stored back as uint8_t before
 * clear_high() runs.  Presumably an unsigned saturating add -- confirm
 * against the complete file before changing anything.
 */
1181 void HELPER(gvec_usadd8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1183 intptr_t oprsz
= simd_oprsz(desc
);
1186 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
1187 unsigned r
= *(uint8_t *)(a
+ i
) + *(uint8_t *)(b
+ i
);
1188 if (r
> UINT8_MAX
) {
1191 *(uint8_t *)(d
+ i
) = r
;
1193 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers skip
 * 1204-1205, so the body of the r > UINT16_MAX branch is not visible here.
 * What is visible: r is the unsigned sum of the two uint16_t lanes, it is
 * tested against UINT16_MAX, and it is stored back as uint16_t before
 * clear_high() runs.  Presumably an unsigned saturating add -- confirm
 * against the complete file before changing anything.
 */
1196 void HELPER(gvec_usadd16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1198 intptr_t oprsz
= simd_oprsz(desc
);
1201 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
1202 unsigned r
= *(uint16_t *)(a
+ i
) + *(uint16_t *)(b
+ i
);
1203 if (r
> UINT16_MAX
) {
1206 *(uint16_t *)(d
+ i
) = r
;
1208 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers skip 1219 and
 * 1221-1222, so the declaration of di and the body of the overflow branch
 * are not visible here.  What is visible: the two uint32_t lanes are passed
 * to uadd32_overflow() with &di, and di is stored back as uint32_t before
 * clear_high() runs.  Presumably an unsigned saturating add -- confirm
 * against the complete file before changing anything.
 */
1211 void HELPER(gvec_usadd32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1213 intptr_t oprsz
= simd_oprsz(desc
);
1216 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
1217 uint32_t ai
= *(uint32_t *)(a
+ i
);
1218 uint32_t bi
= *(uint32_t *)(b
+ i
);
1220 if (uadd32_overflow(ai
, bi
, &di
)) {
1223 *(uint32_t *)(d
+ i
) = di
;
1225 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers skip 1236 and
 * 1238-1239, so the declaration of di and the body of the overflow branch
 * are not visible here.  What is visible: the two uint64_t lanes are passed
 * to uadd64_overflow() with &di, and di is stored back as uint64_t before
 * clear_high() runs.  Presumably an unsigned saturating add -- confirm
 * against the complete file before changing anything.
 */
1228 void HELPER(gvec_usadd64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1230 intptr_t oprsz
= simd_oprsz(desc
);
1233 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
1234 uint64_t ai
= *(uint64_t *)(a
+ i
);
1235 uint64_t bi
= *(uint64_t *)(b
+ i
);
1237 if (uadd64_overflow(ai
, bi
, &di
)) {
1240 *(uint64_t *)(d
+ i
) = di
;
1242 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers skip
 * 1252-1254, so whatever happens to r between its computation and the
 * store is not visible here.  What is visible: r is the int difference of
 * the two uint8_t lanes and it is stored back as uint8_t before
 * clear_high() runs.  Presumably an unsigned saturating subtract that
 * clamps negative results -- confirm against the complete file before
 * changing anything.
 */
1245 void HELPER(gvec_ussub8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1247 intptr_t oprsz
= simd_oprsz(desc
);
1250 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
1251 int r
= *(uint8_t *)(a
+ i
) - *(uint8_t *)(b
+ i
);
1255 *(uint8_t *)(d
+ i
) = r
;
1257 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers skip
 * 1267-1269, so whatever happens to r between its computation and the
 * store is not visible here.  What is visible: r is the int difference of
 * the two uint16_t lanes and it is stored back as uint16_t before
 * clear_high() runs.  Presumably an unsigned saturating subtract that
 * clamps negative results -- confirm against the complete file before
 * changing anything.
 */
1260 void HELPER(gvec_ussub16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1262 intptr_t oprsz
= simd_oprsz(desc
);
1265 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
1266 int r
= *(uint16_t *)(a
+ i
) - *(uint16_t *)(b
+ i
);
1270 *(uint16_t *)(d
+ i
) = r
;
1272 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers skip 1283 and
 * 1285-1286, so the declaration of di and the body of the overflow branch
 * are not visible here.  What is visible: the two uint32_t lanes are passed
 * to usub32_overflow() with &di, and di is stored back as uint32_t before
 * clear_high() runs.  Presumably an unsigned saturating subtract -- confirm
 * against the complete file before changing anything.
 */
1275 void HELPER(gvec_ussub32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1277 intptr_t oprsz
= simd_oprsz(desc
);
1280 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
1281 uint32_t ai
= *(uint32_t *)(a
+ i
);
1282 uint32_t bi
= *(uint32_t *)(b
+ i
);
1284 if (usub32_overflow(ai
, bi
, &di
)) {
1287 *(uint32_t *)(d
+ i
) = di
;
1289 clear_high(d
, oprsz
, desc
);
/*
 * NOTE(review): lossy extract.  The embedded listing numbers skip 1300 and
 * 1302-1303, so the declaration of di and the body of the overflow branch
 * are not visible here.  What is visible: the two uint64_t lanes are passed
 * to usub64_overflow() with &di, and di is stored back as uint64_t before
 * clear_high() runs.  Presumably an unsigned saturating subtract -- confirm
 * against the complete file before changing anything.
 */
1292 void HELPER(gvec_ussub64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1294 intptr_t oprsz
= simd_oprsz(desc
);
1297 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
1298 uint64_t ai
= *(uint64_t *)(a
+ i
);
1299 uint64_t bi
= *(uint64_t *)(b
+ i
);
1301 if (usub64_overflow(ai
, bi
, &di
)) {
1304 *(uint64_t *)(d
+ i
) = di
;
1306 clear_high(d
, oprsz
, desc
);
1309 void HELPER(gvec_smin8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1311 intptr_t oprsz
= simd_oprsz(desc
);
1314 for (i
= 0; i
< oprsz
; i
+= sizeof(int8_t)) {
1315 int8_t aa
= *(int8_t *)(a
+ i
);
1316 int8_t bb
= *(int8_t *)(b
+ i
);
1317 int8_t dd
= aa
< bb
? aa
: bb
;
1318 *(int8_t *)(d
+ i
) = dd
;
1320 clear_high(d
, oprsz
, desc
);
1323 void HELPER(gvec_smin16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1325 intptr_t oprsz
= simd_oprsz(desc
);
1328 for (i
= 0; i
< oprsz
; i
+= sizeof(int16_t)) {
1329 int16_t aa
= *(int16_t *)(a
+ i
);
1330 int16_t bb
= *(int16_t *)(b
+ i
);
1331 int16_t dd
= aa
< bb
? aa
: bb
;
1332 *(int16_t *)(d
+ i
) = dd
;
1334 clear_high(d
, oprsz
, desc
);
1337 void HELPER(gvec_smin32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1339 intptr_t oprsz
= simd_oprsz(desc
);
1342 for (i
= 0; i
< oprsz
; i
+= sizeof(int32_t)) {
1343 int32_t aa
= *(int32_t *)(a
+ i
);
1344 int32_t bb
= *(int32_t *)(b
+ i
);
1345 int32_t dd
= aa
< bb
? aa
: bb
;
1346 *(int32_t *)(d
+ i
) = dd
;
1348 clear_high(d
, oprsz
, desc
);
1351 void HELPER(gvec_smin64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1353 intptr_t oprsz
= simd_oprsz(desc
);
1356 for (i
= 0; i
< oprsz
; i
+= sizeof(int64_t)) {
1357 int64_t aa
= *(int64_t *)(a
+ i
);
1358 int64_t bb
= *(int64_t *)(b
+ i
);
1359 int64_t dd
= aa
< bb
? aa
: bb
;
1360 *(int64_t *)(d
+ i
) = dd
;
1362 clear_high(d
, oprsz
, desc
);
1365 void HELPER(gvec_smax8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1367 intptr_t oprsz
= simd_oprsz(desc
);
1370 for (i
= 0; i
< oprsz
; i
+= sizeof(int8_t)) {
1371 int8_t aa
= *(int8_t *)(a
+ i
);
1372 int8_t bb
= *(int8_t *)(b
+ i
);
1373 int8_t dd
= aa
> bb
? aa
: bb
;
1374 *(int8_t *)(d
+ i
) = dd
;
1376 clear_high(d
, oprsz
, desc
);
1379 void HELPER(gvec_smax16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1381 intptr_t oprsz
= simd_oprsz(desc
);
1384 for (i
= 0; i
< oprsz
; i
+= sizeof(int16_t)) {
1385 int16_t aa
= *(int16_t *)(a
+ i
);
1386 int16_t bb
= *(int16_t *)(b
+ i
);
1387 int16_t dd
= aa
> bb
? aa
: bb
;
1388 *(int16_t *)(d
+ i
) = dd
;
1390 clear_high(d
, oprsz
, desc
);
1393 void HELPER(gvec_smax32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1395 intptr_t oprsz
= simd_oprsz(desc
);
1398 for (i
= 0; i
< oprsz
; i
+= sizeof(int32_t)) {
1399 int32_t aa
= *(int32_t *)(a
+ i
);
1400 int32_t bb
= *(int32_t *)(b
+ i
);
1401 int32_t dd
= aa
> bb
? aa
: bb
;
1402 *(int32_t *)(d
+ i
) = dd
;
1404 clear_high(d
, oprsz
, desc
);
1407 void HELPER(gvec_smax64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1409 intptr_t oprsz
= simd_oprsz(desc
);
1412 for (i
= 0; i
< oprsz
; i
+= sizeof(int64_t)) {
1413 int64_t aa
= *(int64_t *)(a
+ i
);
1414 int64_t bb
= *(int64_t *)(b
+ i
);
1415 int64_t dd
= aa
> bb
? aa
: bb
;
1416 *(int64_t *)(d
+ i
) = dd
;
1418 clear_high(d
, oprsz
, desc
);
1421 void HELPER(gvec_umin8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1423 intptr_t oprsz
= simd_oprsz(desc
);
1426 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
1427 uint8_t aa
= *(uint8_t *)(a
+ i
);
1428 uint8_t bb
= *(uint8_t *)(b
+ i
);
1429 uint8_t dd
= aa
< bb
? aa
: bb
;
1430 *(uint8_t *)(d
+ i
) = dd
;
1432 clear_high(d
, oprsz
, desc
);
1435 void HELPER(gvec_umin16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1437 intptr_t oprsz
= simd_oprsz(desc
);
1440 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
1441 uint16_t aa
= *(uint16_t *)(a
+ i
);
1442 uint16_t bb
= *(uint16_t *)(b
+ i
);
1443 uint16_t dd
= aa
< bb
? aa
: bb
;
1444 *(uint16_t *)(d
+ i
) = dd
;
1446 clear_high(d
, oprsz
, desc
);
1449 void HELPER(gvec_umin32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1451 intptr_t oprsz
= simd_oprsz(desc
);
1454 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
1455 uint32_t aa
= *(uint32_t *)(a
+ i
);
1456 uint32_t bb
= *(uint32_t *)(b
+ i
);
1457 uint32_t dd
= aa
< bb
? aa
: bb
;
1458 *(uint32_t *)(d
+ i
) = dd
;
1460 clear_high(d
, oprsz
, desc
);
1463 void HELPER(gvec_umin64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1465 intptr_t oprsz
= simd_oprsz(desc
);
1468 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
1469 uint64_t aa
= *(uint64_t *)(a
+ i
);
1470 uint64_t bb
= *(uint64_t *)(b
+ i
);
1471 uint64_t dd
= aa
< bb
? aa
: bb
;
1472 *(uint64_t *)(d
+ i
) = dd
;
1474 clear_high(d
, oprsz
, desc
);
1477 void HELPER(gvec_umax8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1479 intptr_t oprsz
= simd_oprsz(desc
);
1482 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
1483 uint8_t aa
= *(uint8_t *)(a
+ i
);
1484 uint8_t bb
= *(uint8_t *)(b
+ i
);
1485 uint8_t dd
= aa
> bb
? aa
: bb
;
1486 *(uint8_t *)(d
+ i
) = dd
;
1488 clear_high(d
, oprsz
, desc
);
1491 void HELPER(gvec_umax16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1493 intptr_t oprsz
= simd_oprsz(desc
);
1496 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
1497 uint16_t aa
= *(uint16_t *)(a
+ i
);
1498 uint16_t bb
= *(uint16_t *)(b
+ i
);
1499 uint16_t dd
= aa
> bb
? aa
: bb
;
1500 *(uint16_t *)(d
+ i
) = dd
;
1502 clear_high(d
, oprsz
, desc
);
1505 void HELPER(gvec_umax32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1507 intptr_t oprsz
= simd_oprsz(desc
);
1510 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
1511 uint32_t aa
= *(uint32_t *)(a
+ i
);
1512 uint32_t bb
= *(uint32_t *)(b
+ i
);
1513 uint32_t dd
= aa
> bb
? aa
: bb
;
1514 *(uint32_t *)(d
+ i
) = dd
;
1516 clear_high(d
, oprsz
, desc
);
1519 void HELPER(gvec_umax64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1521 intptr_t oprsz
= simd_oprsz(desc
);
1524 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
1525 uint64_t aa
= *(uint64_t *)(a
+ i
);
1526 uint64_t bb
= *(uint64_t *)(b
+ i
);
1527 uint64_t dd
= aa
> bb
? aa
: bb
;
1528 *(uint64_t *)(d
+ i
) = dd
;
1530 clear_high(d
, oprsz
, desc
);
1533 void HELPER(gvec_bitsel
)(void *d
, void *a
, void *b
, void *c
, uint32_t desc
)
1535 intptr_t oprsz
= simd_oprsz(desc
);
1538 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
1539 uint64_t aa
= *(uint64_t *)(a
+ i
);
1540 uint64_t bb
= *(uint64_t *)(b
+ i
);
1541 uint64_t cc
= *(uint64_t *)(c
+ i
);
1542 *(uint64_t *)(d
+ i
) = (bb
& aa
) | (cc
& ~aa
);
1544 clear_high(d
, oprsz
, desc
);