]> git.proxmox.com Git - mirror_qemu.git/blob - accel/tcg/tcg-runtime-gvec.c
tcg: Add support for vector absolute value
[mirror_qemu.git] / accel / tcg / tcg-runtime-gvec.c
1 /*
2 * Generic vectorized operation runtime
3 *
4 * Copyright (c) 2018 Linaro
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "tcg-gvec-desc.h"
25
26
27 /* Virtually all hosts support 16-byte vectors. Those that don't can emulate
28 * them via GCC's generic vector extension. This turns out to be simpler and
29 * more reliable than getting the compiler to autovectorize.
30 *
31 * In tcg-op-gvec.c, we asserted that both the size and alignment of the data
32 * are multiples of 16.
33 *
34 * When the compiler does not support all of the operations we require, the
35 * loops are written so that we can always fall back on the base types.
36 */
37 #ifdef CONFIG_VECTOR16
38 typedef uint8_t vec8 __attribute__((vector_size(16)));
39 typedef uint16_t vec16 __attribute__((vector_size(16)));
40 typedef uint32_t vec32 __attribute__((vector_size(16)));
41 typedef uint64_t vec64 __attribute__((vector_size(16)));
42
43 typedef int8_t svec8 __attribute__((vector_size(16)));
44 typedef int16_t svec16 __attribute__((vector_size(16)));
45 typedef int32_t svec32 __attribute__((vector_size(16)));
46 typedef int64_t svec64 __attribute__((vector_size(16)));
47
48 #define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
49 #define DUP8(X) { X, X, X, X, X, X, X, X }
50 #define DUP4(X) { X, X, X, X }
51 #define DUP2(X) { X, X }
52 #else
53 typedef uint8_t vec8;
54 typedef uint16_t vec16;
55 typedef uint32_t vec32;
56 typedef uint64_t vec64;
57
58 typedef int8_t svec8;
59 typedef int16_t svec16;
60 typedef int32_t svec32;
61 typedef int64_t svec64;
62
63 #define DUP16(X) X
64 #define DUP8(X) X
65 #define DUP4(X) X
66 #define DUP2(X) X
67 #endif /* CONFIG_VECTOR16 */
68
/*
 * Zero the bytes between the operand size and the maximum vector size.
 * All stores are uint64_t sized; tcg-op-gvec.c guarantees both sizes
 * are multiples of 16, so no partial word remains.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    intptr_t maxsz = simd_maxsz(desc);
    intptr_t off;

    if (unlikely(maxsz > oprsz)) {
        for (off = oprsz; off < maxsz; off += sizeof(uint64_t)) {
            *(uint64_t *)(d + off) = 0;
        }
    }
}
80
81 void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
82 {
83 intptr_t oprsz = simd_oprsz(desc);
84 intptr_t i;
85
86 for (i = 0; i < oprsz; i += sizeof(vec8)) {
87 *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);
88 }
89 clear_high(d, oprsz, desc);
90 }
91
92 void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
93 {
94 intptr_t oprsz = simd_oprsz(desc);
95 intptr_t i;
96
97 for (i = 0; i < oprsz; i += sizeof(vec16)) {
98 *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
99 }
100 clear_high(d, oprsz, desc);
101 }
102
103 void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
104 {
105 intptr_t oprsz = simd_oprsz(desc);
106 intptr_t i;
107
108 for (i = 0; i < oprsz; i += sizeof(vec32)) {
109 *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
110 }
111 clear_high(d, oprsz, desc);
112 }
113
114 void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
115 {
116 intptr_t oprsz = simd_oprsz(desc);
117 intptr_t i;
118
119 for (i = 0; i < oprsz; i += sizeof(vec64)) {
120 *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
121 }
122 clear_high(d, oprsz, desc);
123 }
124
125 void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
126 {
127 intptr_t oprsz = simd_oprsz(desc);
128 vec8 vecb = (vec8)DUP16(b);
129 intptr_t i;
130
131 for (i = 0; i < oprsz; i += sizeof(vec8)) {
132 *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb;
133 }
134 clear_high(d, oprsz, desc);
135 }
136
137 void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
138 {
139 intptr_t oprsz = simd_oprsz(desc);
140 vec16 vecb = (vec16)DUP8(b);
141 intptr_t i;
142
143 for (i = 0; i < oprsz; i += sizeof(vec16)) {
144 *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb;
145 }
146 clear_high(d, oprsz, desc);
147 }
148
149 void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
150 {
151 intptr_t oprsz = simd_oprsz(desc);
152 vec32 vecb = (vec32)DUP4(b);
153 intptr_t i;
154
155 for (i = 0; i < oprsz; i += sizeof(vec32)) {
156 *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb;
157 }
158 clear_high(d, oprsz, desc);
159 }
160
161 void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
162 {
163 intptr_t oprsz = simd_oprsz(desc);
164 vec64 vecb = (vec64)DUP2(b);
165 intptr_t i;
166
167 for (i = 0; i < oprsz; i += sizeof(vec64)) {
168 *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb;
169 }
170 clear_high(d, oprsz, desc);
171 }
172
173 void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
174 {
175 intptr_t oprsz = simd_oprsz(desc);
176 intptr_t i;
177
178 for (i = 0; i < oprsz; i += sizeof(vec8)) {
179 *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
180 }
181 clear_high(d, oprsz, desc);
182 }
183
184 void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
185 {
186 intptr_t oprsz = simd_oprsz(desc);
187 intptr_t i;
188
189 for (i = 0; i < oprsz; i += sizeof(vec16)) {
190 *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
191 }
192 clear_high(d, oprsz, desc);
193 }
194
195 void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
196 {
197 intptr_t oprsz = simd_oprsz(desc);
198 intptr_t i;
199
200 for (i = 0; i < oprsz; i += sizeof(vec32)) {
201 *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
202 }
203 clear_high(d, oprsz, desc);
204 }
205
206 void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
207 {
208 intptr_t oprsz = simd_oprsz(desc);
209 intptr_t i;
210
211 for (i = 0; i < oprsz; i += sizeof(vec64)) {
212 *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
213 }
214 clear_high(d, oprsz, desc);
215 }
216
217 void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
218 {
219 intptr_t oprsz = simd_oprsz(desc);
220 vec8 vecb = (vec8)DUP16(b);
221 intptr_t i;
222
223 for (i = 0; i < oprsz; i += sizeof(vec8)) {
224 *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb;
225 }
226 clear_high(d, oprsz, desc);
227 }
228
229 void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
230 {
231 intptr_t oprsz = simd_oprsz(desc);
232 vec16 vecb = (vec16)DUP8(b);
233 intptr_t i;
234
235 for (i = 0; i < oprsz; i += sizeof(vec16)) {
236 *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb;
237 }
238 clear_high(d, oprsz, desc);
239 }
240
241 void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
242 {
243 intptr_t oprsz = simd_oprsz(desc);
244 vec32 vecb = (vec32)DUP4(b);
245 intptr_t i;
246
247 for (i = 0; i < oprsz; i += sizeof(vec32)) {
248 *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb;
249 }
250 clear_high(d, oprsz, desc);
251 }
252
253 void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
254 {
255 intptr_t oprsz = simd_oprsz(desc);
256 vec64 vecb = (vec64)DUP2(b);
257 intptr_t i;
258
259 for (i = 0; i < oprsz; i += sizeof(vec64)) {
260 *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb;
261 }
262 clear_high(d, oprsz, desc);
263 }
264
265 void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
266 {
267 intptr_t oprsz = simd_oprsz(desc);
268 intptr_t i;
269
270 for (i = 0; i < oprsz; i += sizeof(vec8)) {
271 *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i);
272 }
273 clear_high(d, oprsz, desc);
274 }
275
276 void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
277 {
278 intptr_t oprsz = simd_oprsz(desc);
279 intptr_t i;
280
281 for (i = 0; i < oprsz; i += sizeof(vec16)) {
282 *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i);
283 }
284 clear_high(d, oprsz, desc);
285 }
286
287 void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
288 {
289 intptr_t oprsz = simd_oprsz(desc);
290 intptr_t i;
291
292 for (i = 0; i < oprsz; i += sizeof(vec32)) {
293 *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i);
294 }
295 clear_high(d, oprsz, desc);
296 }
297
298 void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
299 {
300 intptr_t oprsz = simd_oprsz(desc);
301 intptr_t i;
302
303 for (i = 0; i < oprsz; i += sizeof(vec64)) {
304 *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i);
305 }
306 clear_high(d, oprsz, desc);
307 }
308
309 void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
310 {
311 intptr_t oprsz = simd_oprsz(desc);
312 vec8 vecb = (vec8)DUP16(b);
313 intptr_t i;
314
315 for (i = 0; i < oprsz; i += sizeof(vec8)) {
316 *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb;
317 }
318 clear_high(d, oprsz, desc);
319 }
320
321 void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
322 {
323 intptr_t oprsz = simd_oprsz(desc);
324 vec16 vecb = (vec16)DUP8(b);
325 intptr_t i;
326
327 for (i = 0; i < oprsz; i += sizeof(vec16)) {
328 *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb;
329 }
330 clear_high(d, oprsz, desc);
331 }
332
333 void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
334 {
335 intptr_t oprsz = simd_oprsz(desc);
336 vec32 vecb = (vec32)DUP4(b);
337 intptr_t i;
338
339 for (i = 0; i < oprsz; i += sizeof(vec32)) {
340 *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb;
341 }
342 clear_high(d, oprsz, desc);
343 }
344
345 void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
346 {
347 intptr_t oprsz = simd_oprsz(desc);
348 vec64 vecb = (vec64)DUP2(b);
349 intptr_t i;
350
351 for (i = 0; i < oprsz; i += sizeof(vec64)) {
352 *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb;
353 }
354 clear_high(d, oprsz, desc);
355 }
356
357 void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
358 {
359 intptr_t oprsz = simd_oprsz(desc);
360 intptr_t i;
361
362 for (i = 0; i < oprsz; i += sizeof(vec8)) {
363 *(vec8 *)(d + i) = -*(vec8 *)(a + i);
364 }
365 clear_high(d, oprsz, desc);
366 }
367
368 void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
369 {
370 intptr_t oprsz = simd_oprsz(desc);
371 intptr_t i;
372
373 for (i = 0; i < oprsz; i += sizeof(vec16)) {
374 *(vec16 *)(d + i) = -*(vec16 *)(a + i);
375 }
376 clear_high(d, oprsz, desc);
377 }
378
379 void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
380 {
381 intptr_t oprsz = simd_oprsz(desc);
382 intptr_t i;
383
384 for (i = 0; i < oprsz; i += sizeof(vec32)) {
385 *(vec32 *)(d + i) = -*(vec32 *)(a + i);
386 }
387 clear_high(d, oprsz, desc);
388 }
389
390 void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
391 {
392 intptr_t oprsz = simd_oprsz(desc);
393 intptr_t i;
394
395 for (i = 0; i < oprsz; i += sizeof(vec64)) {
396 *(vec64 *)(d + i) = -*(vec64 *)(a + i);
397 }
398 clear_high(d, oprsz, desc);
399 }
400
401 void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
402 {
403 intptr_t oprsz = simd_oprsz(desc);
404 intptr_t i;
405
406 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
407 int8_t aa = *(int8_t *)(a + i);
408 *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
409 }
410 clear_high(d, oprsz, desc);
411 }
412
413 void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
414 {
415 intptr_t oprsz = simd_oprsz(desc);
416 intptr_t i;
417
418 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
419 int16_t aa = *(int16_t *)(a + i);
420 *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
421 }
422 clear_high(d, oprsz, desc);
423 }
424
425 void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
426 {
427 intptr_t oprsz = simd_oprsz(desc);
428 intptr_t i;
429
430 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
431 int32_t aa = *(int32_t *)(a + i);
432 *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
433 }
434 clear_high(d, oprsz, desc);
435 }
436
437 void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
438 {
439 intptr_t oprsz = simd_oprsz(desc);
440 intptr_t i;
441
442 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
443 int64_t aa = *(int64_t *)(a + i);
444 *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
445 }
446 clear_high(d, oprsz, desc);
447 }
448
449 void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
450 {
451 intptr_t oprsz = simd_oprsz(desc);
452
453 memcpy(d, a, oprsz);
454 clear_high(d, oprsz, desc);
455 }
456
457 void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
458 {
459 intptr_t oprsz = simd_oprsz(desc);
460 intptr_t i;
461
462 if (c == 0) {
463 oprsz = 0;
464 } else {
465 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
466 *(uint64_t *)(d + i) = c;
467 }
468 }
469 clear_high(d, oprsz, desc);
470 }
471
472 void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
473 {
474 intptr_t oprsz = simd_oprsz(desc);
475 intptr_t i;
476
477 if (c == 0) {
478 oprsz = 0;
479 } else {
480 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
481 *(uint32_t *)(d + i) = c;
482 }
483 }
484 clear_high(d, oprsz, desc);
485 }
486
487 void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
488 {
489 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
490 }
491
492 void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
493 {
494 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
495 }
496
497 void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
498 {
499 intptr_t oprsz = simd_oprsz(desc);
500 intptr_t i;
501
502 for (i = 0; i < oprsz; i += sizeof(vec64)) {
503 *(vec64 *)(d + i) = ~*(vec64 *)(a + i);
504 }
505 clear_high(d, oprsz, desc);
506 }
507
508 void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
509 {
510 intptr_t oprsz = simd_oprsz(desc);
511 intptr_t i;
512
513 for (i = 0; i < oprsz; i += sizeof(vec64)) {
514 *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
515 }
516 clear_high(d, oprsz, desc);
517 }
518
519 void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
520 {
521 intptr_t oprsz = simd_oprsz(desc);
522 intptr_t i;
523
524 for (i = 0; i < oprsz; i += sizeof(vec64)) {
525 *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
526 }
527 clear_high(d, oprsz, desc);
528 }
529
530 void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
531 {
532 intptr_t oprsz = simd_oprsz(desc);
533 intptr_t i;
534
535 for (i = 0; i < oprsz; i += sizeof(vec64)) {
536 *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
537 }
538 clear_high(d, oprsz, desc);
539 }
540
541 void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
542 {
543 intptr_t oprsz = simd_oprsz(desc);
544 intptr_t i;
545
546 for (i = 0; i < oprsz; i += sizeof(vec64)) {
547 *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
548 }
549 clear_high(d, oprsz, desc);
550 }
551
552 void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
553 {
554 intptr_t oprsz = simd_oprsz(desc);
555 intptr_t i;
556
557 for (i = 0; i < oprsz; i += sizeof(vec64)) {
558 *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
559 }
560 clear_high(d, oprsz, desc);
561 }
562
563 void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
564 {
565 intptr_t oprsz = simd_oprsz(desc);
566 intptr_t i;
567
568 for (i = 0; i < oprsz; i += sizeof(vec64)) {
569 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) & *(vec64 *)(b + i));
570 }
571 clear_high(d, oprsz, desc);
572 }
573
574 void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
575 {
576 intptr_t oprsz = simd_oprsz(desc);
577 intptr_t i;
578
579 for (i = 0; i < oprsz; i += sizeof(vec64)) {
580 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) | *(vec64 *)(b + i));
581 }
582 clear_high(d, oprsz, desc);
583 }
584
585 void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
586 {
587 intptr_t oprsz = simd_oprsz(desc);
588 intptr_t i;
589
590 for (i = 0; i < oprsz; i += sizeof(vec64)) {
591 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) ^ *(vec64 *)(b + i));
592 }
593 clear_high(d, oprsz, desc);
594 }
595
596 void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
597 {
598 intptr_t oprsz = simd_oprsz(desc);
599 vec64 vecb = (vec64)DUP2(b);
600 intptr_t i;
601
602 for (i = 0; i < oprsz; i += sizeof(vec64)) {
603 *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb;
604 }
605 clear_high(d, oprsz, desc);
606 }
607
608 void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
609 {
610 intptr_t oprsz = simd_oprsz(desc);
611 vec64 vecb = (vec64)DUP2(b);
612 intptr_t i;
613
614 for (i = 0; i < oprsz; i += sizeof(vec64)) {
615 *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb;
616 }
617 clear_high(d, oprsz, desc);
618 }
619
620 void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
621 {
622 intptr_t oprsz = simd_oprsz(desc);
623 vec64 vecb = (vec64)DUP2(b);
624 intptr_t i;
625
626 for (i = 0; i < oprsz; i += sizeof(vec64)) {
627 *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb;
628 }
629 clear_high(d, oprsz, desc);
630 }
631
632 void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
633 {
634 intptr_t oprsz = simd_oprsz(desc);
635 int shift = simd_data(desc);
636 intptr_t i;
637
638 for (i = 0; i < oprsz; i += sizeof(vec8)) {
639 *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
640 }
641 clear_high(d, oprsz, desc);
642 }
643
644 void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
645 {
646 intptr_t oprsz = simd_oprsz(desc);
647 int shift = simd_data(desc);
648 intptr_t i;
649
650 for (i = 0; i < oprsz; i += sizeof(vec16)) {
651 *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift;
652 }
653 clear_high(d, oprsz, desc);
654 }
655
656 void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
657 {
658 intptr_t oprsz = simd_oprsz(desc);
659 int shift = simd_data(desc);
660 intptr_t i;
661
662 for (i = 0; i < oprsz; i += sizeof(vec32)) {
663 *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift;
664 }
665 clear_high(d, oprsz, desc);
666 }
667
668 void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
669 {
670 intptr_t oprsz = simd_oprsz(desc);
671 int shift = simd_data(desc);
672 intptr_t i;
673
674 for (i = 0; i < oprsz; i += sizeof(vec64)) {
675 *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift;
676 }
677 clear_high(d, oprsz, desc);
678 }
679
680 void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
681 {
682 intptr_t oprsz = simd_oprsz(desc);
683 int shift = simd_data(desc);
684 intptr_t i;
685
686 for (i = 0; i < oprsz; i += sizeof(vec8)) {
687 *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift;
688 }
689 clear_high(d, oprsz, desc);
690 }
691
692 void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
693 {
694 intptr_t oprsz = simd_oprsz(desc);
695 int shift = simd_data(desc);
696 intptr_t i;
697
698 for (i = 0; i < oprsz; i += sizeof(vec16)) {
699 *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift;
700 }
701 clear_high(d, oprsz, desc);
702 }
703
704 void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
705 {
706 intptr_t oprsz = simd_oprsz(desc);
707 int shift = simd_data(desc);
708 intptr_t i;
709
710 for (i = 0; i < oprsz; i += sizeof(vec32)) {
711 *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift;
712 }
713 clear_high(d, oprsz, desc);
714 }
715
716 void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
717 {
718 intptr_t oprsz = simd_oprsz(desc);
719 int shift = simd_data(desc);
720 intptr_t i;
721
722 for (i = 0; i < oprsz; i += sizeof(vec64)) {
723 *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift;
724 }
725 clear_high(d, oprsz, desc);
726 }
727
728 void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
729 {
730 intptr_t oprsz = simd_oprsz(desc);
731 int shift = simd_data(desc);
732 intptr_t i;
733
734 for (i = 0; i < oprsz; i += sizeof(vec8)) {
735 *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift;
736 }
737 clear_high(d, oprsz, desc);
738 }
739
740 void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
741 {
742 intptr_t oprsz = simd_oprsz(desc);
743 int shift = simd_data(desc);
744 intptr_t i;
745
746 for (i = 0; i < oprsz; i += sizeof(vec16)) {
747 *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift;
748 }
749 clear_high(d, oprsz, desc);
750 }
751
752 void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
753 {
754 intptr_t oprsz = simd_oprsz(desc);
755 int shift = simd_data(desc);
756 intptr_t i;
757
758 for (i = 0; i < oprsz; i += sizeof(vec32)) {
759 *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift;
760 }
761 clear_high(d, oprsz, desc);
762 }
763
764 void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
765 {
766 intptr_t oprsz = simd_oprsz(desc);
767 int shift = simd_data(desc);
768 intptr_t i;
769
770 for (i = 0; i < oprsz; i += sizeof(vec64)) {
771 *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift;
772 }
773 clear_high(d, oprsz, desc);
774 }
775
776 void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
777 {
778 intptr_t oprsz = simd_oprsz(desc);
779 intptr_t i;
780
781 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
782 uint8_t sh = *(uint8_t *)(b + i) & 7;
783 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
784 }
785 clear_high(d, oprsz, desc);
786 }
787
788 void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
789 {
790 intptr_t oprsz = simd_oprsz(desc);
791 intptr_t i;
792
793 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
794 uint8_t sh = *(uint16_t *)(b + i) & 15;
795 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
796 }
797 clear_high(d, oprsz, desc);
798 }
799
800 void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
801 {
802 intptr_t oprsz = simd_oprsz(desc);
803 intptr_t i;
804
805 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
806 uint8_t sh = *(uint32_t *)(b + i) & 31;
807 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
808 }
809 clear_high(d, oprsz, desc);
810 }
811
812 void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
813 {
814 intptr_t oprsz = simd_oprsz(desc);
815 intptr_t i;
816
817 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
818 uint8_t sh = *(uint64_t *)(b + i) & 63;
819 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
820 }
821 clear_high(d, oprsz, desc);
822 }
823
824 void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
825 {
826 intptr_t oprsz = simd_oprsz(desc);
827 intptr_t i;
828
829 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
830 uint8_t sh = *(uint8_t *)(b + i) & 7;
831 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
832 }
833 clear_high(d, oprsz, desc);
834 }
835
836 void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
837 {
838 intptr_t oprsz = simd_oprsz(desc);
839 intptr_t i;
840
841 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
842 uint8_t sh = *(uint16_t *)(b + i) & 15;
843 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
844 }
845 clear_high(d, oprsz, desc);
846 }
847
848 void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
849 {
850 intptr_t oprsz = simd_oprsz(desc);
851 intptr_t i;
852
853 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
854 uint8_t sh = *(uint32_t *)(b + i) & 31;
855 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
856 }
857 clear_high(d, oprsz, desc);
858 }
859
860 void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
861 {
862 intptr_t oprsz = simd_oprsz(desc);
863 intptr_t i;
864
865 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
866 uint8_t sh = *(uint64_t *)(b + i) & 63;
867 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
868 }
869 clear_high(d, oprsz, desc);
870 }
871
872 void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
873 {
874 intptr_t oprsz = simd_oprsz(desc);
875 intptr_t i;
876
877 for (i = 0; i < oprsz; i += sizeof(vec8)) {
878 uint8_t sh = *(uint8_t *)(b + i) & 7;
879 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
880 }
881 clear_high(d, oprsz, desc);
882 }
883
884 void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
885 {
886 intptr_t oprsz = simd_oprsz(desc);
887 intptr_t i;
888
889 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
890 uint8_t sh = *(uint16_t *)(b + i) & 15;
891 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
892 }
893 clear_high(d, oprsz, desc);
894 }
895
896 void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
897 {
898 intptr_t oprsz = simd_oprsz(desc);
899 intptr_t i;
900
901 for (i = 0; i < oprsz; i += sizeof(vec32)) {
902 uint8_t sh = *(uint32_t *)(b + i) & 31;
903 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
904 }
905 clear_high(d, oprsz, desc);
906 }
907
908 void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
909 {
910 intptr_t oprsz = simd_oprsz(desc);
911 intptr_t i;
912
913 for (i = 0; i < oprsz; i += sizeof(vec64)) {
914 uint8_t sh = *(uint64_t *)(b + i) & 63;
915 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
916 }
917 clear_high(d, oprsz, desc);
918 }
919
920 /* If vectors are enabled, the compiler fills in -1 for true.
921 Otherwise, we must take care of this by hand. */
922 #ifdef CONFIG_VECTOR16
923 # define DO_CMP0(X) X
924 #else
925 # define DO_CMP0(X) -(X)
926 #endif
927
928 #define DO_CMP1(NAME, TYPE, OP) \
929 void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
930 { \
931 intptr_t oprsz = simd_oprsz(desc); \
932 intptr_t i; \
933 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \
934 *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
935 } \
936 clear_high(d, oprsz, desc); \
937 }
938
939 #define DO_CMP2(SZ) \
940 DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \
941 DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \
942 DO_CMP1(gvec_lt##SZ, svec##SZ, <) \
943 DO_CMP1(gvec_le##SZ, svec##SZ, <=) \
944 DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \
945 DO_CMP1(gvec_leu##SZ, vec##SZ, <=)
946
947 DO_CMP2(8)
948 DO_CMP2(16)
949 DO_CMP2(32)
950 DO_CMP2(64)
951
952 #undef DO_CMP0
953 #undef DO_CMP1
954 #undef DO_CMP2
955
956 void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
957 {
958 intptr_t oprsz = simd_oprsz(desc);
959 intptr_t i;
960
961 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
962 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
963 if (r > INT8_MAX) {
964 r = INT8_MAX;
965 } else if (r < INT8_MIN) {
966 r = INT8_MIN;
967 }
968 *(int8_t *)(d + i) = r;
969 }
970 clear_high(d, oprsz, desc);
971 }
972
973 void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
974 {
975 intptr_t oprsz = simd_oprsz(desc);
976 intptr_t i;
977
978 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
979 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
980 if (r > INT16_MAX) {
981 r = INT16_MAX;
982 } else if (r < INT16_MIN) {
983 r = INT16_MIN;
984 }
985 *(int16_t *)(d + i) = r;
986 }
987 clear_high(d, oprsz, desc);
988 }
989
990 void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
991 {
992 intptr_t oprsz = simd_oprsz(desc);
993 intptr_t i;
994
995 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
996 int32_t ai = *(int32_t *)(a + i);
997 int32_t bi = *(int32_t *)(b + i);
998 int32_t di = ai + bi;
999 if (((di ^ ai) &~ (ai ^ bi)) < 0) {
1000 /* Signed overflow. */
1001 di = (di < 0 ? INT32_MAX : INT32_MIN);
1002 }
1003 *(int32_t *)(d + i) = di;
1004 }
1005 clear_high(d, oprsz, desc);
1006 }
1007
1008 void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
1009 {
1010 intptr_t oprsz = simd_oprsz(desc);
1011 intptr_t i;
1012
1013 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1014 int64_t ai = *(int64_t *)(a + i);
1015 int64_t bi = *(int64_t *)(b + i);
1016 int64_t di = ai + bi;
1017 if (((di ^ ai) &~ (ai ^ bi)) < 0) {
1018 /* Signed overflow. */
1019 di = (di < 0 ? INT64_MAX : INT64_MIN);
1020 }
1021 *(int64_t *)(d + i) = di;
1022 }
1023 clear_high(d, oprsz, desc);
1024 }
1025
1026 void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
1027 {
1028 intptr_t oprsz = simd_oprsz(desc);
1029 intptr_t i;
1030
1031 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1032 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
1033 if (r > INT8_MAX) {
1034 r = INT8_MAX;
1035 } else if (r < INT8_MIN) {
1036 r = INT8_MIN;
1037 }
1038 *(uint8_t *)(d + i) = r;
1039 }
1040 clear_high(d, oprsz, desc);
1041 }
1042
1043 void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1044 {
1045 intptr_t oprsz = simd_oprsz(desc);
1046 intptr_t i;
1047
1048 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1049 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1050 if (r > INT16_MAX) {
1051 r = INT16_MAX;
1052 } else if (r < INT16_MIN) {
1053 r = INT16_MIN;
1054 }
1055 *(int16_t *)(d + i) = r;
1056 }
1057 clear_high(d, oprsz, desc);
1058 }
1059
1060 void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1061 {
1062 intptr_t oprsz = simd_oprsz(desc);
1063 intptr_t i;
1064
1065 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1066 int32_t ai = *(int32_t *)(a + i);
1067 int32_t bi = *(int32_t *)(b + i);
1068 int32_t di = ai - bi;
1069 if (((di ^ ai) & (ai ^ bi)) < 0) {
1070 /* Signed overflow. */
1071 di = (di < 0 ? INT32_MAX : INT32_MIN);
1072 }
1073 *(int32_t *)(d + i) = di;
1074 }
1075 clear_high(d, oprsz, desc);
1076 }
1077
1078 void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1079 {
1080 intptr_t oprsz = simd_oprsz(desc);
1081 intptr_t i;
1082
1083 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1084 int64_t ai = *(int64_t *)(a + i);
1085 int64_t bi = *(int64_t *)(b + i);
1086 int64_t di = ai - bi;
1087 if (((di ^ ai) & (ai ^ bi)) < 0) {
1088 /* Signed overflow. */
1089 di = (di < 0 ? INT64_MAX : INT64_MIN);
1090 }
1091 *(int64_t *)(d + i) = di;
1092 }
1093 clear_high(d, oprsz, desc);
1094 }
1095
1096 void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1097 {
1098 intptr_t oprsz = simd_oprsz(desc);
1099 intptr_t i;
1100
1101 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1102 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1103 if (r > UINT8_MAX) {
1104 r = UINT8_MAX;
1105 }
1106 *(uint8_t *)(d + i) = r;
1107 }
1108 clear_high(d, oprsz, desc);
1109 }
1110
1111 void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1112 {
1113 intptr_t oprsz = simd_oprsz(desc);
1114 intptr_t i;
1115
1116 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1117 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1118 if (r > UINT16_MAX) {
1119 r = UINT16_MAX;
1120 }
1121 *(uint16_t *)(d + i) = r;
1122 }
1123 clear_high(d, oprsz, desc);
1124 }
1125
1126 void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1127 {
1128 intptr_t oprsz = simd_oprsz(desc);
1129 intptr_t i;
1130
1131 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1132 uint32_t ai = *(uint32_t *)(a + i);
1133 uint32_t bi = *(uint32_t *)(b + i);
1134 uint32_t di = ai + bi;
1135 if (di < ai) {
1136 di = UINT32_MAX;
1137 }
1138 *(uint32_t *)(d + i) = di;
1139 }
1140 clear_high(d, oprsz, desc);
1141 }
1142
1143 void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1144 {
1145 intptr_t oprsz = simd_oprsz(desc);
1146 intptr_t i;
1147
1148 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1149 uint64_t ai = *(uint64_t *)(a + i);
1150 uint64_t bi = *(uint64_t *)(b + i);
1151 uint64_t di = ai + bi;
1152 if (di < ai) {
1153 di = UINT64_MAX;
1154 }
1155 *(uint64_t *)(d + i) = di;
1156 }
1157 clear_high(d, oprsz, desc);
1158 }
1159
1160 void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1161 {
1162 intptr_t oprsz = simd_oprsz(desc);
1163 intptr_t i;
1164
1165 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1166 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1167 if (r < 0) {
1168 r = 0;
1169 }
1170 *(uint8_t *)(d + i) = r;
1171 }
1172 clear_high(d, oprsz, desc);
1173 }
1174
1175 void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1176 {
1177 intptr_t oprsz = simd_oprsz(desc);
1178 intptr_t i;
1179
1180 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1181 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1182 if (r < 0) {
1183 r = 0;
1184 }
1185 *(uint16_t *)(d + i) = r;
1186 }
1187 clear_high(d, oprsz, desc);
1188 }
1189
1190 void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1191 {
1192 intptr_t oprsz = simd_oprsz(desc);
1193 intptr_t i;
1194
1195 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1196 uint32_t ai = *(uint32_t *)(a + i);
1197 uint32_t bi = *(uint32_t *)(b + i);
1198 uint32_t di = ai - bi;
1199 if (ai < bi) {
1200 di = 0;
1201 }
1202 *(uint32_t *)(d + i) = di;
1203 }
1204 clear_high(d, oprsz, desc);
1205 }
1206
1207 void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1208 {
1209 intptr_t oprsz = simd_oprsz(desc);
1210 intptr_t i;
1211
1212 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1213 uint64_t ai = *(uint64_t *)(a + i);
1214 uint64_t bi = *(uint64_t *)(b + i);
1215 uint64_t di = ai - bi;
1216 if (ai < bi) {
1217 di = 0;
1218 }
1219 *(uint64_t *)(d + i) = di;
1220 }
1221 clear_high(d, oprsz, desc);
1222 }
1223
1224 void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1225 {
1226 intptr_t oprsz = simd_oprsz(desc);
1227 intptr_t i;
1228
1229 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1230 int8_t aa = *(int8_t *)(a + i);
1231 int8_t bb = *(int8_t *)(b + i);
1232 int8_t dd = aa < bb ? aa : bb;
1233 *(int8_t *)(d + i) = dd;
1234 }
1235 clear_high(d, oprsz, desc);
1236 }
1237
1238 void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1239 {
1240 intptr_t oprsz = simd_oprsz(desc);
1241 intptr_t i;
1242
1243 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1244 int16_t aa = *(int16_t *)(a + i);
1245 int16_t bb = *(int16_t *)(b + i);
1246 int16_t dd = aa < bb ? aa : bb;
1247 *(int16_t *)(d + i) = dd;
1248 }
1249 clear_high(d, oprsz, desc);
1250 }
1251
1252 void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1253 {
1254 intptr_t oprsz = simd_oprsz(desc);
1255 intptr_t i;
1256
1257 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1258 int32_t aa = *(int32_t *)(a + i);
1259 int32_t bb = *(int32_t *)(b + i);
1260 int32_t dd = aa < bb ? aa : bb;
1261 *(int32_t *)(d + i) = dd;
1262 }
1263 clear_high(d, oprsz, desc);
1264 }
1265
1266 void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1267 {
1268 intptr_t oprsz = simd_oprsz(desc);
1269 intptr_t i;
1270
1271 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1272 int64_t aa = *(int64_t *)(a + i);
1273 int64_t bb = *(int64_t *)(b + i);
1274 int64_t dd = aa < bb ? aa : bb;
1275 *(int64_t *)(d + i) = dd;
1276 }
1277 clear_high(d, oprsz, desc);
1278 }
1279
1280 void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1281 {
1282 intptr_t oprsz = simd_oprsz(desc);
1283 intptr_t i;
1284
1285 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1286 int8_t aa = *(int8_t *)(a + i);
1287 int8_t bb = *(int8_t *)(b + i);
1288 int8_t dd = aa > bb ? aa : bb;
1289 *(int8_t *)(d + i) = dd;
1290 }
1291 clear_high(d, oprsz, desc);
1292 }
1293
1294 void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1295 {
1296 intptr_t oprsz = simd_oprsz(desc);
1297 intptr_t i;
1298
1299 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1300 int16_t aa = *(int16_t *)(a + i);
1301 int16_t bb = *(int16_t *)(b + i);
1302 int16_t dd = aa > bb ? aa : bb;
1303 *(int16_t *)(d + i) = dd;
1304 }
1305 clear_high(d, oprsz, desc);
1306 }
1307
1308 void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1309 {
1310 intptr_t oprsz = simd_oprsz(desc);
1311 intptr_t i;
1312
1313 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1314 int32_t aa = *(int32_t *)(a + i);
1315 int32_t bb = *(int32_t *)(b + i);
1316 int32_t dd = aa > bb ? aa : bb;
1317 *(int32_t *)(d + i) = dd;
1318 }
1319 clear_high(d, oprsz, desc);
1320 }
1321
1322 void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1323 {
1324 intptr_t oprsz = simd_oprsz(desc);
1325 intptr_t i;
1326
1327 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1328 int64_t aa = *(int64_t *)(a + i);
1329 int64_t bb = *(int64_t *)(b + i);
1330 int64_t dd = aa > bb ? aa : bb;
1331 *(int64_t *)(d + i) = dd;
1332 }
1333 clear_high(d, oprsz, desc);
1334 }
1335
1336 void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1337 {
1338 intptr_t oprsz = simd_oprsz(desc);
1339 intptr_t i;
1340
1341 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1342 uint8_t aa = *(uint8_t *)(a + i);
1343 uint8_t bb = *(uint8_t *)(b + i);
1344 uint8_t dd = aa < bb ? aa : bb;
1345 *(uint8_t *)(d + i) = dd;
1346 }
1347 clear_high(d, oprsz, desc);
1348 }
1349
1350 void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1351 {
1352 intptr_t oprsz = simd_oprsz(desc);
1353 intptr_t i;
1354
1355 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1356 uint16_t aa = *(uint16_t *)(a + i);
1357 uint16_t bb = *(uint16_t *)(b + i);
1358 uint16_t dd = aa < bb ? aa : bb;
1359 *(uint16_t *)(d + i) = dd;
1360 }
1361 clear_high(d, oprsz, desc);
1362 }
1363
1364 void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1365 {
1366 intptr_t oprsz = simd_oprsz(desc);
1367 intptr_t i;
1368
1369 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1370 uint32_t aa = *(uint32_t *)(a + i);
1371 uint32_t bb = *(uint32_t *)(b + i);
1372 uint32_t dd = aa < bb ? aa : bb;
1373 *(uint32_t *)(d + i) = dd;
1374 }
1375 clear_high(d, oprsz, desc);
1376 }
1377
1378 void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1379 {
1380 intptr_t oprsz = simd_oprsz(desc);
1381 intptr_t i;
1382
1383 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1384 uint64_t aa = *(uint64_t *)(a + i);
1385 uint64_t bb = *(uint64_t *)(b + i);
1386 uint64_t dd = aa < bb ? aa : bb;
1387 *(uint64_t *)(d + i) = dd;
1388 }
1389 clear_high(d, oprsz, desc);
1390 }
1391
1392 void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1393 {
1394 intptr_t oprsz = simd_oprsz(desc);
1395 intptr_t i;
1396
1397 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1398 uint8_t aa = *(uint8_t *)(a + i);
1399 uint8_t bb = *(uint8_t *)(b + i);
1400 uint8_t dd = aa > bb ? aa : bb;
1401 *(uint8_t *)(d + i) = dd;
1402 }
1403 clear_high(d, oprsz, desc);
1404 }
1405
1406 void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1407 {
1408 intptr_t oprsz = simd_oprsz(desc);
1409 intptr_t i;
1410
1411 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1412 uint16_t aa = *(uint16_t *)(a + i);
1413 uint16_t bb = *(uint16_t *)(b + i);
1414 uint16_t dd = aa > bb ? aa : bb;
1415 *(uint16_t *)(d + i) = dd;
1416 }
1417 clear_high(d, oprsz, desc);
1418 }
1419
1420 void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1421 {
1422 intptr_t oprsz = simd_oprsz(desc);
1423 intptr_t i;
1424
1425 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1426 uint32_t aa = *(uint32_t *)(a + i);
1427 uint32_t bb = *(uint32_t *)(b + i);
1428 uint32_t dd = aa > bb ? aa : bb;
1429 *(uint32_t *)(d + i) = dd;
1430 }
1431 clear_high(d, oprsz, desc);
1432 }
1433
1434 void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1435 {
1436 intptr_t oprsz = simd_oprsz(desc);
1437 intptr_t i;
1438
1439 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1440 uint64_t aa = *(uint64_t *)(a + i);
1441 uint64_t bb = *(uint64_t *)(b + i);
1442 uint64_t dd = aa > bb ? aa : bb;
1443 *(uint64_t *)(d + i) = dd;
1444 }
1445 clear_high(d, oprsz, desc);
1446 }