]> git.proxmox.com Git - mirror_qemu.git/blame - accel/tcg/tcg-runtime-gvec.c
tcg: Tidy tcg-runtime-gvec.c types
[mirror_qemu.git] / accel / tcg / tcg-runtime-gvec.c
CommitLineData
db432672
RH
/*
 * Generic vectorized operation runtime
 *
 * Copyright (c) 2018 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19
20#include "qemu/osdep.h"
21#include "qemu/host-utils.h"
22#include "cpu.h"
23#include "exec/helper-proto.h"
dcb32f1d 24#include "tcg/tcg-gvec-desc.h"
db432672
RH
25
26
db432672
RH
/*
 * Identity macros: each expands to its argument unchanged.  The argument is
 * parenthesized so that a cast or operator applied to the expansion covers
 * the whole argument expression rather than only its first operand.
 */
#define DUP16(X)  (X)
#define DUP8(X)   (X)
#define DUP4(X)   (X)
#define DUP2(X)   (X)
db432672
RH
31
/*
 * Zero the bytes of d from offset oprsz up to the maximum size obtained
 * from simd_maxsz(desc), in 64-bit stores.  Nothing is written when the
 * maximum size does not exceed oprsz.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t limit = simd_maxsz(desc);
    intptr_t off = oprsz;

    if (unlikely(limit > oprsz)) {
        while (off < limit) {
            *(uint64_t *)(d + off) = 0;
            off += sizeof(uint64_t);
        }
    }
}
43
44void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
45{
46 intptr_t oprsz = simd_oprsz(desc);
47 intptr_t i;
48
6c7ab301
RH
49 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
50 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
db432672
RH
51 }
52 clear_high(d, oprsz, desc);
53}
54
55void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
56{
57 intptr_t oprsz = simd_oprsz(desc);
58 intptr_t i;
59
6c7ab301
RH
60 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
61 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
db432672
RH
62 }
63 clear_high(d, oprsz, desc);
64}
65
66void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
67{
68 intptr_t oprsz = simd_oprsz(desc);
69 intptr_t i;
70
6c7ab301
RH
71 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
72 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
db432672
RH
73 }
74 clear_high(d, oprsz, desc);
75}
76
77void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
78{
79 intptr_t oprsz = simd_oprsz(desc);
80 intptr_t i;
81
6c7ab301
RH
82 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
83 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
db432672
RH
84 }
85 clear_high(d, oprsz, desc);
86}
87
22fc3527
RH
88void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
89{
90 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 91 uint8_t vecb = (uint8_t)DUP16(b);
22fc3527
RH
92 intptr_t i;
93
6c7ab301
RH
94 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
95 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + vecb;
22fc3527
RH
96 }
97 clear_high(d, oprsz, desc);
98}
99
100void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
101{
102 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 103 uint16_t vecb = (uint16_t)DUP8(b);
22fc3527
RH
104 intptr_t i;
105
6c7ab301
RH
106 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
107 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + vecb;
22fc3527
RH
108 }
109 clear_high(d, oprsz, desc);
110}
111
112void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
113{
114 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 115 uint32_t vecb = (uint32_t)DUP4(b);
22fc3527
RH
116 intptr_t i;
117
6c7ab301
RH
118 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
119 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + vecb;
22fc3527
RH
120 }
121 clear_high(d, oprsz, desc);
122}
123
124void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
125{
126 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 127 uint64_t vecb = (uint64_t)DUP2(b);
22fc3527
RH
128 intptr_t i;
129
6c7ab301
RH
130 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
131 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + vecb;
22fc3527
RH
132 }
133 clear_high(d, oprsz, desc);
134}
135
db432672
RH
136void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
137{
138 intptr_t oprsz = simd_oprsz(desc);
139 intptr_t i;
140
6c7ab301
RH
141 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
142 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
db432672
RH
143 }
144 clear_high(d, oprsz, desc);
145}
146
147void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
148{
149 intptr_t oprsz = simd_oprsz(desc);
150 intptr_t i;
151
6c7ab301
RH
152 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
153 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
db432672
RH
154 }
155 clear_high(d, oprsz, desc);
156}
157
158void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
159{
160 intptr_t oprsz = simd_oprsz(desc);
161 intptr_t i;
162
6c7ab301
RH
163 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
164 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
db432672
RH
165 }
166 clear_high(d, oprsz, desc);
167}
168
169void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
170{
171 intptr_t oprsz = simd_oprsz(desc);
172 intptr_t i;
173
6c7ab301
RH
174 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
175 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
db432672
RH
176 }
177 clear_high(d, oprsz, desc);
178}
179
22fc3527
RH
180void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
181{
182 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 183 uint8_t vecb = (uint8_t)DUP16(b);
22fc3527
RH
184 intptr_t i;
185
6c7ab301
RH
186 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
187 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - vecb;
22fc3527
RH
188 }
189 clear_high(d, oprsz, desc);
190}
191
192void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
193{
194 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 195 uint16_t vecb = (uint16_t)DUP8(b);
22fc3527
RH
196 intptr_t i;
197
6c7ab301
RH
198 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
199 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - vecb;
22fc3527
RH
200 }
201 clear_high(d, oprsz, desc);
202}
203
204void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
205{
206 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 207 uint32_t vecb = (uint32_t)DUP4(b);
22fc3527
RH
208 intptr_t i;
209
6c7ab301
RH
210 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
211 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - vecb;
22fc3527
RH
212 }
213 clear_high(d, oprsz, desc);
214}
215
216void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
217{
218 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 219 uint64_t vecb = (uint64_t)DUP2(b);
22fc3527
RH
220 intptr_t i;
221
6c7ab301
RH
222 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
223 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - vecb;
22fc3527
RH
224 }
225 clear_high(d, oprsz, desc);
226}
227
3774030a
RH
228void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
229{
230 intptr_t oprsz = simd_oprsz(desc);
231 intptr_t i;
232
6c7ab301
RH
233 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
234 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
3774030a
RH
235 }
236 clear_high(d, oprsz, desc);
237}
238
239void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
240{
241 intptr_t oprsz = simd_oprsz(desc);
242 intptr_t i;
243
6c7ab301
RH
244 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
245 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
3774030a
RH
246 }
247 clear_high(d, oprsz, desc);
248}
249
250void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
251{
252 intptr_t oprsz = simd_oprsz(desc);
253 intptr_t i;
254
6c7ab301
RH
255 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
256 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
3774030a
RH
257 }
258 clear_high(d, oprsz, desc);
259}
260
261void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
262{
263 intptr_t oprsz = simd_oprsz(desc);
264 intptr_t i;
265
6c7ab301
RH
266 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
267 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
3774030a
RH
268 }
269 clear_high(d, oprsz, desc);
270}
271
22fc3527
RH
272void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
273{
274 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 275 uint8_t vecb = (uint8_t)DUP16(b);
22fc3527
RH
276 intptr_t i;
277
6c7ab301
RH
278 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
279 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * vecb;
22fc3527
RH
280 }
281 clear_high(d, oprsz, desc);
282}
283
284void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
285{
286 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 287 uint16_t vecb = (uint16_t)DUP8(b);
22fc3527
RH
288 intptr_t i;
289
6c7ab301
RH
290 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
291 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * vecb;
22fc3527
RH
292 }
293 clear_high(d, oprsz, desc);
294}
295
296void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
297{
298 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 299 uint32_t vecb = (uint32_t)DUP4(b);
22fc3527
RH
300 intptr_t i;
301
6c7ab301
RH
302 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
303 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * vecb;
22fc3527
RH
304 }
305 clear_high(d, oprsz, desc);
306}
307
308void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
309{
310 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 311 uint64_t vecb = (uint64_t)DUP2(b);
22fc3527
RH
312 intptr_t i;
313
6c7ab301
RH
314 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
315 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * vecb;
22fc3527
RH
316 }
317 clear_high(d, oprsz, desc);
318}
319
db432672
RH
320void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
321{
322 intptr_t oprsz = simd_oprsz(desc);
323 intptr_t i;
324
6c7ab301
RH
325 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
326 *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
db432672
RH
327 }
328 clear_high(d, oprsz, desc);
329}
330
331void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
332{
333 intptr_t oprsz = simd_oprsz(desc);
334 intptr_t i;
335
6c7ab301
RH
336 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
337 *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
db432672
RH
338 }
339 clear_high(d, oprsz, desc);
340}
341
342void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
343{
344 intptr_t oprsz = simd_oprsz(desc);
345 intptr_t i;
346
6c7ab301
RH
347 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
348 *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
db432672
RH
349 }
350 clear_high(d, oprsz, desc);
351}
352
353void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
354{
355 intptr_t oprsz = simd_oprsz(desc);
356 intptr_t i;
357
6c7ab301
RH
358 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
359 *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
db432672
RH
360 }
361 clear_high(d, oprsz, desc);
362}
363
bcefc902
RH
364void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
365{
366 intptr_t oprsz = simd_oprsz(desc);
367 intptr_t i;
368
369 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
370 int8_t aa = *(int8_t *)(a + i);
371 *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
372 }
373 clear_high(d, oprsz, desc);
374}
375
376void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
377{
378 intptr_t oprsz = simd_oprsz(desc);
379 intptr_t i;
380
381 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
382 int16_t aa = *(int16_t *)(a + i);
383 *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
384 }
385 clear_high(d, oprsz, desc);
386}
387
388void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
389{
390 intptr_t oprsz = simd_oprsz(desc);
391 intptr_t i;
392
393 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
394 int32_t aa = *(int32_t *)(a + i);
395 *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
396 }
397 clear_high(d, oprsz, desc);
398}
399
400void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
401{
402 intptr_t oprsz = simd_oprsz(desc);
403 intptr_t i;
404
405 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
406 int64_t aa = *(int64_t *)(a + i);
407 *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
408 }
409 clear_high(d, oprsz, desc);
410}
411
db432672
RH
412void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
413{
414 intptr_t oprsz = simd_oprsz(desc);
415
416 memcpy(d, a, oprsz);
417 clear_high(d, oprsz, desc);
418}
419
420void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
421{
422 intptr_t oprsz = simd_oprsz(desc);
423 intptr_t i;
424
425 if (c == 0) {
426 oprsz = 0;
427 } else {
428 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
429 *(uint64_t *)(d + i) = c;
430 }
431 }
432 clear_high(d, oprsz, desc);
433}
434
435void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
436{
437 intptr_t oprsz = simd_oprsz(desc);
438 intptr_t i;
439
440 if (c == 0) {
441 oprsz = 0;
442 } else {
443 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
444 *(uint32_t *)(d + i) = c;
445 }
446 }
447 clear_high(d, oprsz, desc);
448}
449
450void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
451{
452 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
453}
454
455void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
456{
457 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
458}
459
460void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
461{
462 intptr_t oprsz = simd_oprsz(desc);
463 intptr_t i;
464
6c7ab301
RH
465 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
466 *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
db432672
RH
467 }
468 clear_high(d, oprsz, desc);
469}
470
471void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
472{
473 intptr_t oprsz = simd_oprsz(desc);
474 intptr_t i;
475
6c7ab301
RH
476 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
477 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
db432672
RH
478 }
479 clear_high(d, oprsz, desc);
480}
481
482void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
483{
484 intptr_t oprsz = simd_oprsz(desc);
485 intptr_t i;
486
6c7ab301
RH
487 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
488 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
db432672
RH
489 }
490 clear_high(d, oprsz, desc);
491}
492
493void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
494{
495 intptr_t oprsz = simd_oprsz(desc);
496 intptr_t i;
497
6c7ab301
RH
498 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
499 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
db432672
RH
500 }
501 clear_high(d, oprsz, desc);
502}
503
504void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
505{
506 intptr_t oprsz = simd_oprsz(desc);
507 intptr_t i;
508
6c7ab301
RH
509 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
510 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
db432672
RH
511 }
512 clear_high(d, oprsz, desc);
513}
514
515void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
516{
517 intptr_t oprsz = simd_oprsz(desc);
518 intptr_t i;
519
6c7ab301
RH
520 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
521 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
db432672
RH
522 }
523 clear_high(d, oprsz, desc);
524}
d0ec9796 525
f550805d
RH
526void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
527{
528 intptr_t oprsz = simd_oprsz(desc);
529 intptr_t i;
530
6c7ab301
RH
531 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
532 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
f550805d
RH
533 }
534 clear_high(d, oprsz, desc);
535}
536
537void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
538{
539 intptr_t oprsz = simd_oprsz(desc);
540 intptr_t i;
541
6c7ab301
RH
542 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
543 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
f550805d
RH
544 }
545 clear_high(d, oprsz, desc);
546}
547
548void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
549{
550 intptr_t oprsz = simd_oprsz(desc);
551 intptr_t i;
552
6c7ab301
RH
553 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
554 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
f550805d
RH
555 }
556 clear_high(d, oprsz, desc);
557}
558
22fc3527
RH
559void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
560{
561 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 562 uint64_t vecb = (uint64_t)DUP2(b);
22fc3527
RH
563 intptr_t i;
564
6c7ab301
RH
565 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
566 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & vecb;
22fc3527
RH
567 }
568 clear_high(d, oprsz, desc);
569}
570
571void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
572{
573 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 574 uint64_t vecb = (uint64_t)DUP2(b);
22fc3527
RH
575 intptr_t i;
576
6c7ab301
RH
577 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
578 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ vecb;
22fc3527
RH
579 }
580 clear_high(d, oprsz, desc);
581}
582
583void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
584{
585 intptr_t oprsz = simd_oprsz(desc);
6c7ab301 586 uint64_t vecb = (uint64_t)DUP2(b);
22fc3527
RH
587 intptr_t i;
588
6c7ab301
RH
589 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
590 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | vecb;
22fc3527
RH
591 }
592 clear_high(d, oprsz, desc);
593}
594
d0ec9796
RH
595void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
596{
597 intptr_t oprsz = simd_oprsz(desc);
598 int shift = simd_data(desc);
599 intptr_t i;
600
6c7ab301
RH
601 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
602 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
d0ec9796
RH
603 }
604 clear_high(d, oprsz, desc);
605}
606
607void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
608{
609 intptr_t oprsz = simd_oprsz(desc);
610 int shift = simd_data(desc);
611 intptr_t i;
612
6c7ab301
RH
613 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
614 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
d0ec9796
RH
615 }
616 clear_high(d, oprsz, desc);
617}
618
619void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
620{
621 intptr_t oprsz = simd_oprsz(desc);
622 int shift = simd_data(desc);
623 intptr_t i;
624
6c7ab301
RH
625 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
626 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
d0ec9796
RH
627 }
628 clear_high(d, oprsz, desc);
629}
630
631void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
632{
633 intptr_t oprsz = simd_oprsz(desc);
634 int shift = simd_data(desc);
635 intptr_t i;
636
6c7ab301
RH
637 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
638 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
d0ec9796
RH
639 }
640 clear_high(d, oprsz, desc);
641}
642
643void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
644{
645 intptr_t oprsz = simd_oprsz(desc);
646 int shift = simd_data(desc);
647 intptr_t i;
648
6c7ab301
RH
649 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
650 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
d0ec9796
RH
651 }
652 clear_high(d, oprsz, desc);
653}
654
655void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
656{
657 intptr_t oprsz = simd_oprsz(desc);
658 int shift = simd_data(desc);
659 intptr_t i;
660
6c7ab301
RH
661 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
662 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
d0ec9796
RH
663 }
664 clear_high(d, oprsz, desc);
665}
666
667void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
668{
669 intptr_t oprsz = simd_oprsz(desc);
670 int shift = simd_data(desc);
671 intptr_t i;
672
6c7ab301
RH
673 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
674 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
d0ec9796
RH
675 }
676 clear_high(d, oprsz, desc);
677}
678
679void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
680{
681 intptr_t oprsz = simd_oprsz(desc);
682 int shift = simd_data(desc);
683 intptr_t i;
684
6c7ab301
RH
685 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
686 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
d0ec9796
RH
687 }
688 clear_high(d, oprsz, desc);
689}
690
691void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
692{
693 intptr_t oprsz = simd_oprsz(desc);
694 int shift = simd_data(desc);
695 intptr_t i;
696
6c7ab301
RH
697 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
698 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
d0ec9796
RH
699 }
700 clear_high(d, oprsz, desc);
701}
702
703void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
704{
705 intptr_t oprsz = simd_oprsz(desc);
706 int shift = simd_data(desc);
707 intptr_t i;
708
6c7ab301
RH
709 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
710 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
d0ec9796
RH
711 }
712 clear_high(d, oprsz, desc);
713}
714
715void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
716{
717 intptr_t oprsz = simd_oprsz(desc);
718 int shift = simd_data(desc);
719 intptr_t i;
720
6c7ab301
RH
721 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
722 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
d0ec9796
RH
723 }
724 clear_high(d, oprsz, desc);
725}
726
727void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
728{
729 intptr_t oprsz = simd_oprsz(desc);
730 int shift = simd_data(desc);
731 intptr_t i;
732
6c7ab301
RH
733 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
734 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
d0ec9796
RH
735 }
736 clear_high(d, oprsz, desc);
737}
212be173 738
5ee5c14c
RH
739void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
740{
741 intptr_t oprsz = simd_oprsz(desc);
742 intptr_t i;
743
744 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
745 uint8_t sh = *(uint8_t *)(b + i) & 7;
746 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
747 }
748 clear_high(d, oprsz, desc);
749}
750
751void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
752{
753 intptr_t oprsz = simd_oprsz(desc);
754 intptr_t i;
755
756 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
757 uint8_t sh = *(uint16_t *)(b + i) & 15;
758 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
759 }
760 clear_high(d, oprsz, desc);
761}
762
763void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
764{
765 intptr_t oprsz = simd_oprsz(desc);
766 intptr_t i;
767
768 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
769 uint8_t sh = *(uint32_t *)(b + i) & 31;
770 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
771 }
772 clear_high(d, oprsz, desc);
773}
774
775void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
776{
777 intptr_t oprsz = simd_oprsz(desc);
778 intptr_t i;
779
780 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
781 uint8_t sh = *(uint64_t *)(b + i) & 63;
782 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
783 }
784 clear_high(d, oprsz, desc);
785}
786
787void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
788{
789 intptr_t oprsz = simd_oprsz(desc);
790 intptr_t i;
791
792 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
793 uint8_t sh = *(uint8_t *)(b + i) & 7;
794 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
795 }
796 clear_high(d, oprsz, desc);
797}
798
799void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
800{
801 intptr_t oprsz = simd_oprsz(desc);
802 intptr_t i;
803
804 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
805 uint8_t sh = *(uint16_t *)(b + i) & 15;
806 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
807 }
808 clear_high(d, oprsz, desc);
809}
810
811void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
812{
813 intptr_t oprsz = simd_oprsz(desc);
814 intptr_t i;
815
816 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
817 uint8_t sh = *(uint32_t *)(b + i) & 31;
818 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
819 }
820 clear_high(d, oprsz, desc);
821}
822
823void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
824{
825 intptr_t oprsz = simd_oprsz(desc);
826 intptr_t i;
827
828 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
829 uint8_t sh = *(uint64_t *)(b + i) & 63;
830 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
831 }
832 clear_high(d, oprsz, desc);
833}
834
835void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
836{
837 intptr_t oprsz = simd_oprsz(desc);
838 intptr_t i;
839
899f08ad 840 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
5ee5c14c
RH
841 uint8_t sh = *(uint8_t *)(b + i) & 7;
842 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
843 }
844 clear_high(d, oprsz, desc);
845}
846
847void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
848{
849 intptr_t oprsz = simd_oprsz(desc);
850 intptr_t i;
851
852 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
853 uint8_t sh = *(uint16_t *)(b + i) & 15;
854 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
855 }
856 clear_high(d, oprsz, desc);
857}
858
859void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
860{
861 intptr_t oprsz = simd_oprsz(desc);
862 intptr_t i;
863
899f08ad 864 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
5ee5c14c
RH
865 uint8_t sh = *(uint32_t *)(b + i) & 31;
866 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
867 }
868 clear_high(d, oprsz, desc);
869}
870
871void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
872{
873 intptr_t oprsz = simd_oprsz(desc);
874 intptr_t i;
875
899f08ad 876 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
5ee5c14c
RH
877 uint8_t sh = *(uint64_t *)(b + i) & 63;
878 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
879 }
880 clear_high(d, oprsz, desc);
881}
882
43d1ccd2 883#define DO_CMP0(X) -(X)
212be173
RH
884
885#define DO_CMP1(NAME, TYPE, OP) \
886void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
887{ \
888 intptr_t oprsz = simd_oprsz(desc); \
889 intptr_t i; \
6cb1d3b8 890 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \
212be173
RH
891 *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
892 } \
893 clear_high(d, oprsz, desc); \
894}
895
896#define DO_CMP2(SZ) \
6c7ab301
RH
897 DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \
898 DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \
899 DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \
900 DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \
901 DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \
902 DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
212be173
RH
903
904DO_CMP2(8)
905DO_CMP2(16)
906DO_CMP2(32)
907DO_CMP2(64)
908
909#undef DO_CMP0
910#undef DO_CMP1
911#undef DO_CMP2
f49b12c6
RH
912
913void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
914{
915 intptr_t oprsz = simd_oprsz(desc);
916 intptr_t i;
917
918 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
919 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
920 if (r > INT8_MAX) {
921 r = INT8_MAX;
922 } else if (r < INT8_MIN) {
923 r = INT8_MIN;
924 }
925 *(int8_t *)(d + i) = r;
926 }
927 clear_high(d, oprsz, desc);
928}
929
930void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
931{
932 intptr_t oprsz = simd_oprsz(desc);
933 intptr_t i;
934
935 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
936 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
937 if (r > INT16_MAX) {
938 r = INT16_MAX;
939 } else if (r < INT16_MIN) {
940 r = INT16_MIN;
941 }
942 *(int16_t *)(d + i) = r;
943 }
944 clear_high(d, oprsz, desc);
945}
946
947void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
948{
949 intptr_t oprsz = simd_oprsz(desc);
950 intptr_t i;
951
952 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
953 int32_t ai = *(int32_t *)(a + i);
954 int32_t bi = *(int32_t *)(b + i);
955 int32_t di = ai + bi;
956 if (((di ^ ai) &~ (ai ^ bi)) < 0) {
957 /* Signed overflow. */
958 di = (di < 0 ? INT32_MAX : INT32_MIN);
959 }
960 *(int32_t *)(d + i) = di;
961 }
962 clear_high(d, oprsz, desc);
963}
964
965void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
966{
967 intptr_t oprsz = simd_oprsz(desc);
968 intptr_t i;
969
970 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
971 int64_t ai = *(int64_t *)(a + i);
972 int64_t bi = *(int64_t *)(b + i);
973 int64_t di = ai + bi;
974 if (((di ^ ai) &~ (ai ^ bi)) < 0) {
975 /* Signed overflow. */
976 di = (di < 0 ? INT64_MAX : INT64_MIN);
977 }
978 *(int64_t *)(d + i) = di;
979 }
980 clear_high(d, oprsz, desc);
981}
982
983void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
984{
985 intptr_t oprsz = simd_oprsz(desc);
986 intptr_t i;
987
988 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
989 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
990 if (r > INT8_MAX) {
991 r = INT8_MAX;
992 } else if (r < INT8_MIN) {
993 r = INT8_MIN;
994 }
995 *(uint8_t *)(d + i) = r;
996 }
997 clear_high(d, oprsz, desc);
998}
999
1000void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1001{
1002 intptr_t oprsz = simd_oprsz(desc);
1003 intptr_t i;
1004
1005 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1006 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1007 if (r > INT16_MAX) {
1008 r = INT16_MAX;
1009 } else if (r < INT16_MIN) {
1010 r = INT16_MIN;
1011 }
1012 *(int16_t *)(d + i) = r;
1013 }
1014 clear_high(d, oprsz, desc);
1015}
1016
1017void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1018{
1019 intptr_t oprsz = simd_oprsz(desc);
1020 intptr_t i;
1021
1022 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1023 int32_t ai = *(int32_t *)(a + i);
1024 int32_t bi = *(int32_t *)(b + i);
1025 int32_t di = ai - bi;
1026 if (((di ^ ai) & (ai ^ bi)) < 0) {
1027 /* Signed overflow. */
1028 di = (di < 0 ? INT32_MAX : INT32_MIN);
1029 }
1030 *(int32_t *)(d + i) = di;
1031 }
1032 clear_high(d, oprsz, desc);
1033}
1034
1035void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1036{
1037 intptr_t oprsz = simd_oprsz(desc);
1038 intptr_t i;
1039
1040 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1041 int64_t ai = *(int64_t *)(a + i);
1042 int64_t bi = *(int64_t *)(b + i);
1043 int64_t di = ai - bi;
1044 if (((di ^ ai) & (ai ^ bi)) < 0) {
1045 /* Signed overflow. */
1046 di = (di < 0 ? INT64_MAX : INT64_MIN);
1047 }
1048 *(int64_t *)(d + i) = di;
1049 }
1050 clear_high(d, oprsz, desc);
1051}
1052
1053void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1054{
1055 intptr_t oprsz = simd_oprsz(desc);
1056 intptr_t i;
1057
1058 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1059 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1060 if (r > UINT8_MAX) {
1061 r = UINT8_MAX;
1062 }
1063 *(uint8_t *)(d + i) = r;
1064 }
1065 clear_high(d, oprsz, desc);
1066}
1067
1068void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1069{
1070 intptr_t oprsz = simd_oprsz(desc);
1071 intptr_t i;
1072
1073 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1074 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1075 if (r > UINT16_MAX) {
1076 r = UINT16_MAX;
1077 }
1078 *(uint16_t *)(d + i) = r;
1079 }
1080 clear_high(d, oprsz, desc);
1081}
1082
1083void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1084{
1085 intptr_t oprsz = simd_oprsz(desc);
1086 intptr_t i;
1087
1088 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1089 uint32_t ai = *(uint32_t *)(a + i);
1090 uint32_t bi = *(uint32_t *)(b + i);
1091 uint32_t di = ai + bi;
1092 if (di < ai) {
1093 di = UINT32_MAX;
1094 }
1095 *(uint32_t *)(d + i) = di;
1096 }
1097 clear_high(d, oprsz, desc);
1098}
1099
1100void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1101{
1102 intptr_t oprsz = simd_oprsz(desc);
1103 intptr_t i;
1104
1105 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1106 uint64_t ai = *(uint64_t *)(a + i);
1107 uint64_t bi = *(uint64_t *)(b + i);
1108 uint64_t di = ai + bi;
1109 if (di < ai) {
1110 di = UINT64_MAX;
1111 }
1112 *(uint64_t *)(d + i) = di;
1113 }
1114 clear_high(d, oprsz, desc);
1115}
1116
1117void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1118{
1119 intptr_t oprsz = simd_oprsz(desc);
1120 intptr_t i;
1121
1122 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1123 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1124 if (r < 0) {
1125 r = 0;
1126 }
1127 *(uint8_t *)(d + i) = r;
1128 }
1129 clear_high(d, oprsz, desc);
1130}
1131
1132void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1133{
1134 intptr_t oprsz = simd_oprsz(desc);
1135 intptr_t i;
1136
1137 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1138 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1139 if (r < 0) {
1140 r = 0;
1141 }
1142 *(uint16_t *)(d + i) = r;
1143 }
1144 clear_high(d, oprsz, desc);
1145}
1146
1147void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1148{
1149 intptr_t oprsz = simd_oprsz(desc);
1150 intptr_t i;
1151
1152 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1153 uint32_t ai = *(uint32_t *)(a + i);
1154 uint32_t bi = *(uint32_t *)(b + i);
1155 uint32_t di = ai - bi;
1156 if (ai < bi) {
1157 di = 0;
1158 }
1159 *(uint32_t *)(d + i) = di;
1160 }
1161 clear_high(d, oprsz, desc);
1162}
1163
1164void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1165{
1166 intptr_t oprsz = simd_oprsz(desc);
1167 intptr_t i;
1168
1169 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1170 uint64_t ai = *(uint64_t *)(a + i);
1171 uint64_t bi = *(uint64_t *)(b + i);
1172 uint64_t di = ai - bi;
1173 if (ai < bi) {
1174 di = 0;
1175 }
1176 *(uint64_t *)(d + i) = di;
1177 }
1178 clear_high(d, oprsz, desc);
1179}
dd0a0fcd
RH
1180
1181void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1182{
1183 intptr_t oprsz = simd_oprsz(desc);
1184 intptr_t i;
1185
1186 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1187 int8_t aa = *(int8_t *)(a + i);
1188 int8_t bb = *(int8_t *)(b + i);
1189 int8_t dd = aa < bb ? aa : bb;
1190 *(int8_t *)(d + i) = dd;
1191 }
1192 clear_high(d, oprsz, desc);
1193}
1194
1195void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1196{
1197 intptr_t oprsz = simd_oprsz(desc);
1198 intptr_t i;
1199
1200 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1201 int16_t aa = *(int16_t *)(a + i);
1202 int16_t bb = *(int16_t *)(b + i);
1203 int16_t dd = aa < bb ? aa : bb;
1204 *(int16_t *)(d + i) = dd;
1205 }
1206 clear_high(d, oprsz, desc);
1207}
1208
1209void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1210{
1211 intptr_t oprsz = simd_oprsz(desc);
1212 intptr_t i;
1213
1214 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1215 int32_t aa = *(int32_t *)(a + i);
1216 int32_t bb = *(int32_t *)(b + i);
1217 int32_t dd = aa < bb ? aa : bb;
1218 *(int32_t *)(d + i) = dd;
1219 }
1220 clear_high(d, oprsz, desc);
1221}
1222
1223void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1224{
1225 intptr_t oprsz = simd_oprsz(desc);
1226 intptr_t i;
1227
1228 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1229 int64_t aa = *(int64_t *)(a + i);
1230 int64_t bb = *(int64_t *)(b + i);
1231 int64_t dd = aa < bb ? aa : bb;
1232 *(int64_t *)(d + i) = dd;
1233 }
1234 clear_high(d, oprsz, desc);
1235}
1236
1237void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1238{
1239 intptr_t oprsz = simd_oprsz(desc);
1240 intptr_t i;
1241
1242 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1243 int8_t aa = *(int8_t *)(a + i);
1244 int8_t bb = *(int8_t *)(b + i);
1245 int8_t dd = aa > bb ? aa : bb;
1246 *(int8_t *)(d + i) = dd;
1247 }
1248 clear_high(d, oprsz, desc);
1249}
1250
1251void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1252{
1253 intptr_t oprsz = simd_oprsz(desc);
1254 intptr_t i;
1255
1256 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1257 int16_t aa = *(int16_t *)(a + i);
1258 int16_t bb = *(int16_t *)(b + i);
1259 int16_t dd = aa > bb ? aa : bb;
1260 *(int16_t *)(d + i) = dd;
1261 }
1262 clear_high(d, oprsz, desc);
1263}
1264
1265void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1266{
1267 intptr_t oprsz = simd_oprsz(desc);
1268 intptr_t i;
1269
1270 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1271 int32_t aa = *(int32_t *)(a + i);
1272 int32_t bb = *(int32_t *)(b + i);
1273 int32_t dd = aa > bb ? aa : bb;
1274 *(int32_t *)(d + i) = dd;
1275 }
1276 clear_high(d, oprsz, desc);
1277}
1278
1279void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1280{
1281 intptr_t oprsz = simd_oprsz(desc);
1282 intptr_t i;
1283
1284 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1285 int64_t aa = *(int64_t *)(a + i);
1286 int64_t bb = *(int64_t *)(b + i);
1287 int64_t dd = aa > bb ? aa : bb;
1288 *(int64_t *)(d + i) = dd;
1289 }
1290 clear_high(d, oprsz, desc);
1291}
1292
1293void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1294{
1295 intptr_t oprsz = simd_oprsz(desc);
1296 intptr_t i;
1297
1298 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1299 uint8_t aa = *(uint8_t *)(a + i);
1300 uint8_t bb = *(uint8_t *)(b + i);
1301 uint8_t dd = aa < bb ? aa : bb;
1302 *(uint8_t *)(d + i) = dd;
1303 }
1304 clear_high(d, oprsz, desc);
1305}
1306
1307void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1308{
1309 intptr_t oprsz = simd_oprsz(desc);
1310 intptr_t i;
1311
1312 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1313 uint16_t aa = *(uint16_t *)(a + i);
1314 uint16_t bb = *(uint16_t *)(b + i);
1315 uint16_t dd = aa < bb ? aa : bb;
1316 *(uint16_t *)(d + i) = dd;
1317 }
1318 clear_high(d, oprsz, desc);
1319}
1320
1321void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1322{
1323 intptr_t oprsz = simd_oprsz(desc);
1324 intptr_t i;
1325
1326 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1327 uint32_t aa = *(uint32_t *)(a + i);
1328 uint32_t bb = *(uint32_t *)(b + i);
1329 uint32_t dd = aa < bb ? aa : bb;
1330 *(uint32_t *)(d + i) = dd;
1331 }
1332 clear_high(d, oprsz, desc);
1333}
1334
1335void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1336{
1337 intptr_t oprsz = simd_oprsz(desc);
1338 intptr_t i;
1339
1340 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1341 uint64_t aa = *(uint64_t *)(a + i);
1342 uint64_t bb = *(uint64_t *)(b + i);
1343 uint64_t dd = aa < bb ? aa : bb;
1344 *(uint64_t *)(d + i) = dd;
1345 }
1346 clear_high(d, oprsz, desc);
1347}
1348
1349void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1350{
1351 intptr_t oprsz = simd_oprsz(desc);
1352 intptr_t i;
1353
1354 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1355 uint8_t aa = *(uint8_t *)(a + i);
1356 uint8_t bb = *(uint8_t *)(b + i);
1357 uint8_t dd = aa > bb ? aa : bb;
1358 *(uint8_t *)(d + i) = dd;
1359 }
1360 clear_high(d, oprsz, desc);
1361}
1362
1363void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1364{
1365 intptr_t oprsz = simd_oprsz(desc);
1366 intptr_t i;
1367
1368 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1369 uint16_t aa = *(uint16_t *)(a + i);
1370 uint16_t bb = *(uint16_t *)(b + i);
1371 uint16_t dd = aa > bb ? aa : bb;
1372 *(uint16_t *)(d + i) = dd;
1373 }
1374 clear_high(d, oprsz, desc);
1375}
1376
1377void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1378{
1379 intptr_t oprsz = simd_oprsz(desc);
1380 intptr_t i;
1381
1382 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1383 uint32_t aa = *(uint32_t *)(a + i);
1384 uint32_t bb = *(uint32_t *)(b + i);
1385 uint32_t dd = aa > bb ? aa : bb;
1386 *(uint32_t *)(d + i) = dd;
1387 }
1388 clear_high(d, oprsz, desc);
1389}
1390
1391void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1392{
1393 intptr_t oprsz = simd_oprsz(desc);
1394 intptr_t i;
1395
1396 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1397 uint64_t aa = *(uint64_t *)(a + i);
1398 uint64_t bb = *(uint64_t *)(b + i);
1399 uint64_t dd = aa > bb ? aa : bb;
1400 *(uint64_t *)(d + i) = dd;
1401 }
1402 clear_high(d, oprsz, desc);
1403}
38dc1294
RH
1404
1405void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
1406{
1407 intptr_t oprsz = simd_oprsz(desc);
1408 intptr_t i;
1409
6c7ab301
RH
1410 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1411 uint64_t aa = *(uint64_t *)(a + i);
1412 uint64_t bb = *(uint64_t *)(b + i);
1413 uint64_t cc = *(uint64_t *)(c + i);
1414 *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
38dc1294
RH
1415 }
1416 clear_high(d, oprsz, desc);
1417}