]> git.proxmox.com Git - mirror_qemu.git/blame - accel/tcg/tcg-runtime-gvec.c
Merge remote-tracking branch 'remotes/nvme/tags/nvme-next-pull-request' into staging
[mirror_qemu.git] / accel / tcg / tcg-runtime-gvec.c
CommitLineData
db432672
RH
1/*
2 * Generic vectorized operation runtime
3 *
4 * Copyright (c) 2018 Linaro
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
fb0343d5 9 * version 2.1 of the License, or (at your option) any later version.
db432672
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "qemu/host-utils.h"
22#include "cpu.h"
23#include "exec/helper-proto.h"
dcb32f1d 24#include "tcg/tcg-gvec-desc.h"
db432672
RH
25
26
db432672
RH
/*
 * Zero the part of the destination that lies between the operation size
 * and the maximum size encoded in @desc, one uint64_t at a time.
 * Nothing is written when maxsz does not exceed @oprsz.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t total = simd_maxsz(desc);
    intptr_t off;

    if (unlikely(total > oprsz)) {
        off = oprsz;
        while (off < total) {
            *(uint64_t *)(d + off) = 0;
            off += sizeof(uint64_t);
        }
    }
}
38
39void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
40{
41 intptr_t oprsz = simd_oprsz(desc);
42 intptr_t i;
43
6c7ab301
RH
44 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
45 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
db432672
RH
46 }
47 clear_high(d, oprsz, desc);
48}
49
50void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
51{
52 intptr_t oprsz = simd_oprsz(desc);
53 intptr_t i;
54
6c7ab301
RH
55 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
56 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
db432672
RH
57 }
58 clear_high(d, oprsz, desc);
59}
60
61void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
62{
63 intptr_t oprsz = simd_oprsz(desc);
64 intptr_t i;
65
6c7ab301
RH
66 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
67 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
db432672
RH
68 }
69 clear_high(d, oprsz, desc);
70}
71
72void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
73{
74 intptr_t oprsz = simd_oprsz(desc);
75 intptr_t i;
76
6c7ab301
RH
77 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
78 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
db432672
RH
79 }
80 clear_high(d, oprsz, desc);
81}
82
22fc3527
RH
83void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
84{
85 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
86 intptr_t i;
87
6c7ab301 88 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
0a83e43a 89 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
22fc3527
RH
90 }
91 clear_high(d, oprsz, desc);
92}
93
94void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
95{
96 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
97 intptr_t i;
98
6c7ab301 99 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
0a83e43a 100 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
22fc3527
RH
101 }
102 clear_high(d, oprsz, desc);
103}
104
105void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
106{
107 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
108 intptr_t i;
109
6c7ab301 110 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
0a83e43a 111 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
22fc3527
RH
112 }
113 clear_high(d, oprsz, desc);
114}
115
116void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
117{
118 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
119 intptr_t i;
120
6c7ab301 121 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
0a83e43a 122 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
22fc3527
RH
123 }
124 clear_high(d, oprsz, desc);
125}
126
db432672
RH
127void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
128{
129 intptr_t oprsz = simd_oprsz(desc);
130 intptr_t i;
131
6c7ab301
RH
132 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
133 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
db432672
RH
134 }
135 clear_high(d, oprsz, desc);
136}
137
138void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
139{
140 intptr_t oprsz = simd_oprsz(desc);
141 intptr_t i;
142
6c7ab301
RH
143 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
144 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
db432672
RH
145 }
146 clear_high(d, oprsz, desc);
147}
148
149void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
150{
151 intptr_t oprsz = simd_oprsz(desc);
152 intptr_t i;
153
6c7ab301
RH
154 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
155 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
db432672
RH
156 }
157 clear_high(d, oprsz, desc);
158}
159
160void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
161{
162 intptr_t oprsz = simd_oprsz(desc);
163 intptr_t i;
164
6c7ab301
RH
165 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
166 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
db432672
RH
167 }
168 clear_high(d, oprsz, desc);
169}
170
22fc3527
RH
171void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
172{
173 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
174 intptr_t i;
175
6c7ab301 176 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
0a83e43a 177 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
22fc3527
RH
178 }
179 clear_high(d, oprsz, desc);
180}
181
182void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
183{
184 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
185 intptr_t i;
186
6c7ab301 187 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
0a83e43a 188 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
22fc3527
RH
189 }
190 clear_high(d, oprsz, desc);
191}
192
193void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
194{
195 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
196 intptr_t i;
197
6c7ab301 198 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
0a83e43a 199 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
22fc3527
RH
200 }
201 clear_high(d, oprsz, desc);
202}
203
204void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
205{
206 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
207 intptr_t i;
208
6c7ab301 209 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
0a83e43a 210 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
22fc3527
RH
211 }
212 clear_high(d, oprsz, desc);
213}
214
3774030a
RH
215void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
216{
217 intptr_t oprsz = simd_oprsz(desc);
218 intptr_t i;
219
6c7ab301
RH
220 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
221 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
3774030a
RH
222 }
223 clear_high(d, oprsz, desc);
224}
225
226void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
227{
228 intptr_t oprsz = simd_oprsz(desc);
229 intptr_t i;
230
6c7ab301
RH
231 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
232 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
3774030a
RH
233 }
234 clear_high(d, oprsz, desc);
235}
236
237void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
238{
239 intptr_t oprsz = simd_oprsz(desc);
240 intptr_t i;
241
6c7ab301
RH
242 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
243 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
3774030a
RH
244 }
245 clear_high(d, oprsz, desc);
246}
247
248void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
249{
250 intptr_t oprsz = simd_oprsz(desc);
251 intptr_t i;
252
6c7ab301
RH
253 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
254 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
3774030a
RH
255 }
256 clear_high(d, oprsz, desc);
257}
258
22fc3527
RH
259void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
260{
261 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
262 intptr_t i;
263
6c7ab301 264 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
0a83e43a 265 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
22fc3527
RH
266 }
267 clear_high(d, oprsz, desc);
268}
269
270void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
271{
272 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
273 intptr_t i;
274
6c7ab301 275 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
0a83e43a 276 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
22fc3527
RH
277 }
278 clear_high(d, oprsz, desc);
279}
280
281void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
282{
283 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
284 intptr_t i;
285
6c7ab301 286 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
0a83e43a 287 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
22fc3527
RH
288 }
289 clear_high(d, oprsz, desc);
290}
291
292void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
293{
294 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
295 intptr_t i;
296
6c7ab301 297 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
0a83e43a 298 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
22fc3527
RH
299 }
300 clear_high(d, oprsz, desc);
301}
302
db432672
RH
303void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
304{
305 intptr_t oprsz = simd_oprsz(desc);
306 intptr_t i;
307
6c7ab301
RH
308 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
309 *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
db432672
RH
310 }
311 clear_high(d, oprsz, desc);
312}
313
314void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
315{
316 intptr_t oprsz = simd_oprsz(desc);
317 intptr_t i;
318
6c7ab301
RH
319 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
320 *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
db432672
RH
321 }
322 clear_high(d, oprsz, desc);
323}
324
325void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
326{
327 intptr_t oprsz = simd_oprsz(desc);
328 intptr_t i;
329
6c7ab301
RH
330 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
331 *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
db432672
RH
332 }
333 clear_high(d, oprsz, desc);
334}
335
336void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
337{
338 intptr_t oprsz = simd_oprsz(desc);
339 intptr_t i;
340
6c7ab301
RH
341 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
342 *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
db432672
RH
343 }
344 clear_high(d, oprsz, desc);
345}
346
bcefc902
RH
347void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
348{
349 intptr_t oprsz = simd_oprsz(desc);
350 intptr_t i;
351
352 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
353 int8_t aa = *(int8_t *)(a + i);
354 *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
355 }
356 clear_high(d, oprsz, desc);
357}
358
359void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
360{
361 intptr_t oprsz = simd_oprsz(desc);
362 intptr_t i;
363
364 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
365 int16_t aa = *(int16_t *)(a + i);
366 *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
367 }
368 clear_high(d, oprsz, desc);
369}
370
371void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
372{
373 intptr_t oprsz = simd_oprsz(desc);
374 intptr_t i;
375
376 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
377 int32_t aa = *(int32_t *)(a + i);
378 *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
379 }
380 clear_high(d, oprsz, desc);
381}
382
383void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
384{
385 intptr_t oprsz = simd_oprsz(desc);
386 intptr_t i;
387
388 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
389 int64_t aa = *(int64_t *)(a + i);
390 *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
391 }
392 clear_high(d, oprsz, desc);
393}
394
db432672
RH
395void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
396{
397 intptr_t oprsz = simd_oprsz(desc);
398
399 memcpy(d, a, oprsz);
400 clear_high(d, oprsz, desc);
401}
402
403void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
404{
405 intptr_t oprsz = simd_oprsz(desc);
406 intptr_t i;
407
408 if (c == 0) {
409 oprsz = 0;
410 } else {
411 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
412 *(uint64_t *)(d + i) = c;
413 }
414 }
415 clear_high(d, oprsz, desc);
416}
417
418void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
419{
420 intptr_t oprsz = simd_oprsz(desc);
421 intptr_t i;
422
423 if (c == 0) {
424 oprsz = 0;
425 } else {
426 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
427 *(uint32_t *)(d + i) = c;
428 }
429 }
430 clear_high(d, oprsz, desc);
431}
432
433void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
434{
435 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
436}
437
438void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
439{
440 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
441}
442
443void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
444{
445 intptr_t oprsz = simd_oprsz(desc);
446 intptr_t i;
447
6c7ab301
RH
448 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
449 *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
db432672
RH
450 }
451 clear_high(d, oprsz, desc);
452}
453
454void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
455{
456 intptr_t oprsz = simd_oprsz(desc);
457 intptr_t i;
458
6c7ab301
RH
459 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
460 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
db432672
RH
461 }
462 clear_high(d, oprsz, desc);
463}
464
465void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
466{
467 intptr_t oprsz = simd_oprsz(desc);
468 intptr_t i;
469
6c7ab301
RH
470 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
471 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
db432672
RH
472 }
473 clear_high(d, oprsz, desc);
474}
475
476void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
477{
478 intptr_t oprsz = simd_oprsz(desc);
479 intptr_t i;
480
6c7ab301
RH
481 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
482 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
db432672
RH
483 }
484 clear_high(d, oprsz, desc);
485}
486
487void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
488{
489 intptr_t oprsz = simd_oprsz(desc);
490 intptr_t i;
491
6c7ab301
RH
492 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
493 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
db432672
RH
494 }
495 clear_high(d, oprsz, desc);
496}
497
498void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
499{
500 intptr_t oprsz = simd_oprsz(desc);
501 intptr_t i;
502
6c7ab301
RH
503 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
504 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
db432672
RH
505 }
506 clear_high(d, oprsz, desc);
507}
d0ec9796 508
f550805d
RH
509void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
510{
511 intptr_t oprsz = simd_oprsz(desc);
512 intptr_t i;
513
6c7ab301
RH
514 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
515 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
f550805d
RH
516 }
517 clear_high(d, oprsz, desc);
518}
519
520void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
521{
522 intptr_t oprsz = simd_oprsz(desc);
523 intptr_t i;
524
6c7ab301
RH
525 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
526 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
f550805d
RH
527 }
528 clear_high(d, oprsz, desc);
529}
530
531void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
532{
533 intptr_t oprsz = simd_oprsz(desc);
534 intptr_t i;
535
6c7ab301
RH
536 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
537 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
f550805d
RH
538 }
539 clear_high(d, oprsz, desc);
540}
541
22fc3527
RH
542void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
543{
544 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
545 intptr_t i;
546
6c7ab301 547 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
0a83e43a 548 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
22fc3527
RH
549 }
550 clear_high(d, oprsz, desc);
551}
552
553void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
554{
555 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
556 intptr_t i;
557
6c7ab301 558 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
0a83e43a 559 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
22fc3527
RH
560 }
561 clear_high(d, oprsz, desc);
562}
563
564void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
565{
566 intptr_t oprsz = simd_oprsz(desc);
22fc3527
RH
567 intptr_t i;
568
6c7ab301 569 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
0a83e43a 570 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
22fc3527
RH
571 }
572 clear_high(d, oprsz, desc);
573}
574
d0ec9796
RH
575void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
576{
577 intptr_t oprsz = simd_oprsz(desc);
578 int shift = simd_data(desc);
579 intptr_t i;
580
6c7ab301
RH
581 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
582 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
d0ec9796
RH
583 }
584 clear_high(d, oprsz, desc);
585}
586
587void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
588{
589 intptr_t oprsz = simd_oprsz(desc);
590 int shift = simd_data(desc);
591 intptr_t i;
592
6c7ab301
RH
593 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
594 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
d0ec9796
RH
595 }
596 clear_high(d, oprsz, desc);
597}
598
599void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
600{
601 intptr_t oprsz = simd_oprsz(desc);
602 int shift = simd_data(desc);
603 intptr_t i;
604
6c7ab301
RH
605 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
606 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
d0ec9796
RH
607 }
608 clear_high(d, oprsz, desc);
609}
610
611void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
612{
613 intptr_t oprsz = simd_oprsz(desc);
614 int shift = simd_data(desc);
615 intptr_t i;
616
6c7ab301
RH
617 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
618 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
d0ec9796
RH
619 }
620 clear_high(d, oprsz, desc);
621}
622
623void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
624{
625 intptr_t oprsz = simd_oprsz(desc);
626 int shift = simd_data(desc);
627 intptr_t i;
628
6c7ab301
RH
629 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
630 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
d0ec9796
RH
631 }
632 clear_high(d, oprsz, desc);
633}
634
635void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
636{
637 intptr_t oprsz = simd_oprsz(desc);
638 int shift = simd_data(desc);
639 intptr_t i;
640
6c7ab301
RH
641 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
642 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
d0ec9796
RH
643 }
644 clear_high(d, oprsz, desc);
645}
646
647void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
648{
649 intptr_t oprsz = simd_oprsz(desc);
650 int shift = simd_data(desc);
651 intptr_t i;
652
6c7ab301
RH
653 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
654 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
d0ec9796
RH
655 }
656 clear_high(d, oprsz, desc);
657}
658
659void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
660{
661 intptr_t oprsz = simd_oprsz(desc);
662 int shift = simd_data(desc);
663 intptr_t i;
664
6c7ab301
RH
665 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
666 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
d0ec9796
RH
667 }
668 clear_high(d, oprsz, desc);
669}
670
671void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
672{
673 intptr_t oprsz = simd_oprsz(desc);
674 int shift = simd_data(desc);
675 intptr_t i;
676
6c7ab301
RH
677 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
678 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
d0ec9796
RH
679 }
680 clear_high(d, oprsz, desc);
681}
682
683void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
684{
685 intptr_t oprsz = simd_oprsz(desc);
686 int shift = simd_data(desc);
687 intptr_t i;
688
6c7ab301
RH
689 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
690 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
d0ec9796
RH
691 }
692 clear_high(d, oprsz, desc);
693}
694
695void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
696{
697 intptr_t oprsz = simd_oprsz(desc);
698 int shift = simd_data(desc);
699 intptr_t i;
700
6c7ab301
RH
701 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
702 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
d0ec9796
RH
703 }
704 clear_high(d, oprsz, desc);
705}
706
707void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
708{
709 intptr_t oprsz = simd_oprsz(desc);
710 int shift = simd_data(desc);
711 intptr_t i;
712
6c7ab301
RH
713 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
714 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
d0ec9796
RH
715 }
716 clear_high(d, oprsz, desc);
717}
212be173 718
b0f7e744
RH
719void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
720{
721 intptr_t oprsz = simd_oprsz(desc);
722 int shift = simd_data(desc);
723 intptr_t i;
724
725 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
726 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
727 }
728 clear_high(d, oprsz, desc);
729}
730
731void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
732{
733 intptr_t oprsz = simd_oprsz(desc);
734 int shift = simd_data(desc);
735 intptr_t i;
736
737 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
738 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
739 }
740 clear_high(d, oprsz, desc);
741}
742
743void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
744{
745 intptr_t oprsz = simd_oprsz(desc);
746 int shift = simd_data(desc);
747 intptr_t i;
748
749 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
750 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
751 }
752 clear_high(d, oprsz, desc);
753}
754
755void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
756{
757 intptr_t oprsz = simd_oprsz(desc);
758 int shift = simd_data(desc);
759 intptr_t i;
760
761 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
762 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
763 }
764 clear_high(d, oprsz, desc);
765}
766
5ee5c14c
RH
767void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
768{
769 intptr_t oprsz = simd_oprsz(desc);
770 intptr_t i;
771
772 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
773 uint8_t sh = *(uint8_t *)(b + i) & 7;
774 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
775 }
776 clear_high(d, oprsz, desc);
777}
778
779void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
780{
781 intptr_t oprsz = simd_oprsz(desc);
782 intptr_t i;
783
784 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
785 uint8_t sh = *(uint16_t *)(b + i) & 15;
786 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
787 }
788 clear_high(d, oprsz, desc);
789}
790
791void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
792{
793 intptr_t oprsz = simd_oprsz(desc);
794 intptr_t i;
795
796 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
797 uint8_t sh = *(uint32_t *)(b + i) & 31;
798 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
799 }
800 clear_high(d, oprsz, desc);
801}
802
803void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
804{
805 intptr_t oprsz = simd_oprsz(desc);
806 intptr_t i;
807
808 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
809 uint8_t sh = *(uint64_t *)(b + i) & 63;
810 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
811 }
812 clear_high(d, oprsz, desc);
813}
814
815void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
816{
817 intptr_t oprsz = simd_oprsz(desc);
818 intptr_t i;
819
820 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
821 uint8_t sh = *(uint8_t *)(b + i) & 7;
822 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
823 }
824 clear_high(d, oprsz, desc);
825}
826
827void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
828{
829 intptr_t oprsz = simd_oprsz(desc);
830 intptr_t i;
831
832 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
833 uint8_t sh = *(uint16_t *)(b + i) & 15;
834 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
835 }
836 clear_high(d, oprsz, desc);
837}
838
839void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
840{
841 intptr_t oprsz = simd_oprsz(desc);
842 intptr_t i;
843
844 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
845 uint8_t sh = *(uint32_t *)(b + i) & 31;
846 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
847 }
848 clear_high(d, oprsz, desc);
849}
850
851void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
852{
853 intptr_t oprsz = simd_oprsz(desc);
854 intptr_t i;
855
856 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
857 uint8_t sh = *(uint64_t *)(b + i) & 63;
858 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
859 }
860 clear_high(d, oprsz, desc);
861}
862
863void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
864{
865 intptr_t oprsz = simd_oprsz(desc);
866 intptr_t i;
867
899f08ad 868 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
5ee5c14c
RH
869 uint8_t sh = *(uint8_t *)(b + i) & 7;
870 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
871 }
872 clear_high(d, oprsz, desc);
873}
874
875void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
876{
877 intptr_t oprsz = simd_oprsz(desc);
878 intptr_t i;
879
880 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
881 uint8_t sh = *(uint16_t *)(b + i) & 15;
882 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
883 }
884 clear_high(d, oprsz, desc);
885}
886
887void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
888{
889 intptr_t oprsz = simd_oprsz(desc);
890 intptr_t i;
891
899f08ad 892 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
5ee5c14c
RH
893 uint8_t sh = *(uint32_t *)(b + i) & 31;
894 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
895 }
896 clear_high(d, oprsz, desc);
897}
898
899void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
900{
901 intptr_t oprsz = simd_oprsz(desc);
902 intptr_t i;
903
899f08ad 904 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
5ee5c14c
RH
905 uint8_t sh = *(uint64_t *)(b + i) & 63;
906 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
907 }
908 clear_high(d, oprsz, desc);
909}
910
5d0ceda9
RH
911void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
912{
913 intptr_t oprsz = simd_oprsz(desc);
914 intptr_t i;
915
916 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
917 uint8_t sh = *(uint8_t *)(b + i) & 7;
918 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
919 }
920 clear_high(d, oprsz, desc);
921}
922
923void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
924{
925 intptr_t oprsz = simd_oprsz(desc);
926 intptr_t i;
927
928 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
929 uint8_t sh = *(uint16_t *)(b + i) & 15;
930 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
931 }
932 clear_high(d, oprsz, desc);
933}
934
935void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
936{
937 intptr_t oprsz = simd_oprsz(desc);
938 intptr_t i;
939
940 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
941 uint8_t sh = *(uint32_t *)(b + i) & 31;
942 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
943 }
944 clear_high(d, oprsz, desc);
945}
946
947void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
948{
949 intptr_t oprsz = simd_oprsz(desc);
950 intptr_t i;
951
952 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
953 uint8_t sh = *(uint64_t *)(b + i) & 63;
954 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
955 }
956 clear_high(d, oprsz, desc);
957}
958
959void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
960{
961 intptr_t oprsz = simd_oprsz(desc);
962 intptr_t i;
963
964 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
965 uint8_t sh = *(uint8_t *)(b + i) & 7;
966 *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
967 }
968 clear_high(d, oprsz, desc);
969}
970
971void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
972{
973 intptr_t oprsz = simd_oprsz(desc);
974 intptr_t i;
975
976 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
977 uint8_t sh = *(uint16_t *)(b + i) & 15;
978 *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
979 }
980 clear_high(d, oprsz, desc);
981}
982
983void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
984{
985 intptr_t oprsz = simd_oprsz(desc);
986 intptr_t i;
987
988 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
989 uint8_t sh = *(uint32_t *)(b + i) & 31;
990 *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
991 }
992 clear_high(d, oprsz, desc);
993}
994
995void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
996{
997 intptr_t oprsz = simd_oprsz(desc);
998 intptr_t i;
999
1000 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1001 uint8_t sh = *(uint64_t *)(b + i) & 63;
1002 *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
1003 }
1004 clear_high(d, oprsz, desc);
1005}
1006
212be173
RH
1007#define DO_CMP1(NAME, TYPE, OP) \
1008void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
1009{ \
1010 intptr_t oprsz = simd_oprsz(desc); \
1011 intptr_t i; \
6cb1d3b8 1012 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \
0270bd50 1013 *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
212be173
RH
1014 } \
1015 clear_high(d, oprsz, desc); \
1016}
1017
1018#define DO_CMP2(SZ) \
6c7ab301
RH
1019 DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \
1020 DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \
1021 DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \
1022 DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \
1023 DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \
1024 DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
212be173
RH
1025
1026DO_CMP2(8)
1027DO_CMP2(16)
1028DO_CMP2(32)
1029DO_CMP2(64)
1030
212be173
RH
1031#undef DO_CMP1
1032#undef DO_CMP2
f49b12c6
RH
1033
1034void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
1035{
1036 intptr_t oprsz = simd_oprsz(desc);
1037 intptr_t i;
1038
1039 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1040 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
1041 if (r > INT8_MAX) {
1042 r = INT8_MAX;
1043 } else if (r < INT8_MIN) {
1044 r = INT8_MIN;
1045 }
1046 *(int8_t *)(d + i) = r;
1047 }
1048 clear_high(d, oprsz, desc);
1049}
1050
1051void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
1052{
1053 intptr_t oprsz = simd_oprsz(desc);
1054 intptr_t i;
1055
1056 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1057 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
1058 if (r > INT16_MAX) {
1059 r = INT16_MAX;
1060 } else if (r < INT16_MIN) {
1061 r = INT16_MIN;
1062 }
1063 *(int16_t *)(d + i) = r;
1064 }
1065 clear_high(d, oprsz, desc);
1066}
1067
1068void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
1069{
1070 intptr_t oprsz = simd_oprsz(desc);
1071 intptr_t i;
1072
1073 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1074 int32_t ai = *(int32_t *)(a + i);
1075 int32_t bi = *(int32_t *)(b + i);
7702a855
RH
1076 int32_t di;
1077 if (sadd32_overflow(ai, bi, &di)) {
f49b12c6
RH
1078 di = (di < 0 ? INT32_MAX : INT32_MIN);
1079 }
1080 *(int32_t *)(d + i) = di;
1081 }
1082 clear_high(d, oprsz, desc);
1083}
1084
1085void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
1086{
1087 intptr_t oprsz = simd_oprsz(desc);
1088 intptr_t i;
1089
1090 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1091 int64_t ai = *(int64_t *)(a + i);
1092 int64_t bi = *(int64_t *)(b + i);
7702a855
RH
1093 int64_t di;
1094 if (sadd64_overflow(ai, bi, &di)) {
f49b12c6
RH
1095 di = (di < 0 ? INT64_MAX : INT64_MIN);
1096 }
1097 *(int64_t *)(d + i) = di;
1098 }
1099 clear_high(d, oprsz, desc);
1100}
1101
1102void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
1103{
1104 intptr_t oprsz = simd_oprsz(desc);
1105 intptr_t i;
1106
1107 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1108 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
1109 if (r > INT8_MAX) {
1110 r = INT8_MAX;
1111 } else if (r < INT8_MIN) {
1112 r = INT8_MIN;
1113 }
1114 *(uint8_t *)(d + i) = r;
1115 }
1116 clear_high(d, oprsz, desc);
1117}
1118
1119void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1120{
1121 intptr_t oprsz = simd_oprsz(desc);
1122 intptr_t i;
1123
1124 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1125 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1126 if (r > INT16_MAX) {
1127 r = INT16_MAX;
1128 } else if (r < INT16_MIN) {
1129 r = INT16_MIN;
1130 }
1131 *(int16_t *)(d + i) = r;
1132 }
1133 clear_high(d, oprsz, desc);
1134}
1135
1136void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1137{
1138 intptr_t oprsz = simd_oprsz(desc);
1139 intptr_t i;
1140
1141 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1142 int32_t ai = *(int32_t *)(a + i);
1143 int32_t bi = *(int32_t *)(b + i);
7702a855
RH
1144 int32_t di;
1145 if (ssub32_overflow(ai, bi, &di)) {
f49b12c6
RH
1146 di = (di < 0 ? INT32_MAX : INT32_MIN);
1147 }
1148 *(int32_t *)(d + i) = di;
1149 }
1150 clear_high(d, oprsz, desc);
1151}
1152
1153void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1154{
1155 intptr_t oprsz = simd_oprsz(desc);
1156 intptr_t i;
1157
1158 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1159 int64_t ai = *(int64_t *)(a + i);
1160 int64_t bi = *(int64_t *)(b + i);
7702a855
RH
1161 int64_t di;
1162 if (ssub64_overflow(ai, bi, &di)) {
f49b12c6
RH
1163 di = (di < 0 ? INT64_MAX : INT64_MIN);
1164 }
1165 *(int64_t *)(d + i) = di;
1166 }
1167 clear_high(d, oprsz, desc);
1168}
1169
1170void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1171{
1172 intptr_t oprsz = simd_oprsz(desc);
1173 intptr_t i;
1174
1175 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1176 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1177 if (r > UINT8_MAX) {
1178 r = UINT8_MAX;
1179 }
1180 *(uint8_t *)(d + i) = r;
1181 }
1182 clear_high(d, oprsz, desc);
1183}
1184
1185void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1186{
1187 intptr_t oprsz = simd_oprsz(desc);
1188 intptr_t i;
1189
1190 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1191 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1192 if (r > UINT16_MAX) {
1193 r = UINT16_MAX;
1194 }
1195 *(uint16_t *)(d + i) = r;
1196 }
1197 clear_high(d, oprsz, desc);
1198}
1199
1200void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1201{
1202 intptr_t oprsz = simd_oprsz(desc);
1203 intptr_t i;
1204
1205 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1206 uint32_t ai = *(uint32_t *)(a + i);
1207 uint32_t bi = *(uint32_t *)(b + i);
7702a855
RH
1208 uint32_t di;
1209 if (uadd32_overflow(ai, bi, &di)) {
f49b12c6
RH
1210 di = UINT32_MAX;
1211 }
1212 *(uint32_t *)(d + i) = di;
1213 }
1214 clear_high(d, oprsz, desc);
1215}
1216
1217void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1218{
1219 intptr_t oprsz = simd_oprsz(desc);
1220 intptr_t i;
1221
1222 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1223 uint64_t ai = *(uint64_t *)(a + i);
1224 uint64_t bi = *(uint64_t *)(b + i);
7702a855
RH
1225 uint64_t di;
1226 if (uadd64_overflow(ai, bi, &di)) {
f49b12c6
RH
1227 di = UINT64_MAX;
1228 }
1229 *(uint64_t *)(d + i) = di;
1230 }
1231 clear_high(d, oprsz, desc);
1232}
1233
1234void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1235{
1236 intptr_t oprsz = simd_oprsz(desc);
1237 intptr_t i;
1238
1239 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1240 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1241 if (r < 0) {
1242 r = 0;
1243 }
1244 *(uint8_t *)(d + i) = r;
1245 }
1246 clear_high(d, oprsz, desc);
1247}
1248
1249void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1250{
1251 intptr_t oprsz = simd_oprsz(desc);
1252 intptr_t i;
1253
1254 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1255 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1256 if (r < 0) {
1257 r = 0;
1258 }
1259 *(uint16_t *)(d + i) = r;
1260 }
1261 clear_high(d, oprsz, desc);
1262}
1263
1264void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1265{
1266 intptr_t oprsz = simd_oprsz(desc);
1267 intptr_t i;
1268
1269 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1270 uint32_t ai = *(uint32_t *)(a + i);
1271 uint32_t bi = *(uint32_t *)(b + i);
7702a855
RH
1272 uint32_t di;
1273 if (usub32_overflow(ai, bi, &di)) {
f49b12c6
RH
1274 di = 0;
1275 }
1276 *(uint32_t *)(d + i) = di;
1277 }
1278 clear_high(d, oprsz, desc);
1279}
1280
1281void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1282{
1283 intptr_t oprsz = simd_oprsz(desc);
1284 intptr_t i;
1285
1286 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1287 uint64_t ai = *(uint64_t *)(a + i);
1288 uint64_t bi = *(uint64_t *)(b + i);
7702a855
RH
1289 uint64_t di;
1290 if (usub64_overflow(ai, bi, &di)) {
f49b12c6
RH
1291 di = 0;
1292 }
1293 *(uint64_t *)(d + i) = di;
1294 }
1295 clear_high(d, oprsz, desc);
1296}
dd0a0fcd
RH
1297
1298void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1299{
1300 intptr_t oprsz = simd_oprsz(desc);
1301 intptr_t i;
1302
1303 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1304 int8_t aa = *(int8_t *)(a + i);
1305 int8_t bb = *(int8_t *)(b + i);
1306 int8_t dd = aa < bb ? aa : bb;
1307 *(int8_t *)(d + i) = dd;
1308 }
1309 clear_high(d, oprsz, desc);
1310}
1311
1312void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1313{
1314 intptr_t oprsz = simd_oprsz(desc);
1315 intptr_t i;
1316
1317 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1318 int16_t aa = *(int16_t *)(a + i);
1319 int16_t bb = *(int16_t *)(b + i);
1320 int16_t dd = aa < bb ? aa : bb;
1321 *(int16_t *)(d + i) = dd;
1322 }
1323 clear_high(d, oprsz, desc);
1324}
1325
1326void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1327{
1328 intptr_t oprsz = simd_oprsz(desc);
1329 intptr_t i;
1330
1331 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1332 int32_t aa = *(int32_t *)(a + i);
1333 int32_t bb = *(int32_t *)(b + i);
1334 int32_t dd = aa < bb ? aa : bb;
1335 *(int32_t *)(d + i) = dd;
1336 }
1337 clear_high(d, oprsz, desc);
1338}
1339
1340void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1341{
1342 intptr_t oprsz = simd_oprsz(desc);
1343 intptr_t i;
1344
1345 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1346 int64_t aa = *(int64_t *)(a + i);
1347 int64_t bb = *(int64_t *)(b + i);
1348 int64_t dd = aa < bb ? aa : bb;
1349 *(int64_t *)(d + i) = dd;
1350 }
1351 clear_high(d, oprsz, desc);
1352}
1353
1354void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1355{
1356 intptr_t oprsz = simd_oprsz(desc);
1357 intptr_t i;
1358
1359 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1360 int8_t aa = *(int8_t *)(a + i);
1361 int8_t bb = *(int8_t *)(b + i);
1362 int8_t dd = aa > bb ? aa : bb;
1363 *(int8_t *)(d + i) = dd;
1364 }
1365 clear_high(d, oprsz, desc);
1366}
1367
1368void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1369{
1370 intptr_t oprsz = simd_oprsz(desc);
1371 intptr_t i;
1372
1373 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1374 int16_t aa = *(int16_t *)(a + i);
1375 int16_t bb = *(int16_t *)(b + i);
1376 int16_t dd = aa > bb ? aa : bb;
1377 *(int16_t *)(d + i) = dd;
1378 }
1379 clear_high(d, oprsz, desc);
1380}
1381
1382void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1383{
1384 intptr_t oprsz = simd_oprsz(desc);
1385 intptr_t i;
1386
1387 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1388 int32_t aa = *(int32_t *)(a + i);
1389 int32_t bb = *(int32_t *)(b + i);
1390 int32_t dd = aa > bb ? aa : bb;
1391 *(int32_t *)(d + i) = dd;
1392 }
1393 clear_high(d, oprsz, desc);
1394}
1395
1396void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1397{
1398 intptr_t oprsz = simd_oprsz(desc);
1399 intptr_t i;
1400
1401 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1402 int64_t aa = *(int64_t *)(a + i);
1403 int64_t bb = *(int64_t *)(b + i);
1404 int64_t dd = aa > bb ? aa : bb;
1405 *(int64_t *)(d + i) = dd;
1406 }
1407 clear_high(d, oprsz, desc);
1408}
1409
1410void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1411{
1412 intptr_t oprsz = simd_oprsz(desc);
1413 intptr_t i;
1414
1415 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1416 uint8_t aa = *(uint8_t *)(a + i);
1417 uint8_t bb = *(uint8_t *)(b + i);
1418 uint8_t dd = aa < bb ? aa : bb;
1419 *(uint8_t *)(d + i) = dd;
1420 }
1421 clear_high(d, oprsz, desc);
1422}
1423
1424void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1425{
1426 intptr_t oprsz = simd_oprsz(desc);
1427 intptr_t i;
1428
1429 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1430 uint16_t aa = *(uint16_t *)(a + i);
1431 uint16_t bb = *(uint16_t *)(b + i);
1432 uint16_t dd = aa < bb ? aa : bb;
1433 *(uint16_t *)(d + i) = dd;
1434 }
1435 clear_high(d, oprsz, desc);
1436}
1437
1438void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1439{
1440 intptr_t oprsz = simd_oprsz(desc);
1441 intptr_t i;
1442
1443 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1444 uint32_t aa = *(uint32_t *)(a + i);
1445 uint32_t bb = *(uint32_t *)(b + i);
1446 uint32_t dd = aa < bb ? aa : bb;
1447 *(uint32_t *)(d + i) = dd;
1448 }
1449 clear_high(d, oprsz, desc);
1450}
1451
1452void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1453{
1454 intptr_t oprsz = simd_oprsz(desc);
1455 intptr_t i;
1456
1457 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1458 uint64_t aa = *(uint64_t *)(a + i);
1459 uint64_t bb = *(uint64_t *)(b + i);
1460 uint64_t dd = aa < bb ? aa : bb;
1461 *(uint64_t *)(d + i) = dd;
1462 }
1463 clear_high(d, oprsz, desc);
1464}
1465
1466void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1467{
1468 intptr_t oprsz = simd_oprsz(desc);
1469 intptr_t i;
1470
1471 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1472 uint8_t aa = *(uint8_t *)(a + i);
1473 uint8_t bb = *(uint8_t *)(b + i);
1474 uint8_t dd = aa > bb ? aa : bb;
1475 *(uint8_t *)(d + i) = dd;
1476 }
1477 clear_high(d, oprsz, desc);
1478}
1479
1480void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1481{
1482 intptr_t oprsz = simd_oprsz(desc);
1483 intptr_t i;
1484
1485 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1486 uint16_t aa = *(uint16_t *)(a + i);
1487 uint16_t bb = *(uint16_t *)(b + i);
1488 uint16_t dd = aa > bb ? aa : bb;
1489 *(uint16_t *)(d + i) = dd;
1490 }
1491 clear_high(d, oprsz, desc);
1492}
1493
1494void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1495{
1496 intptr_t oprsz = simd_oprsz(desc);
1497 intptr_t i;
1498
1499 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1500 uint32_t aa = *(uint32_t *)(a + i);
1501 uint32_t bb = *(uint32_t *)(b + i);
1502 uint32_t dd = aa > bb ? aa : bb;
1503 *(uint32_t *)(d + i) = dd;
1504 }
1505 clear_high(d, oprsz, desc);
1506}
1507
1508void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1509{
1510 intptr_t oprsz = simd_oprsz(desc);
1511 intptr_t i;
1512
1513 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1514 uint64_t aa = *(uint64_t *)(a + i);
1515 uint64_t bb = *(uint64_t *)(b + i);
1516 uint64_t dd = aa > bb ? aa : bb;
1517 *(uint64_t *)(d + i) = dd;
1518 }
1519 clear_high(d, oprsz, desc);
1520}
38dc1294
RH
1521
1522void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
1523{
1524 intptr_t oprsz = simd_oprsz(desc);
1525 intptr_t i;
1526
6c7ab301
RH
1527 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1528 uint64_t aa = *(uint64_t *)(a + i);
1529 uint64_t bb = *(uint64_t *)(b + i);
1530 uint64_t cc = *(uint64_t *)(c + i);
1531 *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
38dc1294
RH
1532 }
1533 clear_high(d, oprsz, desc);
1534}