target/riscv/vector_helper.c
target/riscv: rvv: Add tail agnostic for vector load / store instructions
1 /*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "fpu/softfloat.h"
27 #include "tcg/tcg-gvec-desc.h"
28 #include "internals.h"
29 #include <math.h>
30
31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32 target_ulong s2)
33 {
34 int vlmax, vl;
35 RISCVCPU *cpu = env_archcpu(env);
36 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39 int xlen = riscv_cpu_xlen(env);
40 bool vill = (s2 >> (xlen - 1)) & 0x1;
41 target_ulong reserved = s2 &
42 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
43 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
44
45 if (lmul & 4) {
46 /* Fractional LMUL. */
47 if (lmul == 4 ||
48 cpu->cfg.elen >> (8 - lmul) < sew) {
49 vill = true;
50 }
51 }
52
53 if ((sew > cpu->cfg.elen)
54 || vill
55 || (ediv != 0)
56 || (reserved != 0)) {
57 /* only set vill bit. */
58 env->vill = 1;
59 env->vtype = 0;
60 env->vl = 0;
61 env->vstart = 0;
62 return 0;
63 }
64
65 vlmax = vext_get_vlmax(cpu, s2);
66 if (s1 <= vlmax) {
67 vl = s1;
68 } else {
69 vl = vlmax;
70 }
71 env->vl = vl;
72 env->vtype = s2;
73 env->vstart = 0;
74 env->vill = 0;
75 return vl;
76 }
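/*
 * Example: with VLEN = 128 and a vtype requesting SEW = 32, LMUL = 2,
 * vext_get_vlmax() yields VLMAX = LMUL * VLEN / SEW = 8, so a request of
 * s1 = 10 is clamped to vl = 8, while s1 = 5 is granted in full.
 */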
77
78 /*
79 * Note that vector data is stored in host-endian 64-bit chunks,
80 * so addressing units smaller than that needs a host-endian fixup.
81 */
82 #if HOST_BIG_ENDIAN
83 #define H1(x) ((x) ^ 7)
84 #define H1_2(x) ((x) ^ 6)
85 #define H1_4(x) ((x) ^ 4)
86 #define H2(x) ((x) ^ 3)
87 #define H4(x) ((x) ^ 1)
88 #define H8(x) ((x))
89 #else
90 #define H1(x) (x)
91 #define H1_2(x) (x)
92 #define H1_4(x) (x)
93 #define H2(x) (x)
94 #define H4(x) (x)
95 #define H8(x) (x)
96 #endif
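/*
 * Example: on a big-endian host H1(0) == 7, so the byte for element 0
 * lives at offset 7 of the first 64-bit chunk; on a little-endian host
 * the H* macros are identity mappings.
 */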
97
98 static inline uint32_t vext_nf(uint32_t desc)
99 {
100 return FIELD_EX32(simd_data(desc), VDATA, NF);
101 }
102
103 static inline uint32_t vext_vm(uint32_t desc)
104 {
105 return FIELD_EX32(simd_data(desc), VDATA, VM);
106 }
107
108 /*
109 * Encode LMUL to lmul as following:
110 * LMUL vlmul lmul
111 * 1 000 0
112 * 2 001 1
113 * 4 010 2
114 * 8 011 3
115 * - 100 -
116 * 1/8 101 -3
117 * 1/4 110 -2
118 * 1/2 111 -1
119 */
120 static inline int32_t vext_lmul(uint32_t desc)
121 {
122 return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
123 }
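/*
 * Example: VDATA.LMUL = 0b111 sign-extends to -1, i.e. LMUL = 1/2,
 * while 0b011 stays 3, i.e. LMUL = 8 (see the table above).
 */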
124
125 static inline uint32_t vext_vta(uint32_t desc)
126 {
127 return FIELD_EX32(simd_data(desc), VDATA, VTA);
128 }
129
130 /*
131  * Get the maximum number of elements that can be operated on.
132 *
133 * log2_esz: log2 of element size in bytes.
134 */
135 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
136 {
137 /*
138      * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
139      * so vlen in bytes (vlenb) is encoded as maxsz.
140 */
141 uint32_t vlenb = simd_maxsz(desc);
142
143 /* Return VLMAX */
144 int scale = vext_lmul(desc) - log2_esz;
145 return scale < 0 ? vlenb >> -scale : vlenb << scale;
146 }
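/*
 * Example: vlenb = 16 (VLEN = 128), LMUL = 1/2 (lmul = -1) and 32-bit
 * elements (log2_esz = 2) give scale = -3, so VLMAX = 16 >> 3 = 2.
 */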
147
148 /*
149  * Get the total number of elements, including prestart, body and tail elements.
150 * Note that when LMUL < 1, the tail includes the elements past VLMAX that
151 * are held in the same vector register.
152 */
153 static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
154 uint32_t esz)
155 {
156 uint32_t vlenb = simd_maxsz(desc);
157 uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
158 int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
159 ctzl(esz) - ctzl(sew) + vext_lmul(desc);
160 return (vlenb << emul) / esz;
161 }
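/*
 * Example: with vlenb = 16, SEW = 32, LMUL = 1/2 and esz = 4, emul is
 * clamped to 0 (a group never occupies less than one register), so the
 * total is 16 / 4 = 4 elements even though VLMAX is only 2; the elements
 * past VLMAX count as tail.
 */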
162
163 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
164 {
165 return (addr & env->cur_pmmask) | env->cur_pmbase;
166 }
167
168 /*
169  * This function checks watchpoints before the real load operation.
170  *
171  * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
172  * In user mode, there is no watchpoint support now.
173  *
174  * It will trigger an exception if there is no mapping in the TLB
175  * and the page table walk can't fill the TLB entry. Then the guest
176  * software can return here after processing the exception, or never return.
177 */
178 static void probe_pages(CPURISCVState *env, target_ulong addr,
179 target_ulong len, uintptr_t ra,
180 MMUAccessType access_type)
181 {
182 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
183 target_ulong curlen = MIN(pagelen, len);
184
185 probe_access(env, adjust_addr(env, addr), curlen, access_type,
186 cpu_mmu_index(env, false), ra);
187 if (len > curlen) {
188 addr += curlen;
189 curlen = len - curlen;
190 probe_access(env, adjust_addr(env, addr), curlen, access_type,
191 cpu_mmu_index(env, false), ra);
192 }
193 }
194
195 /* set agnostic elements to 1s */
196 static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
197 uint32_t tot)
198 {
199 if (is_agnostic == 0) {
200 /* policy undisturbed */
201 return;
202 }
203 if (tot - cnt == 0) {
204         return;
205 }
206 memset(base + cnt, -1, tot - cnt);
207 }
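/*
 * Note: cnt and tot are byte offsets into the register group.  For
 * example, vext_set_elems_1s(vd, 1, 8, 16) fills bytes 8..15 of vd with
 * 0xff, the all-ones pattern the spec permits for agnostic elements.
 */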
208
209 static inline void vext_set_elem_mask(void *v0, int index,
210 uint8_t value)
211 {
212 int idx = index / 64;
213 int pos = index % 64;
214 uint64_t old = ((uint64_t *)v0)[idx];
215 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
216 }
217
218 /*
219 * Earlier designs (pre-0.9) had a varying number of bits
220 * per mask value (MLEN). In the 0.9 design, MLEN=1.
221 * (Section 4.5)
222 */
223 static inline int vext_elem_mask(void *v0, int index)
224 {
225 int idx = index / 64;
226 int pos = index % 64;
227 return (((uint64_t *)v0)[idx] >> pos) & 1;
228 }
229
230 /* element operations for load and store */
231 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
232 uint32_t idx, void *vd, uintptr_t retaddr);
233
234 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
235 static void NAME(CPURISCVState *env, abi_ptr addr, \
236 uint32_t idx, void *vd, uintptr_t retaddr)\
237 { \
238 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
239 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
240 } \
241
242 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
243 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
244 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
245 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
246
247 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
248 static void NAME(CPURISCVState *env, abi_ptr addr, \
249 uint32_t idx, void *vd, uintptr_t retaddr)\
250 { \
251 ETYPE data = *((ETYPE *)vd + H(idx)); \
252 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
253 }
254
255 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
256 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
257 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
258 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
259
260 /*
261  *** stride: access vector elements from strided memory
262 */
263 static void
264 vext_ldst_stride(void *vd, void *v0, target_ulong base,
265 target_ulong stride, CPURISCVState *env,
266 uint32_t desc, uint32_t vm,
267 vext_ldst_elem_fn *ldst_elem,
268 uint32_t log2_esz, uintptr_t ra)
269 {
270 uint32_t i, k;
271 uint32_t nf = vext_nf(desc);
272 uint32_t max_elems = vext_max_elems(desc, log2_esz);
273 uint32_t esz = 1 << log2_esz;
274 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
275 uint32_t vta = vext_vta(desc);
276
277 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
278 if (!vm && !vext_elem_mask(v0, i)) {
279 continue;
280 }
281
282 k = 0;
283 while (k < nf) {
284 target_ulong addr = base + stride * i + (k << log2_esz);
285 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
286 k++;
287 }
288 }
289 env->vstart = 0;
290 /* set tail elements to 1s */
291 for (k = 0; k < nf; ++k) {
292 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
293 (k * max_elems + max_elems) * esz);
294 }
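    /*
     * If the nf segments do not fill a whole number of registers
     * (fractional EMUL), the rest of the last register used is tail
     * space too.  E.g. vlenb = 16, nf = 1, SEW = EEW = 8, LMUL = 1/2:
     * max_elems = 8 but total_elems = 16, so bytes 8..15 of vd are
     * also set to 1s.
     */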
295 if (nf * max_elems % total_elems != 0) {
296 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
297 uint32_t registers_used =
298 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
299 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
300 registers_used * vlenb);
301 }
302 }
303
304 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
305 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
306 target_ulong stride, CPURISCVState *env, \
307 uint32_t desc) \
308 { \
309 uint32_t vm = vext_vm(desc); \
310 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
311 ctzl(sizeof(ETYPE)), GETPC()); \
312 }
313
314 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
315 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
316 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
317 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
318
319 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
320 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
321 target_ulong stride, CPURISCVState *env, \
322 uint32_t desc) \
323 { \
324 uint32_t vm = vext_vm(desc); \
325 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
326 ctzl(sizeof(ETYPE)), GETPC()); \
327 }
328
329 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
330 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
331 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
332 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
333
334 /*
335 *** unit-stride: access elements stored contiguously in memory
336 */
337
338 /* unmasked unit-stride load and store operation */
339 static void
340 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
341 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
342 uintptr_t ra)
343 {
344 uint32_t i, k;
345 uint32_t nf = vext_nf(desc);
346 uint32_t max_elems = vext_max_elems(desc, log2_esz);
347 uint32_t esz = 1 << log2_esz;
348 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
349 uint32_t vta = vext_vta(desc);
350
351 /* load bytes from guest memory */
352 for (i = env->vstart; i < evl; i++, env->vstart++) {
353 k = 0;
354 while (k < nf) {
355 target_ulong addr = base + ((i * nf + k) << log2_esz);
356 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
357 k++;
358 }
359 }
360 env->vstart = 0;
361 /* set tail elements to 1s */
362 for (k = 0; k < nf; ++k) {
363 vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz,
364 (k * max_elems + max_elems) * esz);
365 }
366 if (nf * max_elems % total_elems != 0) {
367 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
368 uint32_t registers_used =
369 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
370 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
371 registers_used * vlenb);
372 }
373 }
374
375 /*
376  * A masked unit-stride load or store operation is a special case of the
377  * strided operation, with stride = NF * sizeof(ETYPE).
378 */
379
380 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
381 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
382 CPURISCVState *env, uint32_t desc) \
383 { \
384 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
385 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
386 ctzl(sizeof(ETYPE)), GETPC()); \
387 } \
388 \
389 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
390 CPURISCVState *env, uint32_t desc) \
391 { \
392 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
393 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
394 }
395
396 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
397 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
398 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
399 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
400
401 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
402 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
403 CPURISCVState *env, uint32_t desc) \
404 { \
405 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
406 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
407 ctzl(sizeof(ETYPE)), GETPC()); \
408 } \
409 \
410 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
411 CPURISCVState *env, uint32_t desc) \
412 { \
413 vext_ldst_us(vd, base, env, desc, STORE_FN, \
414 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
415 }
416
417 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
418 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
419 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
420 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
421
422 /*
423  *** unit-stride mask load and store, EEW = 1
424 */
425 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
426 CPURISCVState *env, uint32_t desc)
427 {
428 /* evl = ceil(vl/8) */
429 uint8_t evl = (env->vl + 7) >> 3;
430 vext_ldst_us(vd, base, env, desc, lde_b,
431 0, evl, GETPC());
432 }
433
434 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
435 CPURISCVState *env, uint32_t desc)
436 {
437 /* evl = ceil(vl/8) */
438 uint8_t evl = (env->vl + 7) >> 3;
439 vext_ldst_us(vd, base, env, desc, ste_b,
440 0, evl, GETPC());
441 }
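/*
 * Example: vl = 17 gives evl = (17 + 7) >> 3 = 3, i.e. three bytes of
 * mask bits are transferred regardless of the current SEW and LMUL.
 */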
442
443 /*
444  *** index: access vector elements from indexed memory
445 */
446 typedef target_ulong vext_get_index_addr(target_ulong base,
447 uint32_t idx, void *vs2);
448
449 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
450 static target_ulong NAME(target_ulong base, \
451 uint32_t idx, void *vs2) \
452 { \
453 return (base + *((ETYPE *)vs2 + H(idx))); \
454 }
455
456 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
457 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
458 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
459 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
460
461 static inline void
462 vext_ldst_index(void *vd, void *v0, target_ulong base,
463 void *vs2, CPURISCVState *env, uint32_t desc,
464 vext_get_index_addr get_index_addr,
465 vext_ldst_elem_fn *ldst_elem,
466 uint32_t log2_esz, uintptr_t ra)
467 {
468 uint32_t i, k;
469 uint32_t nf = vext_nf(desc);
470 uint32_t vm = vext_vm(desc);
471 uint32_t max_elems = vext_max_elems(desc, log2_esz);
472 uint32_t esz = 1 << log2_esz;
473 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
474 uint32_t vta = vext_vta(desc);
475
476 /* load bytes from guest memory */
477 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
478 if (!vm && !vext_elem_mask(v0, i)) {
479 continue;
480 }
481
482 k = 0;
483 while (k < nf) {
484 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
485 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
486 k++;
487 }
488 }
489 env->vstart = 0;
490 /* set tail elements to 1s */
491 for (k = 0; k < nf; ++k) {
492 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
493 (k * max_elems + max_elems) * esz);
494 }
495 if (nf * max_elems % total_elems != 0) {
496 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
497 uint32_t registers_used =
498 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
499 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
500 registers_used * vlenb);
501 }
502 }
503
504 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
505 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
506 void *vs2, CPURISCVState *env, uint32_t desc) \
507 { \
508 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
509 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
510 }
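/*
 * The helper name encodes both widths: vlxei<index EEW>_<data size>_v.
 * E.g. vlxei16_32_v below uses 16-bit indices (idx_h) to load 32-bit
 * data elements (lde_w).
 */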
511
512 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
513 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
514 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
515 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
516 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
517 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
518 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
519 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
520 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
521 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
522 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
523 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
524 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
525 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
526 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
527 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
528
529 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
530 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
531 void *vs2, CPURISCVState *env, uint32_t desc) \
532 { \
533 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
534 STORE_FN, ctzl(sizeof(ETYPE)), \
535 GETPC()); \
536 }
537
538 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
539 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
540 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
541 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
542 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
543 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
544 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
545 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
546 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
547 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
548 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
549 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
550 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
551 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
552 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
553 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
554
555 /*
556  *** unit-stride fault-only-first load instructions
557 */
558 static inline void
559 vext_ldff(void *vd, void *v0, target_ulong base,
560 CPURISCVState *env, uint32_t desc,
561 vext_ldst_elem_fn *ldst_elem,
562 uint32_t log2_esz, uintptr_t ra)
563 {
564 void *host;
565 uint32_t i, k, vl = 0;
566 uint32_t nf = vext_nf(desc);
567 uint32_t vm = vext_vm(desc);
568 uint32_t max_elems = vext_max_elems(desc, log2_esz);
569 uint32_t esz = 1 << log2_esz;
570 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
571 uint32_t vta = vext_vta(desc);
572 target_ulong addr, offset, remain;
573
574     /* probe every access */
575 for (i = env->vstart; i < env->vl; i++) {
576 if (!vm && !vext_elem_mask(v0, i)) {
577 continue;
578 }
579 addr = adjust_addr(env, base + i * (nf << log2_esz));
580 if (i == 0) {
581 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
582 } else {
583 /* if it triggers an exception, no need to check watchpoint */
584 remain = nf << log2_esz;
585 while (remain > 0) {
586 offset = -(addr | TARGET_PAGE_MASK);
587 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
588 cpu_mmu_index(env, false));
589 if (host) {
590 #ifdef CONFIG_USER_ONLY
591 if (page_check_range(addr, offset, PAGE_READ) < 0) {
592 vl = i;
593 goto ProbeSuccess;
594 }
595 #else
596 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
597 #endif
598 } else {
599 vl = i;
600 goto ProbeSuccess;
601 }
602 if (remain <= offset) {
603 break;
604 }
605 remain -= offset;
606 addr = adjust_addr(env, addr + offset);
607 }
608 }
609 }
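    /*
     * Fault-only-first semantics: element 0 (if active) must complete or
     * trap normally via the probe above, but if a later element cannot be
     * accessed, vl is reduced to that element's index and no exception is
     * raised.
     */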
610 ProbeSuccess:
611 /* load bytes from guest memory */
612 if (vl != 0) {
613 env->vl = vl;
614 }
615 for (i = env->vstart; i < env->vl; i++) {
616 k = 0;
617 if (!vm && !vext_elem_mask(v0, i)) {
618 continue;
619 }
620 while (k < nf) {
621 target_ulong addr = base + ((i * nf + k) << log2_esz);
622 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
623 k++;
624 }
625 }
626 env->vstart = 0;
627 /* set tail elements to 1s */
628 for (k = 0; k < nf; ++k) {
629 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
630 (k * max_elems + max_elems) * esz);
631 }
632 if (nf * max_elems % total_elems != 0) {
633 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
634 uint32_t registers_used =
635 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
636 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
637 registers_used * vlenb);
638 }
639 }
640
641 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
642 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
643 CPURISCVState *env, uint32_t desc) \
644 { \
645 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
646 ctzl(sizeof(ETYPE)), GETPC()); \
647 }
648
649 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
650 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
651 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
652 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
653
654 #define DO_SWAP(N, M) (M)
655 #define DO_AND(N, M) (N & M)
656 #define DO_XOR(N, M) (N ^ M)
657 #define DO_OR(N, M) (N | M)
658 #define DO_ADD(N, M) (N + M)
659
660 /* Signed min/max */
661 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
662 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
663
664 /* Unsigned min/max */
665 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
666 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
667
668 /*
669 *** load and store whole register instructions
670 */
671 static void
672 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
673 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
674 {
675 uint32_t i, k, off, pos;
676 uint32_t nf = vext_nf(desc);
677 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
678 uint32_t max_elems = vlenb >> log2_esz;
679
680 k = env->vstart / max_elems;
681 off = env->vstart % max_elems;
682
683 if (off) {
684         /* load/store the remaining elements of the segment pointed to by vstart */
685 for (pos = off; pos < max_elems; pos++, env->vstart++) {
686 target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
687 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
688 }
689 k++;
690 }
691
692     /* load/store elements of the remaining segments */
693 for (; k < nf; k++) {
694 for (i = 0; i < max_elems; i++, env->vstart++) {
695 target_ulong addr = base + ((i + k * max_elems) << log2_esz);
696 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
697 }
698 }
699
700 env->vstart = 0;
701 }
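/*
 * Example: with VLEN = 128 (vlenb = 16), vl8re8_v has nf = 8 and
 * max_elems = 16, so all 128 bytes of the register group are
 * transferred regardless of vl and vtype; only vstart is honoured.
 */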
702
703 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
704 void HELPER(NAME)(void *vd, target_ulong base, \
705 CPURISCVState *env, uint32_t desc) \
706 { \
707 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
708 ctzl(sizeof(ETYPE)), GETPC()); \
709 }
710
711 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
712 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
713 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
714 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
715 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
716 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
717 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
718 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
719 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
720 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
721 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
722 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
723 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
724 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
725 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
726 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
727
728 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
729 void HELPER(NAME)(void *vd, target_ulong base, \
730 CPURISCVState *env, uint32_t desc) \
731 { \
732 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
733 ctzl(sizeof(ETYPE)), GETPC()); \
734 }
735
736 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
737 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
738 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
739 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
740
741 /*
742 *** Vector Integer Arithmetic Instructions
743 */
744
745 /* expand macro args before macro */
746 #define RVVCALL(macro, ...) macro(__VA_ARGS__)
747
748 /* (TD, T1, T2, TX1, TX2) */
749 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
750 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
751 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
752 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
753 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
754 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
755 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
756 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
757 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
758 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
759 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
760 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
761 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
762 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
763 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
764 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
765 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
766 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
767 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
768 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
769 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
770 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
771 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
772 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
773 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
774 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
775 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
776 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
777 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
778 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
779
780 /* operation of two vector elements */
781 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
782
783 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
784 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
785 { \
786 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
787 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
788 *((TD *)vd + HD(i)) = OP(s2, s1); \
789 }
790 #define DO_SUB(N, M) (N - M)
791 #define DO_RSUB(N, M) (M - N)
792
793 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
794 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
795 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
796 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
797 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
798 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
799 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
800 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
801
802 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
803 CPURISCVState *env, uint32_t desc,
804 opivv2_fn *fn, uint32_t esz)
805 {
806 uint32_t vm = vext_vm(desc);
807 uint32_t vl = env->vl;
808 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
809 uint32_t vta = vext_vta(desc);
810 uint32_t i;
811
812 for (i = env->vstart; i < vl; i++) {
813 if (!vm && !vext_elem_mask(v0, i)) {
814 continue;
815 }
816 fn(vd, vs1, vs2, i);
817 }
818 env->vstart = 0;
819 /* set tail elements to 1s */
820 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
821 }
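/*
 * Example: VLEN = 128, LMUL = 1, SEW = 32 (esz = 4) and vl = 2 give
 * total_elems = 4, so bytes 8..15 of vd (the two tail elements) are
 * filled with 1s when vta is set.
 */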
822
823 /* generate the helpers for OPIVV */
824 #define GEN_VEXT_VV(NAME, ESZ) \
825 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
826 void *vs2, CPURISCVState *env, \
827 uint32_t desc) \
828 { \
829 do_vext_vv(vd, v0, vs1, vs2, env, desc, \
830 do_##NAME, ESZ); \
831 }
832
833 GEN_VEXT_VV(vadd_vv_b, 1)
834 GEN_VEXT_VV(vadd_vv_h, 2)
835 GEN_VEXT_VV(vadd_vv_w, 4)
836 GEN_VEXT_VV(vadd_vv_d, 8)
837 GEN_VEXT_VV(vsub_vv_b, 1)
838 GEN_VEXT_VV(vsub_vv_h, 2)
839 GEN_VEXT_VV(vsub_vv_w, 4)
840 GEN_VEXT_VV(vsub_vv_d, 8)
841
842 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
843
844 /*
845  * (T1)s1 gives the real operand type.
846  * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
847 */
848 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
849 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
850 { \
851 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
852 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
853 }
854
855 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
856 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
857 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
858 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
859 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
860 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
861 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
862 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
863 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
864 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
865 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
866 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
867
868 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
869 CPURISCVState *env, uint32_t desc,
870 opivx2_fn fn)
871 {
872 uint32_t vm = vext_vm(desc);
873 uint32_t vl = env->vl;
874 uint32_t i;
875
876 for (i = env->vstart; i < vl; i++) {
877 if (!vm && !vext_elem_mask(v0, i)) {
878 continue;
879 }
880 fn(vd, s1, vs2, i);
881 }
882 env->vstart = 0;
883 }
884
885 /* generate the helpers for OPIVX */
886 #define GEN_VEXT_VX(NAME) \
887 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
888 void *vs2, CPURISCVState *env, \
889 uint32_t desc) \
890 { \
891 do_vext_vx(vd, v0, s1, vs2, env, desc, \
892 do_##NAME); \
893 }
894
895 GEN_VEXT_VX(vadd_vx_b)
896 GEN_VEXT_VX(vadd_vx_h)
897 GEN_VEXT_VX(vadd_vx_w)
898 GEN_VEXT_VX(vadd_vx_d)
899 GEN_VEXT_VX(vsub_vx_b)
900 GEN_VEXT_VX(vsub_vx_h)
901 GEN_VEXT_VX(vsub_vx_w)
902 GEN_VEXT_VX(vsub_vx_d)
903 GEN_VEXT_VX(vrsub_vx_b)
904 GEN_VEXT_VX(vrsub_vx_h)
905 GEN_VEXT_VX(vrsub_vx_w)
906 GEN_VEXT_VX(vrsub_vx_d)
907
908 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
909 {
910 intptr_t oprsz = simd_oprsz(desc);
911 intptr_t i;
912
913 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
914 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
915 }
916 }
917
918 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
919 {
920 intptr_t oprsz = simd_oprsz(desc);
921 intptr_t i;
922
923 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
924 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
925 }
926 }
927
928 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
929 {
930 intptr_t oprsz = simd_oprsz(desc);
931 intptr_t i;
932
933 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
934 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
935 }
936 }
937
938 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
939 {
940 intptr_t oprsz = simd_oprsz(desc);
941 intptr_t i;
942
943 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
944 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
945 }
946 }
947
948 /* Vector Widening Integer Add/Subtract */
949 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
950 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
951 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
952 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
953 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
954 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
955 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
956 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
957 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
958 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
959 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
960 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
961 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
962 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
963 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
964 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
965 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
966 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
967 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
968 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
969 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
970 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
971 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
972 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
973 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
974 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
975 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
976 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
977 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
978 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
979 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
980 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
981 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
982 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
983 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
984 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
985 GEN_VEXT_VV(vwaddu_vv_b, 2)
986 GEN_VEXT_VV(vwaddu_vv_h, 4)
987 GEN_VEXT_VV(vwaddu_vv_w, 8)
988 GEN_VEXT_VV(vwsubu_vv_b, 2)
989 GEN_VEXT_VV(vwsubu_vv_h, 4)
990 GEN_VEXT_VV(vwsubu_vv_w, 8)
991 GEN_VEXT_VV(vwadd_vv_b, 2)
992 GEN_VEXT_VV(vwadd_vv_h, 4)
993 GEN_VEXT_VV(vwadd_vv_w, 8)
994 GEN_VEXT_VV(vwsub_vv_b, 2)
995 GEN_VEXT_VV(vwsub_vv_h, 4)
996 GEN_VEXT_VV(vwsub_vv_w, 8)
997 GEN_VEXT_VV(vwaddu_wv_b, 2)
998 GEN_VEXT_VV(vwaddu_wv_h, 4)
999 GEN_VEXT_VV(vwaddu_wv_w, 8)
1000 GEN_VEXT_VV(vwsubu_wv_b, 2)
1001 GEN_VEXT_VV(vwsubu_wv_h, 4)
1002 GEN_VEXT_VV(vwsubu_wv_w, 8)
1003 GEN_VEXT_VV(vwadd_wv_b, 2)
1004 GEN_VEXT_VV(vwadd_wv_h, 4)
1005 GEN_VEXT_VV(vwadd_wv_w, 8)
1006 GEN_VEXT_VV(vwsub_wv_b, 2)
1007 GEN_VEXT_VV(vwsub_wv_h, 4)
1008 GEN_VEXT_VV(vwsub_wv_w, 8)
1009
1010 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
1011 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
1012 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
1013 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
1014 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
1015 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
1016 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
1017 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
1018 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
1019 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
1020 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
1021 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
1022 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
1023 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
1024 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
1025 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
1026 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
1027 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
1028 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
1029 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
1030 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
1031 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
1032 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
1033 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
1034 GEN_VEXT_VX(vwaddu_vx_b)
1035 GEN_VEXT_VX(vwaddu_vx_h)
1036 GEN_VEXT_VX(vwaddu_vx_w)
1037 GEN_VEXT_VX(vwsubu_vx_b)
1038 GEN_VEXT_VX(vwsubu_vx_h)
1039 GEN_VEXT_VX(vwsubu_vx_w)
1040 GEN_VEXT_VX(vwadd_vx_b)
1041 GEN_VEXT_VX(vwadd_vx_h)
1042 GEN_VEXT_VX(vwadd_vx_w)
1043 GEN_VEXT_VX(vwsub_vx_b)
1044 GEN_VEXT_VX(vwsub_vx_h)
1045 GEN_VEXT_VX(vwsub_vx_w)
1046 GEN_VEXT_VX(vwaddu_wx_b)
1047 GEN_VEXT_VX(vwaddu_wx_h)
1048 GEN_VEXT_VX(vwaddu_wx_w)
1049 GEN_VEXT_VX(vwsubu_wx_b)
1050 GEN_VEXT_VX(vwsubu_wx_h)
1051 GEN_VEXT_VX(vwsubu_wx_w)
1052 GEN_VEXT_VX(vwadd_wx_b)
1053 GEN_VEXT_VX(vwadd_wx_h)
1054 GEN_VEXT_VX(vwadd_wx_w)
1055 GEN_VEXT_VX(vwsub_wx_b)
1056 GEN_VEXT_VX(vwsub_wx_h)
1057 GEN_VEXT_VX(vwsub_wx_w)
1058
1059 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1060 #define DO_VADC(N, M, C) (N + M + C)
1061 #define DO_VSBC(N, M, C) (N - M - C)
1062
1063 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
1064 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1065 CPURISCVState *env, uint32_t desc) \
1066 { \
1067 uint32_t vl = env->vl; \
1068 uint32_t i; \
1069 \
1070 for (i = env->vstart; i < vl; i++) { \
1071 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1072 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1073 ETYPE carry = vext_elem_mask(v0, i); \
1074 \
1075 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
1076 } \
1077 env->vstart = 0; \
1078 }
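/*
 * Note: vadc/vsbc always read v0, which supplies the per-element
 * carry/borrow-in bit rather than acting as an execution mask, so there
 * is no vm check in the loop above.
 */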
1079
1080 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
1081 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
1082 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
1083 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
1084
1085 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
1086 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
1087 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
1088 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
1089
1090 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
1091 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1092 CPURISCVState *env, uint32_t desc) \
1093 { \
1094 uint32_t vl = env->vl; \
1095 uint32_t i; \
1096 \
1097 for (i = env->vstart; i < vl; i++) { \
1098 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1099 ETYPE carry = vext_elem_mask(v0, i); \
1100 \
1101 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1102 } \
1103 env->vstart = 0; \
1104 }
1105
1106 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
1107 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1108 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1109 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1110
1111 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
1112 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1113 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1114 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1115
1116 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
1117 (__typeof(N))(N + M) < N)
1118 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
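/*
 * Carry/borrow-out is detected via unsigned wrap-around.  E.g. for 8-bit
 * operands N = 200, M = 100, C = 0: (uint8_t)(N + M) == 44 < N, so
 * DO_MADC reports a carry-out of 1.
 */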
1119
1120 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
1121 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1122 CPURISCVState *env, uint32_t desc) \
1123 { \
1124 uint32_t vl = env->vl; \
1125 uint32_t vm = vext_vm(desc); \
1126 uint32_t i; \
1127 \
1128 for (i = env->vstart; i < vl; i++) { \
1129 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1130 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1131 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1132 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
1133 } \
1134 env->vstart = 0; \
1135 }
1136
1137 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1138 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1139 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1140 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1141
1142 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1143 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1144 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1145 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1146
1147 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1148 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1149 void *vs2, CPURISCVState *env, uint32_t desc) \
1150 { \
1151 uint32_t vl = env->vl; \
1152 uint32_t vm = vext_vm(desc); \
1153 uint32_t i; \
1154 \
1155 for (i = env->vstart; i < vl; i++) { \
1156 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1157 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1158 vext_set_elem_mask(vd, i, \
1159 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1160 } \
1161 env->vstart = 0; \
1162 }
1163
1164 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1165 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1166 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1167 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1168
1169 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1170 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1171 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1172 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1173
1174 /* Vector Bitwise Logical Instructions */
1175 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1176 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1177 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1178 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1179 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1180 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1181 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1182 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1183 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1184 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1185 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1186 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1187 GEN_VEXT_VV(vand_vv_b, 1)
1188 GEN_VEXT_VV(vand_vv_h, 2)
1189 GEN_VEXT_VV(vand_vv_w, 4)
1190 GEN_VEXT_VV(vand_vv_d, 8)
1191 GEN_VEXT_VV(vor_vv_b, 1)
1192 GEN_VEXT_VV(vor_vv_h, 2)
1193 GEN_VEXT_VV(vor_vv_w, 4)
1194 GEN_VEXT_VV(vor_vv_d, 8)
1195 GEN_VEXT_VV(vxor_vv_b, 1)
1196 GEN_VEXT_VV(vxor_vv_h, 2)
1197 GEN_VEXT_VV(vxor_vv_w, 4)
1198 GEN_VEXT_VV(vxor_vv_d, 8)
1199
1200 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1201 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1202 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1203 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1204 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1205 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1206 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1207 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1208 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1209 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1210 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1211 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1212 GEN_VEXT_VX(vand_vx_b)
1213 GEN_VEXT_VX(vand_vx_h)
1214 GEN_VEXT_VX(vand_vx_w)
1215 GEN_VEXT_VX(vand_vx_d)
1216 GEN_VEXT_VX(vor_vx_b)
1217 GEN_VEXT_VX(vor_vx_h)
1218 GEN_VEXT_VX(vor_vx_w)
1219 GEN_VEXT_VX(vor_vx_d)
1220 GEN_VEXT_VX(vxor_vx_b)
1221 GEN_VEXT_VX(vxor_vx_h)
1222 GEN_VEXT_VX(vxor_vx_w)
1223 GEN_VEXT_VX(vxor_vx_d)
1224
1225 /* Vector Single-Width Bit Shift Instructions */
1226 #define DO_SLL(N, M) (N << (M))
1227 #define DO_SRL(N, M) (N >> (M))
1228
1229 /* generate the helpers for shift instructions with two vector operands */
1230 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
1231 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1232 void *vs2, CPURISCVState *env, uint32_t desc) \
1233 { \
1234 uint32_t vm = vext_vm(desc); \
1235 uint32_t vl = env->vl; \
1236 uint32_t i; \
1237 \
1238 for (i = env->vstart; i < vl; i++) { \
1239 if (!vm && !vext_elem_mask(v0, i)) { \
1240 continue; \
1241 } \
1242 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1243 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1244 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1245 } \
1246 env->vstart = 0; \
1247 }
1248
1249 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1250 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1251 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1252 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1253
1254 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1255 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1256 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1257 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1258
1259 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1260 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1261 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1262 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
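/*
 * vsra reuses DO_SRL but instantiates TS2 as a signed type, so the C
 * ">>" becomes an arithmetic right shift here (relying on the compiler
 * shifting signed values arithmetically, as QEMU assumes elsewhere).
 */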
1263
1264 /* generate the helpers for shift instructions with one vector and one scalar */
1265 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1266 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1267 void *vs2, CPURISCVState *env, uint32_t desc) \
1268 { \
1269 uint32_t vm = vext_vm(desc); \
1270 uint32_t vl = env->vl; \
1271 uint32_t i; \
1272 \
1273 for (i = env->vstart; i < vl; i++) { \
1274 if (!vm && !vext_elem_mask(v0, i)) { \
1275 continue; \
1276 } \
1277 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1278 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1279 } \
1280 env->vstart = 0; \
1281 }
1282
1283 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1284 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1285 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1286 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1287
1288 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1289 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1290 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1291 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1292
1293 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1294 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1295 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1296 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1297
1298 /* Vector Narrowing Integer Right Shift Instructions */
1299 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1300 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1301 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1302 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1303 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1304 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1305 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1306 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1307 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1308 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1309 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1310 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1311
1312 /* Vector Integer Comparison Instructions */
1313 #define DO_MSEQ(N, M) (N == M)
1314 #define DO_MSNE(N, M) (N != M)
1315 #define DO_MSLT(N, M) (N < M)
1316 #define DO_MSLE(N, M) (N <= M)
1317 #define DO_MSGT(N, M) (N > M)
1318
1319 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1320 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1321 CPURISCVState *env, uint32_t desc) \
1322 { \
1323 uint32_t vm = vext_vm(desc); \
1324 uint32_t vl = env->vl; \
1325 uint32_t i; \
1326 \
1327 for (i = env->vstart; i < vl; i++) { \
1328 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1329 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1330 if (!vm && !vext_elem_mask(v0, i)) { \
1331 continue; \
1332 } \
1333 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1334 } \
1335 env->vstart = 0; \
1336 }
1337
1338 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1339 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1340 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1341 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1342
1343 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1344 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1345 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1346 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1347
1348 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1349 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1350 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1351 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1352
1353 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1354 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1355 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1356 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1357
1358 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1359 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1360 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1361 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1362
1363 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1364 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1365 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1366 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1367
1368 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1369 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1370 CPURISCVState *env, uint32_t desc) \
1371 { \
1372 uint32_t vm = vext_vm(desc); \
1373 uint32_t vl = env->vl; \
1374 uint32_t i; \
1375 \
1376 for (i = env->vstart; i < vl; i++) { \
1377 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1378 if (!vm && !vext_elem_mask(v0, i)) { \
1379 continue; \
1380 } \
1381 vext_set_elem_mask(vd, i, \
1382 DO_OP(s2, (ETYPE)(target_long)s1)); \
1383 } \
1384 env->vstart = 0; \
1385 }
1386
1387 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1388 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1389 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1390 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1391
1392 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1393 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1394 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1395 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1396
1397 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1398 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1399 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1400 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1401
1402 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1403 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1404 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1405 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1406
1407 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1408 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1409 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1410 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1411
1412 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1413 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1414 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1415 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1416
1417 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1418 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1419 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1420 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1421
1422 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1423 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1424 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1425 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1426
1427 /* Vector Integer Min/Max Instructions */
1428 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1429 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1430 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1431 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1432 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1433 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1434 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1435 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1436 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1437 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1438 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1439 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1440 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1441 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1442 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1443 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1444 GEN_VEXT_VV(vminu_vv_b, 1)
1445 GEN_VEXT_VV(vminu_vv_h, 2)
1446 GEN_VEXT_VV(vminu_vv_w, 4)
1447 GEN_VEXT_VV(vminu_vv_d, 8)
1448 GEN_VEXT_VV(vmin_vv_b, 1)
1449 GEN_VEXT_VV(vmin_vv_h, 2)
1450 GEN_VEXT_VV(vmin_vv_w, 4)
1451 GEN_VEXT_VV(vmin_vv_d, 8)
1452 GEN_VEXT_VV(vmaxu_vv_b, 1)
1453 GEN_VEXT_VV(vmaxu_vv_h, 2)
1454 GEN_VEXT_VV(vmaxu_vv_w, 4)
1455 GEN_VEXT_VV(vmaxu_vv_d, 8)
1456 GEN_VEXT_VV(vmax_vv_b, 1)
1457 GEN_VEXT_VV(vmax_vv_h, 2)
1458 GEN_VEXT_VV(vmax_vv_w, 4)
1459 GEN_VEXT_VV(vmax_vv_d, 8)
1460
1461 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1462 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1463 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1464 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1465 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1466 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1467 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1468 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1469 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1470 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1471 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1472 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1473 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1474 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1475 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1476 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1477 GEN_VEXT_VX(vminu_vx_b)
1478 GEN_VEXT_VX(vminu_vx_h)
1479 GEN_VEXT_VX(vminu_vx_w)
1480 GEN_VEXT_VX(vminu_vx_d)
1481 GEN_VEXT_VX(vmin_vx_b)
1482 GEN_VEXT_VX(vmin_vx_h)
1483 GEN_VEXT_VX(vmin_vx_w)
1484 GEN_VEXT_VX(vmin_vx_d)
1485 GEN_VEXT_VX(vmaxu_vx_b)
1486 GEN_VEXT_VX(vmaxu_vx_h)
1487 GEN_VEXT_VX(vmaxu_vx_w)
1488 GEN_VEXT_VX(vmaxu_vx_d)
1489 GEN_VEXT_VX(vmax_vx_b)
1490 GEN_VEXT_VX(vmax_vx_h)
1491 GEN_VEXT_VX(vmax_vx_w)
1492 GEN_VEXT_VX(vmax_vx_d)
1493
1494 /* Vector Single-Width Integer Multiply Instructions */
1495 #define DO_MUL(N, M) (N * M)
1496 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1497 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1498 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1499 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1500 GEN_VEXT_VV(vmul_vv_b, 1)
1501 GEN_VEXT_VV(vmul_vv_h, 2)
1502 GEN_VEXT_VV(vmul_vv_w, 4)
1503 GEN_VEXT_VV(vmul_vv_d, 8)
1504
1505 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1506 {
1507 return (int16_t)s2 * (int16_t)s1 >> 8;
1508 }
1509
1510 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1511 {
1512 return (int32_t)s2 * (int32_t)s1 >> 16;
1513 }
1514
1515 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1516 {
1517 return (int64_t)s2 * (int64_t)s1 >> 32;
1518 }
1519
1520 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1521 {
1522 uint64_t hi_64, lo_64;
1523
1524 muls64(&lo_64, &hi_64, s1, s2);
1525 return hi_64;
1526 }
1527
1528 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1529 {
1530 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1531 }
1532
1533 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1534 {
1535 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1536 }
1537
1538 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1539 {
1540 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1541 }
1542
1543 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1544 {
1545 uint64_t hi_64, lo_64;
1546
1547 mulu64(&lo_64, &hi_64, s2, s1);
1548 return hi_64;
1549 }
1550
1551 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1552 {
1553 return (int16_t)s2 * (uint16_t)s1 >> 8;
1554 }
1555
1556 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1557 {
1558 return (int32_t)s2 * (uint32_t)s1 >> 16;
1559 }
1560
1561 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1562 {
1563 return (int64_t)s2 * (uint64_t)s1 >> 32;
1564 }
1565
1566 /*
1567 * Let A = the raw 64-bit pattern of the signed operand (s2),
1568 *     B = the unsigned operand (s1),
1569 *     P = mulu64(A, B), the unsigned 128-bit product.
1570 *
1571 * If the sign bit of A is clear, the signed product SP is simply P.
1572 *
1573 * If the sign bit of A is set, the signed operand's value is A - 2 ** 64,
1574 * so:
1575 *     SP = (A - 2 ** 64) * B
1576 *        = A * B - 2 ** 64 * B
1577 *        = P - 2 ** 64 * B
1578 *
1579 * i.e. SP differs from P only in the upper 64 bits, and by exactly B.
1580 *
1581 * Hence:
1582 *     HI_P -= (A < 0 ? B : 0)
1583 */
1584
1585 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1586 {
1587 uint64_t hi_64, lo_64;
1588
1589 mulu64(&lo_64, &hi_64, s2, s1);
1590
1591 hi_64 -= s2 < 0 ? s1 : 0;
1592 return hi_64;
1593 }
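/*
 * A worked 8-bit analogue of the correction above (illustrative only,
 * not part of the original helper): take s2 = -1 (pattern 0xFF) and
 * s1 = 2.  The unsigned product is 0xFF * 0x02 = 0x01FE, so HI_P = 0x01.
 * Since s2 < 0, HI_P -= s1 gives 0x01 - 0x02 = 0xFF, which is the high
 * byte of the true signed product -1 * 2 = -2 = 0xFFFE.  do_mulhsu_d
 * applies the same correction with 2 ** 64 in place of 2 ** 8.
 */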
1594
1595 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1596 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1597 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1598 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1599 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1600 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1601 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1602 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1603 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1604 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1605 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1606 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1607 GEN_VEXT_VV(vmulh_vv_b, 1)
1608 GEN_VEXT_VV(vmulh_vv_h, 2)
1609 GEN_VEXT_VV(vmulh_vv_w, 4)
1610 GEN_VEXT_VV(vmulh_vv_d, 8)
1611 GEN_VEXT_VV(vmulhu_vv_b, 1)
1612 GEN_VEXT_VV(vmulhu_vv_h, 2)
1613 GEN_VEXT_VV(vmulhu_vv_w, 4)
1614 GEN_VEXT_VV(vmulhu_vv_d, 8)
1615 GEN_VEXT_VV(vmulhsu_vv_b, 1)
1616 GEN_VEXT_VV(vmulhsu_vv_h, 2)
1617 GEN_VEXT_VV(vmulhsu_vv_w, 4)
1618 GEN_VEXT_VV(vmulhsu_vv_d, 8)
1619
1620 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1621 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1622 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1623 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1624 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1625 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1626 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1627 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1628 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1629 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1630 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1631 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1632 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1633 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1634 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1635 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1636 GEN_VEXT_VX(vmul_vx_b)
1637 GEN_VEXT_VX(vmul_vx_h)
1638 GEN_VEXT_VX(vmul_vx_w)
1639 GEN_VEXT_VX(vmul_vx_d)
1640 GEN_VEXT_VX(vmulh_vx_b)
1641 GEN_VEXT_VX(vmulh_vx_h)
1642 GEN_VEXT_VX(vmulh_vx_w)
1643 GEN_VEXT_VX(vmulh_vx_d)
1644 GEN_VEXT_VX(vmulhu_vx_b)
1645 GEN_VEXT_VX(vmulhu_vx_h)
1646 GEN_VEXT_VX(vmulhu_vx_w)
1647 GEN_VEXT_VX(vmulhu_vx_d)
1648 GEN_VEXT_VX(vmulhsu_vx_b)
1649 GEN_VEXT_VX(vmulhsu_vx_h)
1650 GEN_VEXT_VX(vmulhsu_vx_w)
1651 GEN_VEXT_VX(vmulhsu_vx_d)
1652
1653 /* Vector Integer Divide Instructions */
1654 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1655 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1656 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\
1657 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1658 #define DO_REM(N, M) (unlikely(M == 0) ? N :\
1659 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
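/*
 * The macros above fold in the RVV special cases so that division never
 * traps: x / 0 yields all ones, x % 0 yields x, and the signed overflow
 * case (N == -N only holds for the most negative value of the type)
 * yields N for the quotient and 0 for the remainder, e.g.
 * INT8_MIN / -1 == INT8_MIN and INT8_MIN % -1 == 0.
 */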
1660
1661 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1662 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1663 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1664 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1665 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1666 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1667 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1668 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1669 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1670 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1671 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1672 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1673 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1674 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1675 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1676 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1677 GEN_VEXT_VV(vdivu_vv_b, 1)
1678 GEN_VEXT_VV(vdivu_vv_h, 2)
1679 GEN_VEXT_VV(vdivu_vv_w, 4)
1680 GEN_VEXT_VV(vdivu_vv_d, 8)
1681 GEN_VEXT_VV(vdiv_vv_b, 1)
1682 GEN_VEXT_VV(vdiv_vv_h, 2)
1683 GEN_VEXT_VV(vdiv_vv_w, 4)
1684 GEN_VEXT_VV(vdiv_vv_d, 8)
1685 GEN_VEXT_VV(vremu_vv_b, 1)
1686 GEN_VEXT_VV(vremu_vv_h, 2)
1687 GEN_VEXT_VV(vremu_vv_w, 4)
1688 GEN_VEXT_VV(vremu_vv_d, 8)
1689 GEN_VEXT_VV(vrem_vv_b, 1)
1690 GEN_VEXT_VV(vrem_vv_h, 2)
1691 GEN_VEXT_VV(vrem_vv_w, 4)
1692 GEN_VEXT_VV(vrem_vv_d, 8)
1693
1694 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1695 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1696 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1697 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1698 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1699 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1700 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1701 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1702 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1703 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1704 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1705 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1706 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1707 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1708 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1709 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1710 GEN_VEXT_VX(vdivu_vx_b)
1711 GEN_VEXT_VX(vdivu_vx_h)
1712 GEN_VEXT_VX(vdivu_vx_w)
1713 GEN_VEXT_VX(vdivu_vx_d)
1714 GEN_VEXT_VX(vdiv_vx_b)
1715 GEN_VEXT_VX(vdiv_vx_h)
1716 GEN_VEXT_VX(vdiv_vx_w)
1717 GEN_VEXT_VX(vdiv_vx_d)
1718 GEN_VEXT_VX(vremu_vx_b)
1719 GEN_VEXT_VX(vremu_vx_h)
1720 GEN_VEXT_VX(vremu_vx_w)
1721 GEN_VEXT_VX(vremu_vx_d)
1722 GEN_VEXT_VX(vrem_vx_b)
1723 GEN_VEXT_VX(vrem_vx_h)
1724 GEN_VEXT_VX(vrem_vx_w)
1725 GEN_VEXT_VX(vrem_vx_d)
1726
1727 /* Vector Widening Integer Multiply Instructions */
1728 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1729 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1730 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1731 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1732 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1733 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1734 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1735 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1736 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1737 GEN_VEXT_VV(vwmul_vv_b, 2)
1738 GEN_VEXT_VV(vwmul_vv_h, 4)
1739 GEN_VEXT_VV(vwmul_vv_w, 8)
1740 GEN_VEXT_VV(vwmulu_vv_b, 2)
1741 GEN_VEXT_VV(vwmulu_vv_h, 4)
1742 GEN_VEXT_VV(vwmulu_vv_w, 8)
1743 GEN_VEXT_VV(vwmulsu_vv_b, 2)
1744 GEN_VEXT_VV(vwmulsu_vv_h, 4)
1745 GEN_VEXT_VV(vwmulsu_vv_w, 8)
1746
1747 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1748 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1749 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1750 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1751 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1752 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1753 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1754 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1755 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1756 GEN_VEXT_VX(vwmul_vx_b)
1757 GEN_VEXT_VX(vwmul_vx_h)
1758 GEN_VEXT_VX(vwmul_vx_w)
1759 GEN_VEXT_VX(vwmulu_vx_b)
1760 GEN_VEXT_VX(vwmulu_vx_h)
1761 GEN_VEXT_VX(vwmulu_vx_w)
1762 GEN_VEXT_VX(vwmulsu_vx_b)
1763 GEN_VEXT_VX(vwmulsu_vx_h)
1764 GEN_VEXT_VX(vwmulsu_vx_w)
1765
1766 /* Vector Single-Width Integer Multiply-Add Instructions */
1767 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1768 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1769 { \
1770 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1771 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1772 TD d = *((TD *)vd + HD(i)); \
1773 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1774 }
1775
1776 #define DO_MACC(N, M, D) (M * N + D)
1777 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1778 #define DO_MADD(N, M, D) (M * D + N)
1779 #define DO_NMSUB(N, M, D) (-(M * D) + N)
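/*
 * With OPIVV3 invoking OP(s2, s1, d), the macros above give the usual
 * operand roles: vmacc/vnmsac use vd as the addend,
 *     vd[i] = +/-(vs1[i] * vs2[i]) + vd[i],
 * while vmadd/vnmsub use vd as a multiplicand,
 *     vd[i] = +/-(vs1[i] * vd[i]) + vs2[i].
 */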
1780 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1781 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1782 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1783 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1784 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1785 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1786 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1787 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1788 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1789 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1790 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1791 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1792 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1793 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1794 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1795 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1796 GEN_VEXT_VV(vmacc_vv_b, 1)
1797 GEN_VEXT_VV(vmacc_vv_h, 2)
1798 GEN_VEXT_VV(vmacc_vv_w, 4)
1799 GEN_VEXT_VV(vmacc_vv_d, 8)
1800 GEN_VEXT_VV(vnmsac_vv_b, 1)
1801 GEN_VEXT_VV(vnmsac_vv_h, 2)
1802 GEN_VEXT_VV(vnmsac_vv_w, 4)
1803 GEN_VEXT_VV(vnmsac_vv_d, 8)
1804 GEN_VEXT_VV(vmadd_vv_b, 1)
1805 GEN_VEXT_VV(vmadd_vv_h, 2)
1806 GEN_VEXT_VV(vmadd_vv_w, 4)
1807 GEN_VEXT_VV(vmadd_vv_d, 8)
1808 GEN_VEXT_VV(vnmsub_vv_b, 1)
1809 GEN_VEXT_VV(vnmsub_vv_h, 2)
1810 GEN_VEXT_VV(vnmsub_vv_w, 4)
1811 GEN_VEXT_VV(vnmsub_vv_d, 8)
1812
1813 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1814 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1815 { \
1816 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1817 TD d = *((TD *)vd + HD(i)); \
1818 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1819 }
1820
1821 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1822 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1823 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1824 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1825 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1826 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1827 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1828 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1829 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1830 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1831 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1832 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1833 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1834 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1835 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1836 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1837 GEN_VEXT_VX(vmacc_vx_b)
1838 GEN_VEXT_VX(vmacc_vx_h)
1839 GEN_VEXT_VX(vmacc_vx_w)
1840 GEN_VEXT_VX(vmacc_vx_d)
1841 GEN_VEXT_VX(vnmsac_vx_b)
1842 GEN_VEXT_VX(vnmsac_vx_h)
1843 GEN_VEXT_VX(vnmsac_vx_w)
1844 GEN_VEXT_VX(vnmsac_vx_d)
1845 GEN_VEXT_VX(vmadd_vx_b)
1846 GEN_VEXT_VX(vmadd_vx_h)
1847 GEN_VEXT_VX(vmadd_vx_w)
1848 GEN_VEXT_VX(vmadd_vx_d)
1849 GEN_VEXT_VX(vnmsub_vx_b)
1850 GEN_VEXT_VX(vnmsub_vx_h)
1851 GEN_VEXT_VX(vnmsub_vx_w)
1852 GEN_VEXT_VX(vnmsub_vx_d)
1853
1854 /* Vector Widening Integer Multiply-Add Instructions */
1855 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1856 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1857 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1858 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1859 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1860 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1861 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1862 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1863 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1864 GEN_VEXT_VV(vwmaccu_vv_b, 2)
1865 GEN_VEXT_VV(vwmaccu_vv_h, 4)
1866 GEN_VEXT_VV(vwmaccu_vv_w, 8)
1867 GEN_VEXT_VV(vwmacc_vv_b, 2)
1868 GEN_VEXT_VV(vwmacc_vv_h, 4)
1869 GEN_VEXT_VV(vwmacc_vv_w, 8)
1870 GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1871 GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1872 GEN_VEXT_VV(vwmaccsu_vv_w, 8)
1873
1874 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1875 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1876 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1877 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1878 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1879 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1880 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1881 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1882 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1883 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1884 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1885 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1886 GEN_VEXT_VX(vwmaccu_vx_b)
1887 GEN_VEXT_VX(vwmaccu_vx_h)
1888 GEN_VEXT_VX(vwmaccu_vx_w)
1889 GEN_VEXT_VX(vwmacc_vx_b)
1890 GEN_VEXT_VX(vwmacc_vx_h)
1891 GEN_VEXT_VX(vwmacc_vx_w)
1892 GEN_VEXT_VX(vwmaccsu_vx_b)
1893 GEN_VEXT_VX(vwmaccsu_vx_h)
1894 GEN_VEXT_VX(vwmaccsu_vx_w)
1895 GEN_VEXT_VX(vwmaccus_vx_b)
1896 GEN_VEXT_VX(vwmaccus_vx_h)
1897 GEN_VEXT_VX(vwmaccus_vx_w)
1898
1899 /* Vector Integer Merge and Move Instructions */
1900 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
1901 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1902 uint32_t desc) \
1903 { \
1904 uint32_t vl = env->vl; \
1905 uint32_t i; \
1906 \
1907 for (i = env->vstart; i < vl; i++) { \
1908 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1909 *((ETYPE *)vd + H(i)) = s1; \
1910 } \
1911 env->vstart = 0; \
1912 }
1913
1914 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1915 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1916 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1917 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1918
1919 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
1920 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1921 uint32_t desc) \
1922 { \
1923 uint32_t vl = env->vl; \
1924 uint32_t i; \
1925 \
1926 for (i = env->vstart; i < vl; i++) { \
1927 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
1928 } \
1929 env->vstart = 0; \
1930 }
1931
1932 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
1933 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1934 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1935 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1936
1937 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
1938 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1939 CPURISCVState *env, uint32_t desc) \
1940 { \
1941 uint32_t vl = env->vl; \
1942 uint32_t i; \
1943 \
1944 for (i = env->vstart; i < vl; i++) { \
1945 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
1946 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
1947 } \
1948 env->vstart = 0; \
1949 }
1950
1951 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
1952 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1953 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1954 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1955
1956 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
1957 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1958 void *vs2, CPURISCVState *env, uint32_t desc) \
1959 { \
1960 uint32_t vl = env->vl; \
1961 uint32_t i; \
1962 \
1963 for (i = env->vstart; i < vl; i++) { \
1964 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1965 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
1966 (ETYPE)(target_long)s1); \
1967 *((ETYPE *)vd + H(i)) = d; \
1968 } \
1969 env->vstart = 0; \
1970 }
1971
1972 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
1973 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1974 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1975 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1976
1977 /*
1978 *** Vector Fixed-Point Arithmetic Instructions
1979 */
1980
1981 /* Vector Single-Width Saturating Add and Subtract */
1982
1983 /*
1984 * As the fixed-point instructions share rounding-mode and saturation handling,
1985 * define common macros for fixed point here.
1986 */
1987 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1988 CPURISCVState *env, int vxrm);
1989
1990 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1991 static inline void \
1992 do_##NAME(void *vd, void *vs1, void *vs2, int i, \
1993 CPURISCVState *env, int vxrm) \
1994 { \
1995 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1996 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1997 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
1998 }
1999
2000 static inline void
2001 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2002 CPURISCVState *env,
2003 uint32_t vl, uint32_t vm, int vxrm,
2004 opivv2_rm_fn *fn)
2005 {
2006 for (uint32_t i = env->vstart; i < vl; i++) {
2007 if (!vm && !vext_elem_mask(v0, i)) {
2008 continue;
2009 }
2010 fn(vd, vs1, vs2, i, env, vxrm);
2011 }
2012 env->vstart = 0;
2013 }
2014
2015 static inline void
2016 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2017 CPURISCVState *env,
2018 uint32_t desc,
2019 opivv2_rm_fn *fn)
2020 {
2021 uint32_t vm = vext_vm(desc);
2022 uint32_t vl = env->vl;
2023
2024 switch (env->vxrm) {
2025 case 0: /* rnu */
2026 vext_vv_rm_1(vd, v0, vs1, vs2,
2027 env, vl, vm, 0, fn);
2028 break;
2029 case 1: /* rne */
2030 vext_vv_rm_1(vd, v0, vs1, vs2,
2031 env, vl, vm, 1, fn);
2032 break;
2033 case 2: /* rdn */
2034 vext_vv_rm_1(vd, v0, vs1, vs2,
2035 env, vl, vm, 2, fn);
2036 break;
2037 default: /* rod */
2038 vext_vv_rm_1(vd, v0, vs1, vs2,
2039 env, vl, vm, 3, fn);
2040 break;
2041 }
2042 }
2043
2044 /* generate helpers for fixed point instructions with OPIVV format */
2045 #define GEN_VEXT_VV_RM(NAME) \
2046 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2047 CPURISCVState *env, uint32_t desc) \
2048 { \
2049 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
2050 do_##NAME); \
2051 }
2052
2053 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2054 {
2055 uint8_t res = a + b;
2056 if (res < a) {
2057 res = UINT8_MAX;
2058 env->vxsat = 0x1;
2059 }
2060 return res;
2061 }
2062
2063 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2064 uint16_t b)
2065 {
2066 uint16_t res = a + b;
2067 if (res < a) {
2068 res = UINT16_MAX;
2069 env->vxsat = 0x1;
2070 }
2071 return res;
2072 }
2073
2074 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2075 uint32_t b)
2076 {
2077 uint32_t res = a + b;
2078 if (res < a) {
2079 res = UINT32_MAX;
2080 env->vxsat = 0x1;
2081 }
2082 return res;
2083 }
2084
2085 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2086 uint64_t b)
2087 {
2088 uint64_t res = a + b;
2089 if (res < a) {
2090 res = UINT64_MAX;
2091 env->vxsat = 0x1;
2092 }
2093 return res;
2094 }
2095
2096 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2097 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2098 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2099 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2100 GEN_VEXT_VV_RM(vsaddu_vv_b)
2101 GEN_VEXT_VV_RM(vsaddu_vv_h)
2102 GEN_VEXT_VV_RM(vsaddu_vv_w)
2103 GEN_VEXT_VV_RM(vsaddu_vv_d)
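/*
 * For illustration, a sketch of what the vsaddu_vv_b pair above expands
 * to, assuming RVVCALL(macro, ...) simply forwards to macro(...) and
 * OP_UUU_B supplies uint8_t for TD/T1/T2/TX1/TX2 as defined earlier in
 * this file:
 *
 *   static inline void
 *   do_vsaddu_vv_b(void *vd, void *vs1, void *vs2, int i,
 *                  CPURISCVState *env, int vxrm)
 *   {
 *       uint8_t s1 = *((uint8_t *)vs1 + H1(i));
 *       uint8_t s2 = *((uint8_t *)vs2 + H1(i));
 *       *((uint8_t *)vd + H1(i)) = saddu8(env, vxrm, s2, s1);
 *   }
 *
 *   void HELPER(vsaddu_vv_b)(void *vd, void *v0, void *vs1, void *vs2,
 *                            CPURISCVState *env, uint32_t desc)
 *   {
 *       vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, do_vsaddu_vv_b);
 *   }
 *
 * The per-element op is fixed at expansion time; vext_vv_rm_2 supplies
 * masking and the vxrm rounding mode at run time.
 */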
2104
2105 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2106 CPURISCVState *env, int vxrm);
2107
2108 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2109 static inline void \
2110 do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2111 CPURISCVState *env, int vxrm) \
2112 { \
2113 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2114 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2115 }
2116
2117 static inline void
2118 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2119 CPURISCVState *env,
2120 uint32_t vl, uint32_t vm, int vxrm,
2121 opivx2_rm_fn *fn)
2122 {
2123 for (uint32_t i = env->vstart; i < vl; i++) {
2124 if (!vm && !vext_elem_mask(v0, i)) {
2125 continue;
2126 }
2127 fn(vd, s1, vs2, i, env, vxrm);
2128 }
2129 env->vstart = 0;
2130 }
2131
2132 static inline void
2133 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2134 CPURISCVState *env,
2135 uint32_t desc,
2136 opivx2_rm_fn *fn)
2137 {
2138 uint32_t vm = vext_vm(desc);
2139 uint32_t vl = env->vl;
2140
2141 switch (env->vxrm) {
2142 case 0: /* rnu */
2143 vext_vx_rm_1(vd, v0, s1, vs2,
2144 env, vl, vm, 0, fn);
2145 break;
2146 case 1: /* rne */
2147 vext_vx_rm_1(vd, v0, s1, vs2,
2148 env, vl, vm, 1, fn);
2149 break;
2150 case 2: /* rdn */
2151 vext_vx_rm_1(vd, v0, s1, vs2,
2152 env, vl, vm, 2, fn);
2153 break;
2154 default: /* rod */
2155 vext_vx_rm_1(vd, v0, s1, vs2,
2156 env, vl, vm, 3, fn);
2157 break;
2158 }
2159 }
2160
2161 /* generate helpers for fixed point instructions with OPIVX format */
2162 #define GEN_VEXT_VX_RM(NAME) \
2163 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2164 void *vs2, CPURISCVState *env, uint32_t desc) \
2165 { \
2166 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
2167 do_##NAME); \
2168 }
2169
2170 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2171 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2172 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2173 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2174 GEN_VEXT_VX_RM(vsaddu_vx_b)
2175 GEN_VEXT_VX_RM(vsaddu_vx_h)
2176 GEN_VEXT_VX_RM(vsaddu_vx_w)
2177 GEN_VEXT_VX_RM(vsaddu_vx_d)
2178
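/*
 * In the signed saturating adds below, the test (res ^ a) & (res ^ b)
 * masked with the sign bit is non-zero exactly when a and b have the
 * same sign but the sum's sign differs, i.e. when the addition
 * overflowed.
 */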
2179 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2180 {
2181 int8_t res = a + b;
2182 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2183 res = a > 0 ? INT8_MAX : INT8_MIN;
2184 env->vxsat = 0x1;
2185 }
2186 return res;
2187 }
2188
2189 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2190 {
2191 int16_t res = a + b;
2192 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2193 res = a > 0 ? INT16_MAX : INT16_MIN;
2194 env->vxsat = 0x1;
2195 }
2196 return res;
2197 }
2198
2199 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2200 {
2201 int32_t res = a + b;
2202 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2203 res = a > 0 ? INT32_MAX : INT32_MIN;
2204 env->vxsat = 0x1;
2205 }
2206 return res;
2207 }
2208
2209 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2210 {
2211 int64_t res = a + b;
2212 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2213 res = a > 0 ? INT64_MAX : INT64_MIN;
2214 env->vxsat = 0x1;
2215 }
2216 return res;
2217 }
2218
2219 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2220 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2221 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2222 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2223 GEN_VEXT_VV_RM(vsadd_vv_b)
2224 GEN_VEXT_VV_RM(vsadd_vv_h)
2225 GEN_VEXT_VV_RM(vsadd_vv_w)
2226 GEN_VEXT_VV_RM(vsadd_vv_d)
2227
2228 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2229 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2230 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2231 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2232 GEN_VEXT_VX_RM(vsadd_vx_b)
2233 GEN_VEXT_VX_RM(vsadd_vx_h)
2234 GEN_VEXT_VX_RM(vsadd_vx_w)
2235 GEN_VEXT_VX_RM(vsadd_vx_d)
2236
2237 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2238 {
2239 uint8_t res = a - b;
2240 if (res > a) {
2241 res = 0;
2242 env->vxsat = 0x1;
2243 }
2244 return res;
2245 }
2246
2247 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2248 uint16_t b)
2249 {
2250 uint16_t res = a - b;
2251 if (res > a) {
2252 res = 0;
2253 env->vxsat = 0x1;
2254 }
2255 return res;
2256 }
2257
2258 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2259 uint32_t b)
2260 {
2261 uint32_t res = a - b;
2262 if (res > a) {
2263 res = 0;
2264 env->vxsat = 0x1;
2265 }
2266 return res;
2267 }
2268
2269 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2270 uint64_t b)
2271 {
2272 uint64_t res = a - b;
2273 if (res > a) {
2274 res = 0;
2275 env->vxsat = 0x1;
2276 }
2277 return res;
2278 }
2279
2280 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2281 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2282 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2283 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2284 GEN_VEXT_VV_RM(vssubu_vv_b)
2285 GEN_VEXT_VV_RM(vssubu_vv_h)
2286 GEN_VEXT_VV_RM(vssubu_vv_w)
2287 GEN_VEXT_VV_RM(vssubu_vv_d)
2288
2289 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2290 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2291 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2292 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2293 GEN_VEXT_VX_RM(vssubu_vx_b)
2294 GEN_VEXT_VX_RM(vssubu_vx_h)
2295 GEN_VEXT_VX_RM(vssubu_vx_w)
2296 GEN_VEXT_VX_RM(vssubu_vx_d)
2297
2298 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2299 {
2300 int8_t res = a - b;
2301 if ((res ^ a) & (a ^ b) & INT8_MIN) {
2302 res = a >= 0 ? INT8_MAX : INT8_MIN;
2303 env->vxsat = 0x1;
2304 }
2305 return res;
2306 }
2307
2308 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2309 {
2310 int16_t res = a - b;
2311 if ((res ^ a) & (a ^ b) & INT16_MIN) {
2312 res = a >= 0 ? INT16_MAX : INT16_MIN;
2313 env->vxsat = 0x1;
2314 }
2315 return res;
2316 }
2317
2318 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2319 {
2320 int32_t res = a - b;
2321 if ((res ^ a) & (a ^ b) & INT32_MIN) {
2322 res = a >= 0 ? INT32_MAX : INT32_MIN;
2323 env->vxsat = 0x1;
2324 }
2325 return res;
2326 }
2327
2328 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2329 {
2330 int64_t res = a - b;
2331 if ((res ^ a) & (a ^ b) & INT64_MIN) {
2332 res = a >= 0 ? INT64_MAX : INT64_MIN;
2333 env->vxsat = 0x1;
2334 }
2335 return res;
2336 }
2337
2338 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2339 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2340 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2341 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2342 GEN_VEXT_VV_RM(vssub_vv_b)
2343 GEN_VEXT_VV_RM(vssub_vv_h)
2344 GEN_VEXT_VV_RM(vssub_vv_w)
2345 GEN_VEXT_VV_RM(vssub_vv_d)
2346
2347 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2348 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2349 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2350 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2351 GEN_VEXT_VX_RM(vssub_vx_b)
2352 GEN_VEXT_VX_RM(vssub_vx_h)
2353 GEN_VEXT_VX_RM(vssub_vx_w)
2354 GEN_VEXT_VX_RM(vssub_vx_d)
2355
2356 /* Vector Single-Width Averaging Add and Subtract */
2357 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2358 {
2359 uint8_t d = extract64(v, shift, 1);
2360 uint8_t d1;
2361 uint64_t D1, D2;
2362
2363 if (shift == 0 || shift > 64) {
2364 return 0;
2365 }
2366
2367 d1 = extract64(v, shift - 1, 1);
2368 D1 = extract64(v, 0, shift);
2369 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2370 return d1;
2371 } else if (vxrm == 1) { /* round-to-nearest-even */
2372 if (shift > 1) {
2373 D2 = extract64(v, 0, shift - 1);
2374 return d1 & ((D2 != 0) | d);
2375 } else {
2376 return d1 & d;
2377 }
2378 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2379 return !d & (D1 != 0);
2380 }
2381 return 0; /* round-down (truncate) */
2382 }
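/*
 * Worked example (illustrative): discard 2 bits of v = 0b1011, i.e.
 * 11 / 4 = 2.75.  get_round() returns the increment to add to v >> 2:
 *   rnu (vxrm 0): 1 -> 3   round half up
 *   rne (vxrm 1): 1 -> 3   fraction above one half
 *   rdn (vxrm 2): 0 -> 2   truncate
 *   rod (vxrm 3): 1 -> 3   truncated result is even and bits were lost
 */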
2383
2384 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2385 {
2386 int64_t res = (int64_t)a + b;
2387 uint8_t round = get_round(vxrm, res, 1);
2388
2389 return (res >> 1) + round;
2390 }
2391
2392 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2393 {
2394 int64_t res = a + b;
2395 uint8_t round = get_round(vxrm, res, 1);
2396 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2397
2398 /* With signed overflow, bit 64 is inverse of bit 63. */
2399 return ((res >> 1) ^ over) + round;
2400 }
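/*
 * Sanity check of the trick above (illustrative): for a = b = INT64_MAX
 * the raw sum wraps to res = -2 and over = INT64_MIN.  res >> 1 is -1,
 * and xor-ing with over restores the true bit 63, giving INT64_MAX,
 * which is exactly (INT64_MAX + INT64_MAX) / 2; the single discarded
 * bit of res is 0, so every rounding mode adds 0.
 */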
2401
2402 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2403 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2404 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2405 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2406 GEN_VEXT_VV_RM(vaadd_vv_b)
2407 GEN_VEXT_VV_RM(vaadd_vv_h)
2408 GEN_VEXT_VV_RM(vaadd_vv_w)
2409 GEN_VEXT_VV_RM(vaadd_vv_d)
2410
2411 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2412 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2413 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2414 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2415 GEN_VEXT_VX_RM(vaadd_vx_b)
2416 GEN_VEXT_VX_RM(vaadd_vx_h)
2417 GEN_VEXT_VX_RM(vaadd_vx_w)
2418 GEN_VEXT_VX_RM(vaadd_vx_d)
2419
2420 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2421 uint32_t a, uint32_t b)
2422 {
2423 uint64_t res = (uint64_t)a + b;
2424 uint8_t round = get_round(vxrm, res, 1);
2425
2426 return (res >> 1) + round;
2427 }
2428
2429 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2430 uint64_t a, uint64_t b)
2431 {
2432 uint64_t res = a + b;
2433 uint8_t round = get_round(vxrm, res, 1);
2434 uint64_t over = (uint64_t)(res < a) << 63;
2435
2436 return ((res >> 1) | over) + round;
2437 }
2438
2439 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2440 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2441 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2442 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2443 GEN_VEXT_VV_RM(vaaddu_vv_b)
2444 GEN_VEXT_VV_RM(vaaddu_vv_h)
2445 GEN_VEXT_VV_RM(vaaddu_vv_w)
2446 GEN_VEXT_VV_RM(vaaddu_vv_d)
2447
2448 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2449 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2450 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2451 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2452 GEN_VEXT_VX_RM(vaaddu_vx_b)
2453 GEN_VEXT_VX_RM(vaaddu_vx_h)
2454 GEN_VEXT_VX_RM(vaaddu_vx_w)
2455 GEN_VEXT_VX_RM(vaaddu_vx_d)
2456
2457 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2458 {
2459 int64_t res = (int64_t)a - b;
2460 uint8_t round = get_round(vxrm, res, 1);
2461
2462 return (res >> 1) + round;
2463 }
2464
2465 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2466 {
2467 int64_t res = (int64_t)a - b;
2468 uint8_t round = get_round(vxrm, res, 1);
2469 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2470
2471 /* With signed overflow, bit 64 is inverse of bit 63. */
2472 return ((res >> 1) ^ over) + round;
2473 }
2474
2475 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2476 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2477 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2478 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2479 GEN_VEXT_VV_RM(vasub_vv_b)
2480 GEN_VEXT_VV_RM(vasub_vv_h)
2481 GEN_VEXT_VV_RM(vasub_vv_w)
2482 GEN_VEXT_VV_RM(vasub_vv_d)
2483
2484 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2485 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2486 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2487 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2488 GEN_VEXT_VX_RM(vasub_vx_b)
2489 GEN_VEXT_VX_RM(vasub_vx_h)
2490 GEN_VEXT_VX_RM(vasub_vx_w)
2491 GEN_VEXT_VX_RM(vasub_vx_d)
2492
2493 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2494 uint32_t a, uint32_t b)
2495 {
2496 int64_t res = (int64_t)a - b;
2497 uint8_t round = get_round(vxrm, res, 1);
2498
2499 return (res >> 1) + round;
2500 }
2501
2502 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2503 uint64_t a, uint64_t b)
2504 {
2505 uint64_t res = (uint64_t)a - b;
2506 uint8_t round = get_round(vxrm, res, 1);
2507 uint64_t over = (uint64_t)(res > a) << 63;
2508
2509 return ((res >> 1) | over) + round;
2510 }
2511
2512 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2513 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2514 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2515 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2516 GEN_VEXT_VV_RM(vasubu_vv_b)
2517 GEN_VEXT_VV_RM(vasubu_vv_h)
2518 GEN_VEXT_VV_RM(vasubu_vv_w)
2519 GEN_VEXT_VV_RM(vasubu_vv_d)
2520
2521 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2522 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2523 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2524 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2525 GEN_VEXT_VX_RM(vasubu_vx_b)
2526 GEN_VEXT_VX_RM(vasubu_vx_h)
2527 GEN_VEXT_VX_RM(vasubu_vx_w)
2528 GEN_VEXT_VX_RM(vasubu_vx_d)
2529
2530 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2531 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2532 {
2533 uint8_t round;
2534 int16_t res;
2535
2536 res = (int16_t)a * (int16_t)b;
2537 round = get_round(vxrm, res, 7);
2538 res = (res >> 7) + round;
2539
2540 if (res > INT8_MAX) {
2541 env->vxsat = 0x1;
2542 return INT8_MAX;
2543 } else if (res < INT8_MIN) {
2544 env->vxsat = 0x1;
2545 return INT8_MIN;
2546 } else {
2547 return res;
2548 }
2549 }
2550
2551 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2552 {
2553 uint8_t round;
2554 int32_t res;
2555
2556 res = (int32_t)a * (int32_t)b;
2557 round = get_round(vxrm, res, 15);
2558 res = (res >> 15) + round;
2559
2560 if (res > INT16_MAX) {
2561 env->vxsat = 0x1;
2562 return INT16_MAX;
2563 } else if (res < INT16_MIN) {
2564 env->vxsat = 0x1;
2565 return INT16_MIN;
2566 } else {
2567 return res;
2568 }
2569 }
2570
2571 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2572 {
2573 uint8_t round;
2574 int64_t res;
2575
2576 res = (int64_t)a * (int64_t)b;
2577 round = get_round(vxrm, res, 31);
2578 res = (res >> 31) + round;
2579
2580 if (res > INT32_MAX) {
2581 env->vxsat = 0x1;
2582 return INT32_MAX;
2583 } else if (res < INT32_MIN) {
2584 env->vxsat = 0x1;
2585 return INT32_MIN;
2586 } else {
2587 return res;
2588 }
2589 }
2590
2591 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2592 {
2593 uint8_t round;
2594 uint64_t hi_64, lo_64;
2595 int64_t res;
2596
2597 if (a == INT64_MIN && b == INT64_MIN) {
2598 env->vxsat = 1;
2599 return INT64_MAX;
2600 }
2601
2602 muls64(&lo_64, &hi_64, a, b);
2603 round = get_round(vxrm, lo_64, 63);
2604 /*
2605 * Cannot overflow, as there are always
2606 * 2 sign bits after multiply.
2607 */
2608 res = (hi_64 << 1) | (lo_64 >> 63);
2609 if (round) {
2610 if (res == INT64_MAX) {
2611 env->vxsat = 1;
2612 } else {
2613 res += 1;
2614 }
2615 }
2616 return res;
2617 }
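/*
 * On the "2 sign bits" remark above: with INT64_MIN * INT64_MIN already
 * filtered out, |a * b| is strictly less than 2 ** 126, so bits 127 and
 * 126 of the 128-bit product are both sign bits and the left shift in
 * (hi_64 << 1) | (lo_64 >> 63) cannot discard significant bits.
 */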
2618
2619 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2620 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2621 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2622 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2623 GEN_VEXT_VV_RM(vsmul_vv_b)
2624 GEN_VEXT_VV_RM(vsmul_vv_h)
2625 GEN_VEXT_VV_RM(vsmul_vv_w)
2626 GEN_VEXT_VV_RM(vsmul_vv_d)
2627
2628 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2629 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2630 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2631 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2632 GEN_VEXT_VX_RM(vsmul_vx_b)
2633 GEN_VEXT_VX_RM(vsmul_vx_h)
2634 GEN_VEXT_VX_RM(vsmul_vx_w)
2635 GEN_VEXT_VX_RM(vsmul_vx_d)
2636
2637 /* Vector Single-Width Scaling Shift Instructions */
2638 static inline uint8_t
2639 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2640 {
2641 uint8_t round, shift = b & 0x7;
2642 uint8_t res;
2643
2644 round = get_round(vxrm, a, shift);
2645 res = (a >> shift) + round;
2646 return res;
2647 }
2648 static inline uint16_t
2649 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2650 {
2651 uint8_t round, shift = b & 0xf;
2652 uint16_t res;
2653
2654 round = get_round(vxrm, a, shift);
2655 res = (a >> shift) + round;
2656 return res;
2657 }
2658 static inline uint32_t
2659 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2660 {
2661 uint8_t round, shift = b & 0x1f;
2662 uint32_t res;
2663
2664 round = get_round(vxrm, a, shift);
2665 res = (a >> shift) + round;
2666 return res;
2667 }
2668 static inline uint64_t
2669 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2670 {
2671 uint8_t round, shift = b & 0x3f;
2672 uint64_t res;
2673
2674 round = get_round(vxrm, a, shift);
2675 res = (a >> shift) + round;
2676 return res;
2677 }
2678 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2679 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2680 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2681 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2682 GEN_VEXT_VV_RM(vssrl_vv_b)
2683 GEN_VEXT_VV_RM(vssrl_vv_h)
2684 GEN_VEXT_VV_RM(vssrl_vv_w)
2685 GEN_VEXT_VV_RM(vssrl_vv_d)
2686
2687 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2688 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2689 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2690 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2691 GEN_VEXT_VX_RM(vssrl_vx_b)
2692 GEN_VEXT_VX_RM(vssrl_vx_h)
2693 GEN_VEXT_VX_RM(vssrl_vx_w)
2694 GEN_VEXT_VX_RM(vssrl_vx_d)
2695
2696 static inline int8_t
2697 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2698 {
2699 uint8_t round, shift = b & 0x7;
2700 int8_t res;
2701
2702 round = get_round(vxrm, a, shift);
2703 res = (a >> shift) + round;
2704 return res;
2705 }
2706 static inline int16_t
2707 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2708 {
2709 uint8_t round, shift = b & 0xf;
2710 int16_t res;
2711
2712 round = get_round(vxrm, a, shift);
2713 res = (a >> shift) + round;
2714 return res;
2715 }
2716 static inline int32_t
2717 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2718 {
2719 uint8_t round, shift = b & 0x1f;
2720 int32_t res;
2721
2722 round = get_round(vxrm, a, shift);
2723 res = (a >> shift) + round;
2724 return res;
2725 }
2726 static inline int64_t
2727 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2728 {
2729 uint8_t round, shift = b & 0x3f;
2730 int64_t res;
2731
2732 round = get_round(vxrm, a, shift);
2733 res = (a >> shift) + round;
2734 return res;
2735 }
2736
2737 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2738 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2739 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2740 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2741 GEN_VEXT_VV_RM(vssra_vv_b)
2742 GEN_VEXT_VV_RM(vssra_vv_h)
2743 GEN_VEXT_VV_RM(vssra_vv_w)
2744 GEN_VEXT_VV_RM(vssra_vv_d)
2745
2746 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2747 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2748 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2749 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2750 GEN_VEXT_VX_RM(vssra_vx_b)
2751 GEN_VEXT_VX_RM(vssra_vx_h)
2752 GEN_VEXT_VX_RM(vssra_vx_w)
2753 GEN_VEXT_VX_RM(vssra_vx_d)
2754
2755 /* Vector Narrowing Fixed-Point Clip Instructions */
2756 static inline int8_t
2757 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2758 {
2759 uint8_t round, shift = b & 0xf;
2760 int16_t res;
2761
2762 round = get_round(vxrm, a, shift);
2763 res = (a >> shift) + round;
2764 if (res > INT8_MAX) {
2765 env->vxsat = 0x1;
2766 return INT8_MAX;
2767 } else if (res < INT8_MIN) {
2768 env->vxsat = 0x1;
2769 return INT8_MIN;
2770 } else {
2771 return res;
2772 }
2773 }
2774
2775 static inline int16_t
2776 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2777 {
2778 uint8_t round, shift = b & 0x1f;
2779 int32_t res;
2780
2781 round = get_round(vxrm, a, shift);
2782 res = (a >> shift) + round;
2783 if (res > INT16_MAX) {
2784 env->vxsat = 0x1;
2785 return INT16_MAX;
2786 } else if (res < INT16_MIN) {
2787 env->vxsat = 0x1;
2788 return INT16_MIN;
2789 } else {
2790 return res;
2791 }
2792 }
2793
2794 static inline int32_t
2795 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2796 {
2797 uint8_t round, shift = b & 0x3f;
2798 int64_t res;
2799
2800 round = get_round(vxrm, a, shift);
2801 res = (a >> shift) + round;
2802 if (res > INT32_MAX) {
2803 env->vxsat = 0x1;
2804 return INT32_MAX;
2805 } else if (res < INT32_MIN) {
2806 env->vxsat = 0x1;
2807 return INT32_MIN;
2808 } else {
2809 return res;
2810 }
2811 }
2812
2813 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H1, H2, vnclip8)
2814 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H2, H4, vnclip16)
2815 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H4, H8, vnclip32)
2816 GEN_VEXT_VV_RM(vnclip_wv_b)
2817 GEN_VEXT_VV_RM(vnclip_wv_h)
2818 GEN_VEXT_VV_RM(vnclip_wv_w)
2819
2820 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2821 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2822 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2823 GEN_VEXT_VX_RM(vnclip_wx_b)
2824 GEN_VEXT_VX_RM(vnclip_wx_h)
2825 GEN_VEXT_VX_RM(vnclip_wx_w)
2826
2827 static inline uint8_t
2828 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2829 {
2830 uint8_t round, shift = b & 0xf;
2831 uint16_t res;
2832
2833 round = get_round(vxrm, a, shift);
2834 res = (a >> shift) + round;
2835 if (res > UINT8_MAX) {
2836 env->vxsat = 0x1;
2837 return UINT8_MAX;
2838 } else {
2839 return res;
2840 }
2841 }
2842
2843 static inline uint16_t
2844 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2845 {
2846 uint8_t round, shift = b & 0x1f;
2847 uint32_t res;
2848
2849 round = get_round(vxrm, a, shift);
2850 res = (a >> shift) + round;
2851 if (res > UINT16_MAX) {
2852 env->vxsat = 0x1;
2853 return UINT16_MAX;
2854 } else {
2855 return res;
2856 }
2857 }
2858
2859 static inline uint32_t
2860 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2861 {
2862 uint8_t round, shift = b & 0x3f;
2863 uint64_t res;
2864
2865 round = get_round(vxrm, a, shift);
2866 res = (a >> shift) + round;
2867 if (res > UINT32_MAX) {
2868 env->vxsat = 0x1;
2869 return UINT32_MAX;
2870 } else {
2871 return res;
2872 }
2873 }
2874
2875 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H1, H2, vnclipu8)
2876 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H2, H4, vnclipu16)
2877 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H4, H8, vnclipu32)
2878 GEN_VEXT_VV_RM(vnclipu_wv_b)
2879 GEN_VEXT_VV_RM(vnclipu_wv_h)
2880 GEN_VEXT_VV_RM(vnclipu_wv_w)
2881
2882 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2883 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2884 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2885 GEN_VEXT_VX_RM(vnclipu_wx_b)
2886 GEN_VEXT_VX_RM(vnclipu_wx_h)
2887 GEN_VEXT_VX_RM(vnclipu_wx_w)
2888
2889 /*
2890 *** Vector Floating-Point Arithmetic Instructions
2891 */
2892 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
2893 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2894 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2895 CPURISCVState *env) \
2896 { \
2897 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2898 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2899 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
2900 }
2901
2902 #define GEN_VEXT_VV_ENV(NAME) \
2903 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
2904 void *vs2, CPURISCVState *env, \
2905 uint32_t desc) \
2906 { \
2907 uint32_t vm = vext_vm(desc); \
2908 uint32_t vl = env->vl; \
2909 uint32_t i; \
2910 \
2911 for (i = env->vstart; i < vl; i++) { \
2912 if (!vm && !vext_elem_mask(v0, i)) { \
2913 continue; \
2914 } \
2915 do_##NAME(vd, vs1, vs2, i, env); \
2916 } \
2917 env->vstart = 0; \
2918 }
2919
2920 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2921 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2922 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2923 GEN_VEXT_VV_ENV(vfadd_vv_h)
2924 GEN_VEXT_VV_ENV(vfadd_vv_w)
2925 GEN_VEXT_VV_ENV(vfadd_vv_d)
2926
2927 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2928 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2929 CPURISCVState *env) \
2930 { \
2931 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2932 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2933 }
2934
2935 #define GEN_VEXT_VF(NAME) \
2936 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
2937 void *vs2, CPURISCVState *env, \
2938 uint32_t desc) \
2939 { \
2940 uint32_t vm = vext_vm(desc); \
2941 uint32_t vl = env->vl; \
2942 uint32_t i; \
2943 \
2944 for (i = env->vstart; i < vl; i++) { \
2945 if (!vm && !vext_elem_mask(v0, i)) { \
2946 continue; \
2947 } \
2948 do_##NAME(vd, s1, vs2, i, env); \
2949 } \
2950 env->vstart = 0; \
2951 }
2952
2953 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2954 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2955 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2956 GEN_VEXT_VF(vfadd_vf_h)
2957 GEN_VEXT_VF(vfadd_vf_w)
2958 GEN_VEXT_VF(vfadd_vf_d)
2959
2960 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2961 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2962 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2963 GEN_VEXT_VV_ENV(vfsub_vv_h)
2964 GEN_VEXT_VV_ENV(vfsub_vv_w)
2965 GEN_VEXT_VV_ENV(vfsub_vv_d)
2966 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2967 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2968 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2969 GEN_VEXT_VF(vfsub_vf_h)
2970 GEN_VEXT_VF(vfsub_vf_w)
2971 GEN_VEXT_VF(vfsub_vf_d)
2972
2973 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2974 {
2975 return float16_sub(b, a, s);
2976 }
2977
2978 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2979 {
2980 return float32_sub(b, a, s);
2981 }
2982
2983 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2984 {
2985 return float64_sub(b, a, s);
2986 }
2987
2988 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2989 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2990 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2991 GEN_VEXT_VF(vfrsub_vf_h)
2992 GEN_VEXT_VF(vfrsub_vf_w)
2993 GEN_VEXT_VF(vfrsub_vf_d)
2994
2995 /* Vector Widening Floating-Point Add/Subtract Instructions */
2996 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2997 {
2998 return float32_add(float16_to_float32(a, true, s),
2999 float16_to_float32(b, true, s), s);
3000 }
3001
3002 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3003 {
3004 return float64_add(float32_to_float64(a, s),
3005 float32_to_float64(b, s), s);
3006
3007 }
3008
3009 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3010 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
3011 GEN_VEXT_VV_ENV(vfwadd_vv_h)
3012 GEN_VEXT_VV_ENV(vfwadd_vv_w)
3013 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3014 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
3015 GEN_VEXT_VF(vfwadd_vf_h)
3016 GEN_VEXT_VF(vfwadd_vf_w)
3017
3018 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3019 {
3020 return float32_sub(float16_to_float32(a, true, s),
3021 float16_to_float32(b, true, s), s);
3022 }
3023
3024 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3025 {
3026 return float64_sub(float32_to_float64(a, s),
3027 float32_to_float64(b, s), s);
3028
3029 }
3030
3031 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3032 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3033 GEN_VEXT_VV_ENV(vfwsub_vv_h)
3034 GEN_VEXT_VV_ENV(vfwsub_vv_w)
3035 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3036 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3037 GEN_VEXT_VF(vfwsub_vf_h)
3038 GEN_VEXT_VF(vfwsub_vf_w)
3039
3040 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3041 {
3042 return float32_add(a, float16_to_float32(b, true, s), s);
3043 }
3044
3045 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3046 {
3047 return float64_add(a, float32_to_float64(b, s), s);
3048 }
3049
3050 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3051 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3052 GEN_VEXT_VV_ENV(vfwadd_wv_h)
3053 GEN_VEXT_VV_ENV(vfwadd_wv_w)
3054 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3055 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3056 GEN_VEXT_VF(vfwadd_wf_h)
3057 GEN_VEXT_VF(vfwadd_wf_w)
3058
3059 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3060 {
3061 return float32_sub(a, float16_to_float32(b, true, s), s);
3062 }
3063
3064 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3065 {
3066 return float64_sub(a, float32_to_float64(b, s), s);
3067 }
3068
3069 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3070 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3071 GEN_VEXT_VV_ENV(vfwsub_wv_h)
3072 GEN_VEXT_VV_ENV(vfwsub_wv_w)
3073 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3074 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3075 GEN_VEXT_VF(vfwsub_wf_h)
3076 GEN_VEXT_VF(vfwsub_wf_w)
3077
3078 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3079 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3080 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3081 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3082 GEN_VEXT_VV_ENV(vfmul_vv_h)
3083 GEN_VEXT_VV_ENV(vfmul_vv_w)
3084 GEN_VEXT_VV_ENV(vfmul_vv_d)
3085 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3086 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3087 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3088 GEN_VEXT_VF(vfmul_vf_h)
3089 GEN_VEXT_VF(vfmul_vf_w)
3090 GEN_VEXT_VF(vfmul_vf_d)
3091
3092 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3093 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3094 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3095 GEN_VEXT_VV_ENV(vfdiv_vv_h)
3096 GEN_VEXT_VV_ENV(vfdiv_vv_w)
3097 GEN_VEXT_VV_ENV(vfdiv_vv_d)
3098 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3099 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3100 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3101 GEN_VEXT_VF(vfdiv_vf_h)
3102 GEN_VEXT_VF(vfdiv_vf_w)
3103 GEN_VEXT_VF(vfdiv_vf_d)
3104
3105 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3106 {
3107 return float16_div(b, a, s);
3108 }
3109
3110 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3111 {
3112 return float32_div(b, a, s);
3113 }
3114
3115 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3116 {
3117 return float64_div(b, a, s);
3118 }
3119
3120 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3121 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3122 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3123 GEN_VEXT_VF(vfrdiv_vf_h)
3124 GEN_VEXT_VF(vfrdiv_vf_w)
3125 GEN_VEXT_VF(vfrdiv_vf_d)
3126
3127 /* Vector Widening Floating-Point Multiply */
3128 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3129 {
3130 return float32_mul(float16_to_float32(a, true, s),
3131 float16_to_float32(b, true, s), s);
3132 }
3133
3134 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3135 {
3136 return float64_mul(float32_to_float64(a, s),
3137 float32_to_float64(b, s), s);
3138
3139 }
3140 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3141 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3142 GEN_VEXT_VV_ENV(vfwmul_vv_h)
3143 GEN_VEXT_VV_ENV(vfwmul_vv_w)
3144 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3145 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3146 GEN_VEXT_VF(vfwmul_vf_h)
3147 GEN_VEXT_VF(vfwmul_vf_w)
3148
3149 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3150 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3151 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3152 CPURISCVState *env) \
3153 { \
3154 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3155 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3156 TD d = *((TD *)vd + HD(i)); \
3157 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3158 }
3159
3160 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3161 {
3162 return float16_muladd(a, b, d, 0, s);
3163 }
3164
3165 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3166 {
3167 return float32_muladd(a, b, d, 0, s);
3168 }
3169
3170 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3171 {
3172 return float64_muladd(a, b, d, 0, s);
3173 }
3174
3175 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3176 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3177 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3178 GEN_VEXT_VV_ENV(vfmacc_vv_h)
3179 GEN_VEXT_VV_ENV(vfmacc_vv_w)
3180 GEN_VEXT_VV_ENV(vfmacc_vv_d)
3181
3182 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3183 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3184 CPURISCVState *env) \
3185 { \
3186 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3187 TD d = *((TD *)vd + HD(i)); \
3188 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3189 }
3190
3191 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3192 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3193 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3194 GEN_VEXT_VF(vfmacc_vf_h)
3195 GEN_VEXT_VF(vfmacc_vf_w)
3196 GEN_VEXT_VF(vfmacc_vf_d)
3197
3198 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3199 {
3200 return float16_muladd(a, b, d,
3201 float_muladd_negate_c | float_muladd_negate_product, s);
3202 }
3203
3204 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3205 {
3206 return float32_muladd(a, b, d,
3207 float_muladd_negate_c | float_muladd_negate_product, s);
3208 }
3209
3210 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3211 {
3212 return float64_muladd(a, b, d,
3213 float_muladd_negate_c | float_muladd_negate_product, s);
3214 }
3215
3216 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3217 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3218 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3219 GEN_VEXT_VV_ENV(vfnmacc_vv_h)
3220 GEN_VEXT_VV_ENV(vfnmacc_vv_w)
3221 GEN_VEXT_VV_ENV(vfnmacc_vv_d)
3222 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3223 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3224 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3225 GEN_VEXT_VF(vfnmacc_vf_h)
3226 GEN_VEXT_VF(vfnmacc_vf_w)
3227 GEN_VEXT_VF(vfnmacc_vf_d)
3228
3229 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3230 {
3231 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3232 }
3233
3234 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3235 {
3236 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3237 }
3238
3239 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3240 {
3241 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3242 }
3243
3244 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3245 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3246 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3247 GEN_VEXT_VV_ENV(vfmsac_vv_h)
3248 GEN_VEXT_VV_ENV(vfmsac_vv_w)
3249 GEN_VEXT_VV_ENV(vfmsac_vv_d)
3250 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3251 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3252 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3253 GEN_VEXT_VF(vfmsac_vf_h)
3254 GEN_VEXT_VF(vfmsac_vf_w)
3255 GEN_VEXT_VF(vfmsac_vf_d)
3256
3257 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3258 {
3259 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3260 }
3261
3262 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3263 {
3264 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3265 }
3266
3267 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3268 {
3269 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3270 }
3271
3272 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3273 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3274 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3275 GEN_VEXT_VV_ENV(vfnmsac_vv_h)
3276 GEN_VEXT_VV_ENV(vfnmsac_vv_w)
3277 GEN_VEXT_VV_ENV(vfnmsac_vv_d)
3278 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3279 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3280 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3281 GEN_VEXT_VF(vfnmsac_vf_h)
3282 GEN_VEXT_VF(vfnmsac_vf_w)
3283 GEN_VEXT_VF(vfnmsac_vf_d)
3284
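/*
 * The *madd/*msub flavors below hand (d, b, a) to the softfloat muladd,
 * i.e. they multiply the destination accumulator by vs1 and then add or
 * subtract vs2: vd[i] = +-(vd[i] * vs1[i]) +- vs2[i].
 */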
3285 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3286 {
3287 return float16_muladd(d, b, a, 0, s);
3288 }
3289
3290 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3291 {
3292 return float32_muladd(d, b, a, 0, s);
3293 }
3294
3295 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3296 {
3297 return float64_muladd(d, b, a, 0, s);
3298 }
3299
3300 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3301 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3302 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3303 GEN_VEXT_VV_ENV(vfmadd_vv_h)
3304 GEN_VEXT_VV_ENV(vfmadd_vv_w)
3305 GEN_VEXT_VV_ENV(vfmadd_vv_d)
3306 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3307 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3308 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3309 GEN_VEXT_VF(vfmadd_vf_h)
3310 GEN_VEXT_VF(vfmadd_vf_w)
3311 GEN_VEXT_VF(vfmadd_vf_d)
3312
3313 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3314 {
3315 return float16_muladd(d, b, a,
3316 float_muladd_negate_c | float_muladd_negate_product, s);
3317 }
3318
3319 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3320 {
3321 return float32_muladd(d, b, a,
3322 float_muladd_negate_c | float_muladd_negate_product, s);
3323 }
3324
3325 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3326 {
3327 return float64_muladd(d, b, a,
3328 float_muladd_negate_c | float_muladd_negate_product, s);
3329 }
3330
3331 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3332 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3333 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3334 GEN_VEXT_VV_ENV(vfnmadd_vv_h)
3335 GEN_VEXT_VV_ENV(vfnmadd_vv_w)
3336 GEN_VEXT_VV_ENV(vfnmadd_vv_d)
3337 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3338 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3339 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3340 GEN_VEXT_VF(vfnmadd_vf_h)
3341 GEN_VEXT_VF(vfnmadd_vf_w)
3342 GEN_VEXT_VF(vfnmadd_vf_d)
3343
3344 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3345 {
3346 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3347 }
3348
3349 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3350 {
3351 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3352 }
3353
3354 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3355 {
3356 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3357 }
3358
3359 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3360 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3361 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3362 GEN_VEXT_VV_ENV(vfmsub_vv_h)
3363 GEN_VEXT_VV_ENV(vfmsub_vv_w)
3364 GEN_VEXT_VV_ENV(vfmsub_vv_d)
3365 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3366 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3367 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3368 GEN_VEXT_VF(vfmsub_vf_h)
3369 GEN_VEXT_VF(vfmsub_vf_w)
3370 GEN_VEXT_VF(vfmsub_vf_d)
3371
3372 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3373 {
3374 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3375 }
3376
3377 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3378 {
3379 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3380 }
3381
3382 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3383 {
3384 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3385 }
3386
3387 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3388 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3389 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3390 GEN_VEXT_VV_ENV(vfnmsub_vv_h)
3391 GEN_VEXT_VV_ENV(vfnmsub_vv_w)
3392 GEN_VEXT_VV_ENV(vfnmsub_vv_d)
3393 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3394 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3395 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3396 GEN_VEXT_VF(vfnmsub_vf_h)
3397 GEN_VEXT_VF(vfnmsub_vf_w)
3398 GEN_VEXT_VF(vfnmsub_vf_d)
3399
3400 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
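/*
 * The widening forms promote both multiplicands to the double-width
 * format first (ieee = true selects IEEE half-precision for f16 inputs)
 * and then issue a single fused multiply-add at the wider precision, so
 * only one rounding step is applied.
 */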
3401 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3402 {
3403 return float32_muladd(float16_to_float32(a, true, s),
3404 float16_to_float32(b, true, s), d, 0, s);
3405 }
3406
3407 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3408 {
3409 return float64_muladd(float32_to_float64(a, s),
3410 float32_to_float64(b, s), d, 0, s);
3411 }
3412
3413 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3414 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3415 GEN_VEXT_VV_ENV(vfwmacc_vv_h)
3416 GEN_VEXT_VV_ENV(vfwmacc_vv_w)
3417 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3418 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3419 GEN_VEXT_VF(vfwmacc_vf_h)
3420 GEN_VEXT_VF(vfwmacc_vf_w)
3421
3422 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3423 {
3424 return float32_muladd(float16_to_float32(a, true, s),
3425 float16_to_float32(b, true, s), d,
3426 float_muladd_negate_c | float_muladd_negate_product, s);
3427 }
3428
3429 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3430 {
3431 return float64_muladd(float32_to_float64(a, s),
3432 float32_to_float64(b, s), d,
3433 float_muladd_negate_c | float_muladd_negate_product, s);
3434 }
3435
3436 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3437 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3438 GEN_VEXT_VV_ENV(vfwnmacc_vv_h)
3439 GEN_VEXT_VV_ENV(vfwnmacc_vv_w)
3440 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3441 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3442 GEN_VEXT_VF(vfwnmacc_vf_h)
3443 GEN_VEXT_VF(vfwnmacc_vf_w)
3444
3445 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3446 {
3447 return float32_muladd(float16_to_float32(a, true, s),
3448 float16_to_float32(b, true, s), d,
3449 float_muladd_negate_c, s);
3450 }
3451
3452 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3453 {
3454 return float64_muladd(float32_to_float64(a, s),
3455 float32_to_float64(b, s), d,
3456 float_muladd_negate_c, s);
3457 }
3458
3459 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3460 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3461 GEN_VEXT_VV_ENV(vfwmsac_vv_h)
3462 GEN_VEXT_VV_ENV(vfwmsac_vv_w)
3463 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3464 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3465 GEN_VEXT_VF(vfwmsac_vf_h)
3466 GEN_VEXT_VF(vfwmsac_vf_w)
3467
3468 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3469 {
3470 return float32_muladd(float16_to_float32(a, true, s),
3471 float16_to_float32(b, true, s), d,
3472 float_muladd_negate_product, s);
3473 }
3474
3475 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3476 {
3477 return float64_muladd(float32_to_float64(a, s),
3478 float32_to_float64(b, s), d,
3479 float_muladd_negate_product, s);
3480 }
3481
3482 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3483 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3484 GEN_VEXT_VV_ENV(vfwnmsac_vv_h)
3485 GEN_VEXT_VV_ENV(vfwnmsac_vv_w)
3486 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3487 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3488 GEN_VEXT_VF(vfwnmsac_vf_h)
3489 GEN_VEXT_VF(vfwnmsac_vf_w)
3490
3491 /* Vector Floating-Point Square-Root Instruction */
3492 /* (TD, T2, TX2) */
3493 #define OP_UU_H uint16_t, uint16_t, uint16_t
3494 #define OP_UU_W uint32_t, uint32_t, uint32_t
3495 #define OP_UU_D uint64_t, uint64_t, uint64_t
3496
3497 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3498 static void do_##NAME(void *vd, void *vs2, int i, \
3499 CPURISCVState *env) \
3500 { \
3501 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3502 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3503 }
3504
3505 #define GEN_VEXT_V_ENV(NAME) \
3506 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3507 CPURISCVState *env, uint32_t desc) \
3508 { \
3509 uint32_t vm = vext_vm(desc); \
3510 uint32_t vl = env->vl; \
3511 uint32_t i; \
3512 \
3513 if (vl == 0) { \
3514 return; \
3515 } \
3516 for (i = env->vstart; i < vl; i++) { \
3517 if (!vm && !vext_elem_mask(v0, i)) { \
3518 continue; \
3519 } \
3520 do_##NAME(vd, vs2, i, env); \
3521 } \
3522 env->vstart = 0; \
3523 }
3524
3525 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3526 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3527 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3528 GEN_VEXT_V_ENV(vfsqrt_v_h)
3529 GEN_VEXT_V_ENV(vfsqrt_v_w)
3530 GEN_VEXT_V_ENV(vfsqrt_v_d)
3531
3532 /*
3533 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3534 *
3535 * Adapted from riscv-v-spec recip.c:
3536 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3537 */
3538 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3539 {
3540 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3541 uint64_t exp = extract64(f, frac_size, exp_size);
3542 uint64_t frac = extract64(f, 0, frac_size);
3543
3544 const uint8_t lookup_table[] = {
3545 52, 51, 50, 48, 47, 46, 44, 43,
3546 42, 41, 40, 39, 38, 36, 35, 34,
3547 33, 32, 31, 30, 30, 29, 28, 27,
3548 26, 25, 24, 23, 23, 22, 21, 20,
3549 19, 19, 18, 17, 16, 16, 15, 14,
3550 14, 13, 12, 12, 11, 10, 10, 9,
3551 9, 8, 7, 7, 6, 6, 5, 4,
3552 4, 3, 3, 2, 2, 1, 1, 0,
3553 127, 125, 123, 121, 119, 118, 116, 114,
3554 113, 111, 109, 108, 106, 105, 103, 102,
3555 100, 99, 97, 96, 95, 93, 92, 91,
3556 90, 88, 87, 86, 85, 84, 83, 82,
3557 80, 79, 78, 77, 76, 75, 74, 73,
3558 72, 71, 70, 70, 69, 68, 67, 66,
3559 65, 64, 63, 63, 62, 61, 60, 59,
3560 59, 58, 57, 56, 56, 55, 54, 53
3561 };
3562 const int precision = 7;
3563
3564 if (exp == 0 && frac != 0) { /* subnormal */
3565 /* Normalize the subnormal. */
3566 while (extract64(frac, frac_size - 1, 1) == 0) {
3567 exp--;
3568 frac <<= 1;
3569 }
3570
3571 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3572 }
3573
3574 int idx = ((exp & 1) << (precision - 1)) |
3575 (frac >> (frac_size - precision + 1));
3576 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3577 (frac_size - precision);
3578 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3579
3580 uint64_t val = 0;
3581 val = deposit64(val, 0, frac_size, out_frac);
3582 val = deposit64(val, frac_size, exp_size, out_exp);
3583 val = deposit64(val, frac_size + exp_size, 1, sign);
3584 return val;
3585 }
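
/*
 * The estimate uses a 128-entry table indexed by the exponent's low bit
 * and the top six fraction bits; the output exponent works out to
 * (3 * bias - 1 - exp) / 2 (~exp is -exp - 1 in two's complement),
 * i.e. roughly the exponent of 1/sqrt(x).
 *
 * Hand-worked float32 example: for 4.0f (exp = 129, frac = 0) we get
 * idx = 64, lookup_table[64] = 127, out_exp = 125, producing
 * 0x3eff0000 = 0.498046875, within 2^-7 of 1/sqrt(4.0) = 0.5.
 */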
3586
3587 static float16 frsqrt7_h(float16 f, float_status *s)
3588 {
3589 int exp_size = 5, frac_size = 10;
3590 bool sign = float16_is_neg(f);
3591
3592 /*
3593 * frsqrt7(sNaN) = canonical NaN
3594 * frsqrt7(-inf) = canonical NaN
3595 * frsqrt7(-normal) = canonical NaN
3596 * frsqrt7(-subnormal) = canonical NaN
3597 */
3598 if (float16_is_signaling_nan(f, s) ||
3599 (float16_is_infinity(f) && sign) ||
3600 (float16_is_normal(f) && sign) ||
3601 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3602 s->float_exception_flags |= float_flag_invalid;
3603 return float16_default_nan(s);
3604 }
3605
3606 /* frsqrt7(qNaN) = canonical NaN */
3607 if (float16_is_quiet_nan(f, s)) {
3608 return float16_default_nan(s);
3609 }
3610
3611 /* frsqrt7(+-0) = +-inf */
3612 if (float16_is_zero(f)) {
3613 s->float_exception_flags |= float_flag_divbyzero;
3614 return float16_set_sign(float16_infinity, sign);
3615 }
3616
3617 /* frsqrt7(+inf) = +0 */
3618 if (float16_is_infinity(f) && !sign) {
3619 return float16_set_sign(float16_zero, sign);
3620 }
3621
3622 /* +normal, +subnormal */
3623 uint64_t val = frsqrt7(f, exp_size, frac_size);
3624 return make_float16(val);
3625 }
3626
3627 static float32 frsqrt7_s(float32 f, float_status *s)
3628 {
3629 int exp_size = 8, frac_size = 23;
3630 bool sign = float32_is_neg(f);
3631
3632 /*
3633 * frsqrt7(sNaN) = canonical NaN
3634 * frsqrt7(-inf) = canonical NaN
3635 * frsqrt7(-normal) = canonical NaN
3636 * frsqrt7(-subnormal) = canonical NaN
3637 */
3638 if (float32_is_signaling_nan(f, s) ||
3639 (float32_is_infinity(f) && sign) ||
3640 (float32_is_normal(f) && sign) ||
3641 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3642 s->float_exception_flags |= float_flag_invalid;
3643 return float32_default_nan(s);
3644 }
3645
3646 /* frsqrt7(qNaN) = canonical NaN */
3647 if (float32_is_quiet_nan(f, s)) {
3648 return float32_default_nan(s);
3649 }
3650
3651 /* frsqrt7(+-0) = +-inf */
3652 if (float32_is_zero(f)) {
3653 s->float_exception_flags |= float_flag_divbyzero;
3654 return float32_set_sign(float32_infinity, sign);
3655 }
3656
3657 /* frsqrt7(+inf) = +0 */
3658 if (float32_is_infinity(f) && !sign) {
3659 return float32_set_sign(float32_zero, sign);
3660 }
3661
3662 /* +normal, +subnormal */
3663 uint64_t val = frsqrt7(f, exp_size, frac_size);
3664 return make_float32(val);
3665 }
3666
3667 static float64 frsqrt7_d(float64 f, float_status *s)
3668 {
3669 int exp_size = 11, frac_size = 52;
3670 bool sign = float64_is_neg(f);
3671
3672 /*
3673 * frsqrt7(sNaN) = canonical NaN
3674 * frsqrt7(-inf) = canonical NaN
3675 * frsqrt7(-normal) = canonical NaN
3676 * frsqrt7(-subnormal) = canonical NaN
3677 */
3678 if (float64_is_signaling_nan(f, s) ||
3679 (float64_is_infinity(f) && sign) ||
3680 (float64_is_normal(f) && sign) ||
3681 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3682 s->float_exception_flags |= float_flag_invalid;
3683 return float64_default_nan(s);
3684 }
3685
3686 /* frsqrt7(qNaN) = canonical NaN */
3687 if (float64_is_quiet_nan(f, s)) {
3688 return float64_default_nan(s);
3689 }
3690
3691 /* frsqrt7(+-0) = +-inf */
3692 if (float64_is_zero(f)) {
3693 s->float_exception_flags |= float_flag_divbyzero;
3694 return float64_set_sign(float64_infinity, sign);
3695 }
3696
3697 /* frsqrt7(+inf) = +0 */
3698 if (float64_is_infinity(f) && !sign) {
3699 return float64_set_sign(float64_zero, sign);
3700 }
3701
3702 /* +normal, +subnormal */
3703 uint64_t val = frsqrt7(f, exp_size, frac_size);
3704 return make_float64(val);
3705 }
3706
3707 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3708 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3709 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3710 GEN_VEXT_V_ENV(vfrsqrt7_v_h)
3711 GEN_VEXT_V_ENV(vfrsqrt7_v_w)
3712 GEN_VEXT_V_ENV(vfrsqrt7_v_d)
3713
3714 /*
3715 * Vector Floating-Point Reciprocal Estimate Instruction
3716 *
3717 * Adapted from riscv-v-spec recip.c:
3718 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3719 */
3720 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3721 float_status *s)
3722 {
3723 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3724 uint64_t exp = extract64(f, frac_size, exp_size);
3725 uint64_t frac = extract64(f, 0, frac_size);
3726
3727 const uint8_t lookup_table[] = {
3728 127, 125, 123, 121, 119, 117, 116, 114,
3729 112, 110, 109, 107, 105, 104, 102, 100,
3730 99, 97, 96, 94, 93, 91, 90, 88,
3731 87, 85, 84, 83, 81, 80, 79, 77,
3732 76, 75, 74, 72, 71, 70, 69, 68,
3733 66, 65, 64, 63, 62, 61, 60, 59,
3734 58, 57, 56, 55, 54, 53, 52, 51,
3735 50, 49, 48, 47, 46, 45, 44, 43,
3736 42, 41, 40, 40, 39, 38, 37, 36,
3737 35, 35, 34, 33, 32, 31, 31, 30,
3738 29, 28, 28, 27, 26, 25, 25, 24,
3739 23, 23, 22, 21, 21, 20, 19, 19,
3740 18, 17, 17, 16, 15, 15, 14, 14,
3741 13, 12, 12, 11, 11, 10, 9, 9,
3742 8, 8, 7, 7, 6, 5, 5, 4,
3743 4, 3, 3, 2, 2, 1, 1, 0
3744 };
3745 const int precision = 7;
3746
3747 if (exp == 0 && frac != 0) { /* subnormal */
3748 /* Normalize the subnormal. */
3749 while (extract64(frac, frac_size - 1, 1) == 0) {
3750 exp--;
3751 frac <<= 1;
3752 }
3753
3754 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3755
3756 if (exp != 0 && exp != UINT64_MAX) {
3757 /*
3758 * Overflow to inf or max value of same sign,
3759 * depending on sign and rounding mode.
3760 */
3761 s->float_exception_flags |= (float_flag_inexact |
3762 float_flag_overflow);
3763
3764 if ((s->float_rounding_mode == float_round_to_zero) ||
3765 ((s->float_rounding_mode == float_round_down) && !sign) ||
3766 ((s->float_rounding_mode == float_round_up) && sign)) {
3767 /* Return the largest finite value of the same sign. */
3768 return (sign << (exp_size + frac_size)) |
3769 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3770 } else {
3771 /* Return +-inf. */
3772 return (sign << (exp_size + frac_size)) |
3773 MAKE_64BIT_MASK(frac_size, exp_size);
3774 }
3775 }
3776 }
3777
3778 int idx = frac >> (frac_size - precision);
3779 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3780 (frac_size - precision);
3781 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3782
3783 if (out_exp == 0 || out_exp == UINT64_MAX) {
3784 /*
3785 * The result is subnormal, but don't raise the underflow exception,
3786 * because there's no additional loss of precision.
3787 */
3788 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3789 if (out_exp == UINT64_MAX) {
3790 out_frac >>= 1;
3791 out_exp = 0;
3792 }
3793 }
3794
3795 uint64_t val = 0;
3796 val = deposit64(val, 0, frac_size, out_frac);
3797 val = deposit64(val, frac_size, exp_size, out_exp);
3798 val = deposit64(val, frac_size + exp_size, 1, sign);
3799 return val;
3800 }
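
/*
 * Here out_exp = 2 * bias - 1 - exp (again via ~exp == -exp - 1).
 * Results that would fall outside the normal exponent range are either
 * denormalized or saturated to the largest finite value / infinity,
 * depending on the rounding mode.
 *
 * Hand-worked float32 example: for 2.0f (exp = 128, frac = 0) we get
 * idx = 0, lookup_table[0] = 127, out_exp = 125, producing
 * 0x3eff0000 = 0.498046875, within 2^-7 of 1/2.0 = 0.5.
 */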
3801
3802 static float16 frec7_h(float16 f, float_status *s)
3803 {
3804 int exp_size = 5, frac_size = 10;
3805 bool sign = float16_is_neg(f);
3806
3807 /* frec7(+-inf) = +-0 */
3808 if (float16_is_infinity(f)) {
3809 return float16_set_sign(float16_zero, sign);
3810 }
3811
3812 /* frec7(+-0) = +-inf */
3813 if (float16_is_zero(f)) {
3814 s->float_exception_flags |= float_flag_divbyzero;
3815 return float16_set_sign(float16_infinity, sign);
3816 }
3817
3818 /* frec7(sNaN) = canonical NaN */
3819 if (float16_is_signaling_nan(f, s)) {
3820 s->float_exception_flags |= float_flag_invalid;
3821 return float16_default_nan(s);
3822 }
3823
3824 /* frec7(qNaN) = canonical NaN */
3825 if (float16_is_quiet_nan(f, s)) {
3826 return float16_default_nan(s);
3827 }
3828
3829 /* +-normal, +-subnormal */
3830 uint64_t val = frec7(f, exp_size, frac_size, s);
3831 return make_float16(val);
3832 }
3833
3834 static float32 frec7_s(float32 f, float_status *s)
3835 {
3836 int exp_size = 8, frac_size = 23;
3837 bool sign = float32_is_neg(f);
3838
3839 /* frec7(+-inf) = +-0 */
3840 if (float32_is_infinity(f)) {
3841 return float32_set_sign(float32_zero, sign);
3842 }
3843
3844 /* frec7(+-0) = +-inf */
3845 if (float32_is_zero(f)) {
3846 s->float_exception_flags |= float_flag_divbyzero;
3847 return float32_set_sign(float32_infinity, sign);
3848 }
3849
3850 /* frec7(sNaN) = canonical NaN */
3851 if (float32_is_signaling_nan(f, s)) {
3852 s->float_exception_flags |= float_flag_invalid;
3853 return float32_default_nan(s);
3854 }
3855
3856 /* frec7(qNaN) = canonical NaN */
3857 if (float32_is_quiet_nan(f, s)) {
3858 return float32_default_nan(s);
3859 }
3860
3861 /* +-normal, +-subnormal */
3862 uint64_t val = frec7(f, exp_size, frac_size, s);
3863 return make_float32(val);
3864 }
3865
3866 static float64 frec7_d(float64 f, float_status *s)
3867 {
3868 int exp_size = 11, frac_size = 52;
3869 bool sign = float64_is_neg(f);
3870
3871 /* frec7(+-inf) = +-0 */
3872 if (float64_is_infinity(f)) {
3873 return float64_set_sign(float64_zero, sign);
3874 }
3875
3876 /* frec7(+-0) = +-inf */
3877 if (float64_is_zero(f)) {
3878 s->float_exception_flags |= float_flag_divbyzero;
3879 return float64_set_sign(float64_infinity, sign);
3880 }
3881
3882 /* frec7(sNaN) = canonical NaN */
3883 if (float64_is_signaling_nan(f, s)) {
3884 s->float_exception_flags |= float_flag_invalid;
3885 return float64_default_nan(s);
3886 }
3887
3888 /* frec7(qNaN) = canonical NaN */
3889 if (float64_is_quiet_nan(f, s)) {
3890 return float64_default_nan(s);
3891 }
3892
3893 /* +-normal, +-subnormal */
3894 uint64_t val = frec7(f, exp_size, frac_size, s);
3895 return make_float64(val);
3896 }
3897
3898 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3899 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3900 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
3901 GEN_VEXT_V_ENV(vfrec7_v_h)
3902 GEN_VEXT_V_ENV(vfrec7_v_w)
3903 GEN_VEXT_V_ENV(vfrec7_v_d)
3904
3905 /* Vector Floating-Point MIN/MAX Instructions */
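/*
 * The minimum_number/maximum_number helpers follow the IEEE 754-2019
 * minimumNumber/maximumNumber rules: a quiet NaN operand is ignored
 * when the other operand is a number, and the canonical NaN is returned
 * only when both operands are NaN.
 */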
3906 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3907 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3908 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
3909 GEN_VEXT_VV_ENV(vfmin_vv_h)
3910 GEN_VEXT_VV_ENV(vfmin_vv_w)
3911 GEN_VEXT_VV_ENV(vfmin_vv_d)
3912 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3913 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3914 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
3915 GEN_VEXT_VF(vfmin_vf_h)
3916 GEN_VEXT_VF(vfmin_vf_w)
3917 GEN_VEXT_VF(vfmin_vf_d)
3918
3919 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3920 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3921 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
3922 GEN_VEXT_VV_ENV(vfmax_vv_h)
3923 GEN_VEXT_VV_ENV(vfmax_vv_w)
3924 GEN_VEXT_VV_ENV(vfmax_vv_d)
3925 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3926 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3927 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
3928 GEN_VEXT_VF(vfmax_vf_h)
3929 GEN_VEXT_VF(vfmax_vf_w)
3930 GEN_VEXT_VF(vfmax_vf_d)
3931
3932 /* Vector Floating-Point Sign-Injection Instructions */
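/*
 * deposit64() keeps the low (width - 1) magnitude bits from operand a
 * and takes the sign bit from b (vfsgnj), from ~b (vfsgnjn) or from
 * a ^ b (vfsgnjx); with the OPFVV2/OPFVF2 wiring, a is the vs2 element
 * and b is vs1 (or the scalar).  Bits above the element width are
 * dropped when the result is truncated to the element type.
 */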
3933 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3934 {
3935 return deposit64(b, 0, 15, a);
3936 }
3937
3938 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3939 {
3940 return deposit64(b, 0, 31, a);
3941 }
3942
3943 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3944 {
3945 return deposit64(b, 0, 63, a);
3946 }
3947
3948 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3949 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3950 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3951 GEN_VEXT_VV_ENV(vfsgnj_vv_h)
3952 GEN_VEXT_VV_ENV(vfsgnj_vv_w)
3953 GEN_VEXT_VV_ENV(vfsgnj_vv_d)
3954 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3955 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3956 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3957 GEN_VEXT_VF(vfsgnj_vf_h)
3958 GEN_VEXT_VF(vfsgnj_vf_w)
3959 GEN_VEXT_VF(vfsgnj_vf_d)
3960
3961 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3962 {
3963 return deposit64(~b, 0, 15, a);
3964 }
3965
3966 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3967 {
3968 return deposit64(~b, 0, 31, a);
3969 }
3970
3971 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3972 {
3973 return deposit64(~b, 0, 63, a);
3974 }
3975
3976 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3977 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3978 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3979 GEN_VEXT_VV_ENV(vfsgnjn_vv_h)
3980 GEN_VEXT_VV_ENV(vfsgnjn_vv_w)
3981 GEN_VEXT_VV_ENV(vfsgnjn_vv_d)
3982 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3983 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3984 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3985 GEN_VEXT_VF(vfsgnjn_vf_h)
3986 GEN_VEXT_VF(vfsgnjn_vf_w)
3987 GEN_VEXT_VF(vfsgnjn_vf_d)
3988
3989 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3990 {
3991 return deposit64(b ^ a, 0, 15, a);
3992 }
3993
3994 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3995 {
3996 return deposit64(b ^ a, 0, 31, a);
3997 }
3998
3999 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4000 {
4001 return deposit64(b ^ a, 0, 63, a);
4002 }
4003
4004 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4005 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4006 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
4007 GEN_VEXT_VV_ENV(vfsgnjx_vv_h)
4008 GEN_VEXT_VV_ENV(vfsgnjx_vv_w)
4009 GEN_VEXT_VV_ENV(vfsgnjx_vv_d)
4010 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4011 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4012 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
4013 GEN_VEXT_VF(vfsgnjx_vf_h)
4014 GEN_VEXT_VF(vfsgnjx_vf_w)
4015 GEN_VEXT_VF(vfsgnjx_vf_d)
4016
4017 /* Vector Floating-Point Compare Instructions */
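/*
 * vmfeq/vmfne use quiet comparisons (only signaling NaNs raise invalid),
 * while vmflt/vmfle/vmfgt/vmfge use the signaling compare helpers, so
 * any NaN operand raises the invalid flag, as IEEE 754 requires for
 * ordered relational predicates.
 */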
4018 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4019 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4020 CPURISCVState *env, uint32_t desc) \
4021 { \
4022 uint32_t vm = vext_vm(desc); \
4023 uint32_t vl = env->vl; \
4024 uint32_t i; \
4025 \
4026 for (i = env->vstart; i < vl; i++) { \
4027 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4028 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4029 if (!vm && !vext_elem_mask(v0, i)) { \
4030 continue; \
4031 } \
4032 vext_set_elem_mask(vd, i, \
4033 DO_OP(s2, s1, &env->fp_status)); \
4034 } \
4035 env->vstart = 0; \
4036 }
4037
4038 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4039 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4040 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4041
4042 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4043 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4044 CPURISCVState *env, uint32_t desc) \
4045 { \
4046 uint32_t vm = vext_vm(desc); \
4047 uint32_t vl = env->vl; \
4048 uint32_t i; \
4049 \
4050 for (i = env->vstart; i < vl; i++) { \
4051 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4052 if (!vm && !vext_elem_mask(v0, i)) { \
4053 continue; \
4054 } \
4055 vext_set_elem_mask(vd, i, \
4056 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4057 } \
4058 env->vstart = 0; \
4059 }
4060
4061 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4062 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4063 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4064
4065 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4066 {
4067 FloatRelation compare = float16_compare_quiet(a, b, s);
4068 return compare != float_relation_equal;
4069 }
4070
4071 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4072 {
4073 FloatRelation compare = float32_compare_quiet(a, b, s);
4074 return compare != float_relation_equal;
4075 }
4076
4077 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4078 {
4079 FloatRelation compare = float64_compare_quiet(a, b, s);
4080 return compare != float_relation_equal;
4081 }
4082
4083 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4084 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4085 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4086 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4087 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4088 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4089
4090 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4091 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4092 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4093 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4094 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4095 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4096
4097 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4098 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4099 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4100 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4101 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4102 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4103
4104 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4105 {
4106 FloatRelation compare = float16_compare(a, b, s);
4107 return compare == float_relation_greater;
4108 }
4109
4110 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4111 {
4112 FloatRelation compare = float32_compare(a, b, s);
4113 return compare == float_relation_greater;
4114 }
4115
4116 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4117 {
4118 FloatRelation compare = float64_compare(a, b, s);
4119 return compare == float_relation_greater;
4120 }
4121
4122 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4123 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4124 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4125
4126 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4127 {
4128 FloatRelation compare = float16_compare(a, b, s);
4129 return compare == float_relation_greater ||
4130 compare == float_relation_equal;
4131 }
4132
4133 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4134 {
4135 FloatRelation compare = float32_compare(a, b, s);
4136 return compare == float_relation_greater ||
4137 compare == float_relation_equal;
4138 }
4139
4140 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4141 {
4142 FloatRelation compare = float64_compare(a, b, s);
4143 return compare == float_relation_greater ||
4144 compare == float_relation_equal;
4145 }
4146
4147 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4148 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4149 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4150
4151 /* Vector Floating-Point Classify Instruction */
4152 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
4153 static void do_##NAME(void *vd, void *vs2, int i) \
4154 { \
4155 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
4156 *((TD *)vd + HD(i)) = OP(s2); \
4157 }
4158
4159 #define GEN_VEXT_V(NAME) \
4160 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
4161 CPURISCVState *env, uint32_t desc) \
4162 { \
4163 uint32_t vm = vext_vm(desc); \
4164 uint32_t vl = env->vl; \
4165 uint32_t i; \
4166 \
4167 for (i = env->vstart; i < vl; i++) { \
4168 if (!vm && !vext_elem_mask(v0, i)) { \
4169 continue; \
4170 } \
4171 do_##NAME(vd, vs2, i); \
4172 } \
4173 env->vstart = 0; \
4174 }
4175
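/*
 * The result uses the standard ten-bit fclass encoding:
 * bit 0 = -inf, 1 = -normal, 2 = -subnormal, 3 = -0, 4 = +0,
 * bit 5 = +subnormal, 6 = +normal, 7 = +inf, 8 = sNaN, 9 = qNaN.
 */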
4176 target_ulong fclass_h(uint64_t frs1)
4177 {
4178 float16 f = frs1;
4179 bool sign = float16_is_neg(f);
4180
4181 if (float16_is_infinity(f)) {
4182 return sign ? 1 << 0 : 1 << 7;
4183 } else if (float16_is_zero(f)) {
4184 return sign ? 1 << 3 : 1 << 4;
4185 } else if (float16_is_zero_or_denormal(f)) {
4186 return sign ? 1 << 2 : 1 << 5;
4187 } else if (float16_is_any_nan(f)) {
4188 float_status s = { }; /* for snan_bit_is_one */
4189 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4190 } else {
4191 return sign ? 1 << 1 : 1 << 6;
4192 }
4193 }
4194
4195 target_ulong fclass_s(uint64_t frs1)
4196 {
4197 float32 f = frs1;
4198 bool sign = float32_is_neg(f);
4199
4200 if (float32_is_infinity(f)) {
4201 return sign ? 1 << 0 : 1 << 7;
4202 } else if (float32_is_zero(f)) {
4203 return sign ? 1 << 3 : 1 << 4;
4204 } else if (float32_is_zero_or_denormal(f)) {
4205 return sign ? 1 << 2 : 1 << 5;
4206 } else if (float32_is_any_nan(f)) {
4207 float_status s = { }; /* for snan_bit_is_one */
4208 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4209 } else {
4210 return sign ? 1 << 1 : 1 << 6;
4211 }
4212 }
4213
4214 target_ulong fclass_d(uint64_t frs1)
4215 {
4216 float64 f = frs1;
4217 bool sign = float64_is_neg(f);
4218
4219 if (float64_is_infinity(f)) {
4220 return sign ? 1 << 0 : 1 << 7;
4221 } else if (float64_is_zero(f)) {
4222 return sign ? 1 << 3 : 1 << 4;
4223 } else if (float64_is_zero_or_denormal(f)) {
4224 return sign ? 1 << 2 : 1 << 5;
4225 } else if (float64_is_any_nan(f)) {
4226 float_status s = { }; /* for snan_bit_is_one */
4227 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4228 } else {
4229 return sign ? 1 << 1 : 1 << 6;
4230 }
4231 }
4232
4233 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4234 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4235 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4236 GEN_VEXT_V(vfclass_v_h)
4237 GEN_VEXT_V(vfclass_v_w)
4238 GEN_VEXT_V(vfclass_v_d)
4239
4240 /* Vector Floating-Point Merge Instruction */
4241 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \
4242 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4243 CPURISCVState *env, uint32_t desc) \
4244 { \
4245 uint32_t vm = vext_vm(desc); \
4246 uint32_t vl = env->vl; \
4247 uint32_t i; \
4248 \
4249 for (i = env->vstart; i < vl; i++) { \
4250 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4251 *((ETYPE *)vd + H(i)) \
4252 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
4253 } \
4254 env->vstart = 0; \
4255 }
4256
4257 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4258 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4259 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4260
4261 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
4262 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4263 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4264 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4265 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4266 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h)
4267 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w)
4268 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d)
4269
4270 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4271 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4272 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4273 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4274 GEN_VEXT_V_ENV(vfcvt_x_f_v_h)
4275 GEN_VEXT_V_ENV(vfcvt_x_f_v_w)
4276 GEN_VEXT_V_ENV(vfcvt_x_f_v_d)
4277
4278 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4279 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4280 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4281 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4282 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h)
4283 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w)
4284 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d)
4285
4286 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4287 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4288 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4289 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4290 GEN_VEXT_V_ENV(vfcvt_f_x_v_h)
4291 GEN_VEXT_V_ENV(vfcvt_f_x_v_w)
4292 GEN_VEXT_V_ENV(vfcvt_f_x_v_d)
4293
4294 /* Widening Floating-Point/Integer Type-Convert Instructions */
4295 /* (TD, T2, TX2) */
4296 #define WOP_UU_B uint16_t, uint8_t, uint8_t
4297 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4298 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4299 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
4300 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4301 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4302 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h)
4303 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w)
4304
4305 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4306 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4307 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4308 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h)
4309 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w)
4310
4311 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
4312 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4313 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4314 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4315 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b)
4316 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h)
4317 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w)
4318
4319 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4320 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4321 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4322 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4323 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b)
4324 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h)
4325 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w)
4326
4327 /*
4328 * vfwcvt.f.f.v vd, vs2, vm
4329 * Convert single-width float to double-width float.
4330 */
4331 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4332 {
4333 return float16_to_float32(a, true, s);
4334 }
4335
4336 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4337 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4338 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h)
4339 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w)
4340
4341 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4342 /* (TD, T2, TX2) */
4343 #define NOP_UU_B uint8_t, uint16_t, uint32_t
4344 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4345 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4346 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
4347 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4348 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4349 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4350 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b)
4351 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h)
4352 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w)
4353
4354 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4355 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4356 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4357 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4358 GEN_VEXT_V_ENV(vfncvt_x_f_w_b)
4359 GEN_VEXT_V_ENV(vfncvt_x_f_w_h)
4360 GEN_VEXT_V_ENV(vfncvt_x_f_w_w)
4361
4362 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
4363 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4364 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4365 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h)
4366 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w)
4367
4368 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4369 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4370 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4371 GEN_VEXT_V_ENV(vfncvt_f_x_w_h)
4372 GEN_VEXT_V_ENV(vfncvt_f_x_w_w)
4373
4374 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4375 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4376 {
4377 return float32_to_float16(a, true, s);
4378 }
4379
4380 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4381 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4382 GEN_VEXT_V_ENV(vfncvt_f_f_w_h)
4383 GEN_VEXT_V_ENV(vfncvt_f_f_w_w)
4384
4385 /*
4386 *** Vector Reduction Operations
4387 */
4388 /* Vector Single-Width Integer Reduction Instructions */
4389 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
4390 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4391 void *vs2, CPURISCVState *env, uint32_t desc) \
4392 { \
4393 uint32_t vm = vext_vm(desc); \
4394 uint32_t vl = env->vl; \
4395 uint32_t i; \
4396 TD s1 = *((TD *)vs1 + HD(0)); \
4397 \
4398 for (i = env->vstart; i < vl; i++) { \
4399 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4400 if (!vm && !vext_elem_mask(v0, i)) { \
4401 continue; \
4402 } \
4403 s1 = OP(s1, (TD)s2); \
4404 } \
4405 *((TD *)vd + HD(0)) = s1; \
4406 env->vstart = 0; \
4407 }
4408
4409 /* vd[0] = sum(vs1[0], vs2[*]) */
4410 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4411 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4412 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4413 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4414
4415 /* vd[0] = maxu(vs1[0], vs2[*]) */
4416 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4417 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4418 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4419 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4420
4421 /* vd[0] = max(vs1[0], vs2[*]) */
4422 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4423 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4424 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4425 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4426
4427 /* vd[0] = minu(vs1[0], vs2[*]) */
4428 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4429 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4430 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4431 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4432
4433 /* vd[0] = min(vs1[0], vs2[*]) */
4434 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4435 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4436 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4437 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4438
4439 /* vd[0] = and(vs1[0], vs2[*]) */
4440 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4441 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4442 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4443 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4444
4445 /* vd[0] = or(vs1[0], vs2[*]) */
4446 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4447 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4448 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4449 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4450
4451 /* vd[0] = xor(vs1[0], vs2[*]) */
4452 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4453 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4454 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4455 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4456
4457 /* Vector Widening Integer Reduction Instructions */
4458 /* Signed sum reduction into double-width accumulator */
4459 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4460 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4461 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4462
4463 /* Unsigned sum reduction into double-width accumulator */
4464 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4465 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4466 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4467
4468 /* Vector Single-Width Floating-Point Reduction Instructions */
4469 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
4470 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4471 void *vs2, CPURISCVState *env, \
4472 uint32_t desc) \
4473 { \
4474 uint32_t vm = vext_vm(desc); \
4475 uint32_t vl = env->vl; \
4476 uint32_t i; \
4477 TD s1 = *((TD *)vs1 + HD(0)); \
4478 \
4479 for (i = env->vstart; i < vl; i++) { \
4480 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4481 if (!vm && !vext_elem_mask(v0, i)) { \
4482 continue; \
4483 } \
4484 s1 = OP(s1, (TD)s2, &env->fp_status); \
4485 } \
4486 *((TD *)vd + HD(0)) = s1; \
4487 env->vstart = 0; \
4488 }
4489
4490 /* Unordered sum */
4491 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4492 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4493 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4494
4495 /* Maximum value */
4496 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4497 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4498 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
4499
4500 /* Minimum value */
4501 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4502 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4503 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4504
4505 /* Vector Widening Floating-Point Reduction Instructions */
4506 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4507 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4508 void *vs2, CPURISCVState *env, uint32_t desc)
4509 {
4510 uint32_t vm = vext_vm(desc);
4511 uint32_t vl = env->vl;
4512 uint32_t i;
4513 uint32_t s1 = *((uint32_t *)vs1 + H4(0));
4514
4515 for (i = env->vstart; i < vl; i++) {
4516 uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4517 if (!vm && !vext_elem_mask(v0, i)) {
4518 continue;
4519 }
4520 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4521 &env->fp_status);
4522 }
4523 *((uint32_t *)vd + H4(0)) = s1;
4524 env->vstart = 0;
4525 }
4526
4527 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4528 void *vs2, CPURISCVState *env, uint32_t desc)
4529 {
4530 uint32_t vm = vext_vm(desc);
4531 uint32_t vl = env->vl;
4532 uint32_t i;
4533 uint64_t s1 = *((uint64_t *)vs1);
4534
4535 for (i = env->vstart; i < vl; i++) {
4536 uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4537 if (!vm && !vext_elem_mask(v0, i)) {
4538 continue;
4539 }
4540 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4541 &env->fp_status);
4542 }
4543 *((uint64_t *)vd) = s1;
4544 env->vstart = 0;
4545 }
4546
4547 /*
4548 *** Vector Mask Operations
4549 */
4550 /* Vector Mask-Register Logical Instructions */
4551 #define GEN_VEXT_MASK_VV(NAME, OP) \
4552 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4553 void *vs2, CPURISCVState *env, \
4554 uint32_t desc) \
4555 { \
4556 uint32_t vl = env->vl; \
4557 uint32_t i; \
4558 int a, b; \
4559 \
4560 for (i = env->vstart; i < vl; i++) { \
4561 a = vext_elem_mask(vs1, i); \
4562 b = vext_elem_mask(vs2, i); \
4563 vext_set_elem_mask(vd, i, OP(b, a)); \
4564 } \
4565 env->vstart = 0; \
4566 }
4567
4568 #define DO_NAND(N, M) (!(N & M))
4569 #define DO_ANDNOT(N, M) (N & !M)
4570 #define DO_NOR(N, M) (!(N | M))
4571 #define DO_ORNOT(N, M) (N | !M)
4572 #define DO_XNOR(N, M) (!(N ^ M))
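
/*
 * The operands are single mask bits (0 or 1) read via vext_elem_mask(),
 * so logical negation (!) doubles as a one-bit complement in the
 * ANDNOT/ORNOT forms.
 */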
4573
4574 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4575 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4576 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4577 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4578 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4579 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4580 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4581 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4582
4583 /* Vector count population in mask vcpop */
4584 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4585 uint32_t desc)
4586 {
4587 target_ulong cnt = 0;
4588 uint32_t vm = vext_vm(desc);
4589 uint32_t vl = env->vl;
4590 int i;
4591
4592 for (i = env->vstart; i < vl; i++) {
4593 if (vm || vext_elem_mask(v0, i)) {
4594 if (vext_elem_mask(vs2, i)) {
4595 cnt++;
4596 }
4597 }
4598 }
4599 env->vstart = 0;
4600 return cnt;
4601 }
4602
4603 /* vfirst find-first-set mask bit */
4604 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4605 uint32_t desc)
4606 {
4607 uint32_t vm = vext_vm(desc);
4608 uint32_t vl = env->vl;
4609 int i;
4610
4611 for (i = env->vstart; i < vl; i++) {
4612 if (vm || vext_elem_mask(v0, i)) {
4613 if (vext_elem_mask(vs2, i)) {
4614 return i;
4615 }
4616 }
4617 }
4618 env->vstart = 0;
4619 return -1LL;
4620 }
4621
4622 enum set_mask_type {
4623 ONLY_FIRST = 1,
4624 INCLUDE_FIRST,
4625 BEFORE_FIRST,
4626 };
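
/*
 * BEFORE_FIRST, INCLUDE_FIRST and ONLY_FIRST implement vmsbf.m, vmsif.m
 * and vmsof.m respectively; see the HELPER wrappers below.
 */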
4627
4628 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4629 uint32_t desc, enum set_mask_type type)
4630 {
4631 uint32_t vm = vext_vm(desc);
4632 uint32_t vl = env->vl;
4633 int i;
4634 bool first_mask_bit = false;
4635
4636 for (i = env->vstart; i < vl; i++) {
4637 if (!vm && !vext_elem_mask(v0, i)) {
4638 continue;
4639 }
4640 /* write a zero to all following active elements */
4641 if (first_mask_bit) {
4642 vext_set_elem_mask(vd, i, 0);
4643 continue;
4644 }
4645 if (vext_elem_mask(vs2, i)) {
4646 first_mask_bit = true;
4647 if (type == BEFORE_FIRST) {
4648 vext_set_elem_mask(vd, i, 0);
4649 } else {
4650 vext_set_elem_mask(vd, i, 1);
4651 }
4652 } else {
4653 if (type == ONLY_FIRST) {
4654 vext_set_elem_mask(vd, i, 0);
4655 } else {
4656 vext_set_elem_mask(vd, i, 1);
4657 }
4658 }
4659 }
4660 env->vstart = 0;
4661 }
4662
4663 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4664 uint32_t desc)
4665 {
4666 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4667 }
4668
4669 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4670 uint32_t desc)
4671 {
4672 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4673 }
4674
4675 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4676 uint32_t desc)
4677 {
4678 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4679 }
4680
4681 /* Vector Iota Instruction */
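/*
 * viota writes a running prefix sum of the vs2 mask: each active
 * destination element i receives the count of set vs2 mask bits among
 * the preceding active elements.
 */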
4682 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
4683 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4684 uint32_t desc) \
4685 { \
4686 uint32_t vm = vext_vm(desc); \
4687 uint32_t vl = env->vl; \
4688 uint32_t sum = 0; \
4689 int i; \
4690 \
4691 for (i = env->vstart; i < vl; i++) { \
4692 if (!vm && !vext_elem_mask(v0, i)) { \
4693 continue; \
4694 } \
4695 *((ETYPE *)vd + H(i)) = sum; \
4696 if (vext_elem_mask(vs2, i)) { \
4697 sum++; \
4698 } \
4699 } \
4700 env->vstart = 0; \
4701 }
4702
4703 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4704 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4705 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4706 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4707
4708 /* Vector Element Index Instruction */
4709 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \
4710 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4711 { \
4712 uint32_t vm = vext_vm(desc); \
4713 uint32_t vl = env->vl; \
4714 int i; \
4715 \
4716 for (i = env->vstart; i < vl; i++) { \
4717 if (!vm && !vext_elem_mask(v0, i)) { \
4718 continue; \
4719 } \
4720 *((ETYPE *)vd + H(i)) = i; \
4721 } \
4722 env->vstart = 0; \
4723 }
4724
4725 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4726 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4727 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4728 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4729
4730 /*
4731 *** Vector Permutation Instructions
4732 */
4733
4734 /* Vector Slide Instructions */
4735 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
4736 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4737 CPURISCVState *env, uint32_t desc) \
4738 { \
4739 uint32_t vm = vext_vm(desc); \
4740 uint32_t vl = env->vl; \
4741 target_ulong offset = s1, i_min, i; \
4742 \
4743 i_min = MAX(env->vstart, offset); \
4744 for (i = i_min; i < vl; i++) { \
4745 if (!vm && !vext_elem_mask(v0, i)) { \
4746 continue; \
4747 } \
4748 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
4749 } \
4750 }
4751
4752 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4753 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
4754 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4755 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4756 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4757
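/*
 * For vslidedown, i_max clamps the copy loop so that the source index
 * i + s1 never reaches vlmax; destination elements from i_max up to
 * vl - 1 would source beyond the register group and are written as
 * zero instead.
 */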
4758 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
4759 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4760 CPURISCVState *env, uint32_t desc) \
4761 { \
4762 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
4763 uint32_t vm = vext_vm(desc); \
4764 uint32_t vl = env->vl; \
4765 target_ulong i_max, i; \
4766 \
4767 i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
4768 for (i = env->vstart; i < i_max; ++i) { \
4769 if (vm || vext_elem_mask(v0, i)) { \
4770 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
4771 } \
4772 } \
4773 \
4774 for (i = i_max; i < vl; ++i) { \
4775 if (vm || vext_elem_mask(v0, i)) { \
4776 *((ETYPE *)vd + H(i)) = 0; \
4777 } \
4778 } \
4779 \
4780 env->vstart = 0; \
4781 }
4782
4783 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4784 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
4785 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4786 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4787 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4788
4789 #define GEN_VEXT_VSLIDE1UP(BITWIDTH, H)                                   \
4790 static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
4791 void *vs2, CPURISCVState *env, uint32_t desc) \
4792 { \
4793 typedef uint##BITWIDTH##_t ETYPE; \
4794 uint32_t vm = vext_vm(desc); \
4795 uint32_t vl = env->vl; \
4796 uint32_t i; \
4797 \
4798 for (i = env->vstart; i < vl; i++) { \
4799 if (!vm && !vext_elem_mask(v0, i)) { \
4800 continue; \
4801 } \
4802 if (i == 0) { \
4803 *((ETYPE *)vd + H(i)) = s1; \
4804 } else { \
4805 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
4806 } \
4807 } \
4808 env->vstart = 0; \
4809 }
4810
4811 GEN_VEXT_VSLIDE1UP(8, H1)
4812 GEN_VEXT_VSLIDE1UP(16, H2)
4813 GEN_VEXT_VSLIDE1UP(32, H4)
4814 GEN_VEXT_VSLIDE1UP(64, H8)
4815
4816 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
4817 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4818 CPURISCVState *env, uint32_t desc) \
4819 { \
4820 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
4821 }
4822
4823 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4824 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4825 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4826 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4827 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4828
4829 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
4830 static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
4831 void *vs2, CPURISCVState *env, uint32_t desc) \
4832 { \
4833 typedef uint##BITWIDTH##_t ETYPE; \
4834 uint32_t vm = vext_vm(desc); \
4835 uint32_t vl = env->vl; \
4836 uint32_t i; \
4837 \
4838 for (i = env->vstart; i < vl; i++) { \
4839 if (!vm && !vext_elem_mask(v0, i)) { \
4840 continue; \
4841 } \
4842 if (i == vl - 1) { \
4843 *((ETYPE *)vd + H(i)) = s1; \
4844 } else { \
4845 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
4846 } \
4847 } \
4848 env->vstart = 0; \
4849 }
4850
4851 GEN_VEXT_VSLIDE1DOWN(8, H1)
4852 GEN_VEXT_VSLIDE1DOWN(16, H2)
4853 GEN_VEXT_VSLIDE1DOWN(32, H4)
4854 GEN_VEXT_VSLIDE1DOWN(64, H8)
4855
4856 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
4857 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4858 CPURISCVState *env, uint32_t desc) \
4859 { \
4860 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
4861 }
4862
4863 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4864 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4865 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4866 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4867 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4868
4869 /* Vector Floating-Point Slide Instructions */
4870 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
4871 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4872 CPURISCVState *env, uint32_t desc) \
4873 { \
4874 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
4875 }
4876
4877 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4878 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4879 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4880 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4881
4882 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
4883 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4884 CPURISCVState *env, uint32_t desc) \
4885 { \
4886 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
4887 }
4888
4889 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4890 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4891 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4892 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
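/*
 * The floating-point slide1 variants reuse the integer vslide1up/vslide1down
 * helpers above; s1 simply carries the raw bit pattern of f[rs1].
 * Illustrative example for vfslide1down.vf with SEW = 32, vl = 4, no masking
 * (assumed values):
 *   f[rs1] = 1.0f (0x3f800000), vs2 = {2.0, 3.0, 4.0, 5.0}
 *   vd = {3.0, 4.0, 5.0, 1.0}
 */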
4893
4894 /* Vector Register Gather Instructions */
4895 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
4896 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4897 CPURISCVState *env, uint32_t desc) \
4898 { \
4899 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
4900 uint32_t vm = vext_vm(desc); \
4901 uint32_t vl = env->vl; \
4902 uint64_t index; \
4903 uint32_t i; \
4904 \
4905 for (i = env->vstart; i < vl; i++) { \
4906 if (!vm && !vext_elem_mask(v0, i)) { \
4907 continue; \
4908 } \
4909 index = *((TS1 *)vs1 + HS1(i)); \
4910 if (index >= vlmax) { \
4911 *((TS2 *)vd + HS2(i)) = 0; \
4912 } else { \
4913 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
4914 } \
4915 } \
4916 env->vstart = 0; \
4917 }
4918
4919 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4920 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
4921 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4922 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4923 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4924
4925 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
4926 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4927 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4928 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
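/*
 * Worked example for vrgather.vv, assuming VLMAX = 4, vl = 4, no masking and
 * illustrative values only:
 *   vs1 = {3, 0, 7, 1}, vs2 = {10, 20, 30, 40}
 *   vd  = {40, 10, 0, 20}   index 7 is >= VLMAX, so that element reads as 0
 * vrgatherei16.vv behaves the same way but always uses 16-bit index elements
 * (TS1 above), independent of the data SEW.
 */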
4929
4930 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
4931 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4932 CPURISCVState *env, uint32_t desc) \
4933 { \
4934 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
4935 uint32_t vm = vext_vm(desc); \
4936 uint32_t vl = env->vl; \
4937 uint64_t index = s1; \
4938 uint32_t i; \
4939 \
4940 for (i = env->vstart; i < vl; i++) { \
4941 if (!vm && !vext_elem_mask(v0, i)) { \
4942 continue; \
4943 } \
4944 if (index >= vlmax) { \
4945 *((ETYPE *)vd + H(i)) = 0; \
4946 } else { \
4947 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
4948 } \
4949 } \
4950 env->vstart = 0; \
4951 }
4952
4953 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
4954 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
4955 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4956 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4957 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
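/*
 * vrgather.vx broadcasts a single source element to all active destination
 * elements. Illustrative example, assuming VLMAX = 4, vl = 4, no masking:
 *   x[rs1] = 2, vs2 = {10, 20, 30, 40}
 *   vd = {30, 30, 30, 30}
 */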
4958
4959 /* Vector Compress Instruction */
4960 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
4961 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4962 CPURISCVState *env, uint32_t desc) \
4963 { \
4964 uint32_t vl = env->vl; \
4965 uint32_t num = 0, i; \
4966 \
4967 for (i = env->vstart; i < vl; i++) { \
4968 if (!vext_elem_mask(vs1, i)) { \
4969 continue; \
4970 } \
4971 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
4972 num++; \
4973 } \
4974 env->vstart = 0; \
4975 }
4976
4977 /* Compress into vd the elements of vs2 whose corresponding vs1 mask bit is set */
4978 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
4979 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4980 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4981 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
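/*
 * Worked example for vcompress.vm, assuming vl = 4 and illustrative values:
 *   vs1 mask bits = 0b1010 (elements 1 and 3 enabled), vs2 = {10, 20, 30, 40}
 *   vd[0] = 20, vd[1] = 40; destination elements past the packed count are
 *   not written by this helper.
 */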
4982
4983 /* Vector Whole Register Move */
4984 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
4985 {
4986 /* EEW = SEW */
4987 uint32_t maxsz = simd_maxsz(desc);
4988 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
4989 uint32_t startb = env->vstart * sewb;
4990 uint32_t i = startb;
4991
4992 memcpy((uint8_t *)vd + H1(i),
4993 (uint8_t *)vs2 + H1(i),
4994 maxsz - startb);
4995
4996 env->vstart = 0;
4997 }
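/*
 * Illustrative sizing for the whole register move, assuming VLEN = 128
 * (vlenb = 16) and that maxsz covers the full NF-register group: for
 * vmv2r.v, maxsz = 2 * 16 = 32, so with vstart == 0 the helper copies
 * the whole 32-byte group in a single memcpy.
 */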
4998
4999 /* Vector Integer Extension */
5000 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5001 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5002 CPURISCVState *env, uint32_t desc) \
5003 { \
5004 uint32_t vl = env->vl; \
5005 uint32_t vm = vext_vm(desc); \
5006 uint32_t i; \
5007 \
5008 for (i = env->vstart; i < vl; i++) { \
5009 if (!vm && !vext_elem_mask(v0, i)) { \
5010 continue; \
5011 } \
5012 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5013 } \
5014 env->vstart = 0; \
5015 }
5016
5017 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5018 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5019 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5020 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5021 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5022 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5023
5024 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5025 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5026 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5027 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5028 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5029 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
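/*
 * Worked example for the vf2 extensions with SEW = 16 (illustrative values,
 * no masking):
 *   vs2 bytes      = {0xff, 0x01}
 *   vzext_vf2_h vd = {0x00ff, 0x0001}   zero-extend each source element
 *   vsext_vf2_h vd = {0xffff, 0x0001}   sign-extend each source element
 */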