/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

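/*
 * Illustrative sketch (not part of the upstream helpers): on a big-endian
 * host, architectural element 0 of a vector viewed as 16-bit units lives in
 * the last 16-bit slot of the first 64-bit chunk, so H2(0) == 3 redirects
 * the access there; on a little-endian host H2() is the identity.  The
 * hypothetical accessor below shows the intended use of the macros.
 */
static inline uint16_t example_get_e16(const void *vreg, int idx)
{
    /* H2() folds the host-endian fixup into the element index. */
    return ((const uint16_t *)vreg)[H2(idx)];
}
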
static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

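/*
 * Worked example (illustration only): vlmul = 0b101 encodes LMUL = 1/8 and
 * sextract32(0b101, 0, 3) sign-extends the 3-bit field to -3, while
 * vlmul = 0b011 (LMUL = 8) decodes to 3, so callers can treat LMUL as a
 * simple signed shift amount.
 */
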
static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}

static inline uint32_t vext_vma(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VMA);
}

static inline uint32_t vext_vta_all_1s(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

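/*
 * Worked example (illustration only): with VLEN = 128 (vlenb = 16),
 * SEW = 32 (log2_esz = 2) and LMUL = 1/2 (lmul = -1), scale = -1 - 2 = -3
 * and VLMAX = 16 >> 3 = 2, which matches VLMAX = VLEN / SEW * LMUL.
 */
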
/*
 * Get the total number of elements, including prestart, body and tail
 * elements.  Note that when LMUL < 1, the tail includes the elements past
 * VLMAX that are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

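/*
 * Worked example (illustration only): with vlenb = 16, SEW = 32 and
 * LMUL = 2 (lmul = 1), a 4-byte access has emul = 2 - 2 + 1 = 1, so the
 * total is (16 << 1) / 4 = 8 elements spread over the two registers of the
 * group; a negative emul is clamped to 0 so fractional groups still cover
 * one whole register.
 */
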
static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

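/*
 * Worked example (illustration only, assuming 4 KiB pages): for
 * addr = 0x2000ff8 and len = 16, pagelen = -(addr | TARGET_PAGE_MASK) = 8,
 * so the first probe covers the 8 bytes up to the page boundary and the
 * second probe covers the remaining 8 bytes starting at 0x2001000.
 */
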
/* set agnostic elements to 1s */
static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                              uint32_t tot)
{
    if (is_agnostic == 0) {
        /* policy undisturbed */
        return;
    }
    if (tot - cnt == 0) {
        return;
    }
    memset(base + cnt, -1, tot - cnt);
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

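/*
 * Illustrative sketch (not part of the upstream helpers): mask element i is
 * bit (i % 64) of 64-bit word (i / 64) of the mask register, e.g. element
 * 70 is bit 6 of word 1.  The hypothetical helper below flips a single mask
 * bit using the accessors defined above.
 */
static inline void example_toggle_mask_bit(void *v0, int index)
{
    vext_set_elem_mask(v0, index, !vext_elem_mask(v0, index));
}
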
/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

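/*
 * For reference (illustration only), GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
 * above expands to roughly:
 *
 *   static void lde_b(CPURISCVState *env, abi_ptr addr,
 *                     uint32_t idx, void *vd, uintptr_t retaddr)
 *   {
 *       int8_t *cur = ((int8_t *)vd + H1(idx));
 *       *cur = cpu_ldsb_data_ra(env, addr, retaddr);
 *   }
 */
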
/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
    if (nf * max_elems % total_elems != 0) {
        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
        uint32_t registers_used =
            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}
317
79556fb6 318#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
751538d5
LZ
319void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
320 target_ulong stride, CPURISCVState *env, \
321 uint32_t desc) \
322{ \
323 uint32_t vm = vext_vm(desc); \
324 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
25eae048 325 ctzl(sizeof(ETYPE)), GETPC()); \
751538d5
LZ
326}
327
79556fb6
FC
328GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
329GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
330GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
331GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
332
333#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
751538d5
LZ
334void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
335 target_ulong stride, CPURISCVState *env, \
336 uint32_t desc) \
337{ \
338 uint32_t vm = vext_vm(desc); \
339 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
25eae048 340 ctzl(sizeof(ETYPE)), GETPC()); \
751538d5
LZ
341}
342
79556fb6
FC
343GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
344GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
345GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
346GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
751538d5
LZ
347
348/*
349 *** unit-stride: access elements stored contiguously in memory
350 */
351
/* unmasked unit-stride load and store operation */
353static void
354vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
c7b8a421 355 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
25eae048 356 uintptr_t ra)
751538d5
LZ
357{
358 uint32_t i, k;
359 uint32_t nf = vext_nf(desc);
c7b8a421 360 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 361 uint32_t esz = 1 << log2_esz;
362 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
363 uint32_t vta = vext_vta(desc);
751538d5 364
751538d5 365 /* load bytes from guest memory */
5c89e9c0 366 for (i = env->vstart; i < evl; i++, env->vstart++) {
751538d5
LZ
367 k = 0;
368 while (k < nf) {
c7b8a421 369 target_ulong addr = base + ((i * nf + k) << log2_esz);
d6b9d930 370 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
751538d5
LZ
371 k++;
372 }
373 }
f714361e 374 env->vstart = 0;
752614ca 375 /* set tail elements to 1s */
376 for (k = 0; k < nf; ++k) {
377 vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz,
378 (k * max_elems + max_elems) * esz);
379 }
380 if (nf * max_elems % total_elems != 0) {
381 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
382 uint32_t registers_used =
383 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
384 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
385 registers_used * vlenb);
386 }
751538d5
LZ
387}
388
/*
 * A masked unit-stride load or store is a special case of the strided
 * operation, with stride = NF * sizeof(MTYPE).
 */
393
79556fb6 394#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
751538d5
LZ
395void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
396 CPURISCVState *env, uint32_t desc) \
397{ \
5a9f8e15 398 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
751538d5 399 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
25eae048 400 ctzl(sizeof(ETYPE)), GETPC()); \
751538d5
LZ
401} \
402 \
403void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
404 CPURISCVState *env, uint32_t desc) \
405{ \
3479a814 406 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
25eae048 407 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
751538d5
LZ
408}
409
79556fb6
FC
410GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
411GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
412GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
413GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
414
5c89e9c0
FC
415#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
416void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
417 CPURISCVState *env, uint32_t desc) \
418{ \
419 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
420 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
25eae048 421 ctzl(sizeof(ETYPE)), GETPC()); \
5c89e9c0
FC
422} \
423 \
424void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
425 CPURISCVState *env, uint32_t desc) \
426{ \
427 vext_ldst_us(vd, base, env, desc, STORE_FN, \
25eae048 428 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
751538d5
LZ
429}
430
79556fb6
FC
431GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
432GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
433GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
434GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
f732560e 435
26086aea
FC
436/*
437 *** unit stride mask load and store, EEW = 1
438 */
439void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
440 CPURISCVState *env, uint32_t desc)
441{
442 /* evl = ceil(vl/8) */
443 uint8_t evl = (env->vl + 7) >> 3;
444 vext_ldst_us(vd, base, env, desc, lde_b,
25eae048 445 0, evl, GETPC());
26086aea
FC
446}
447
448void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
449 CPURISCVState *env, uint32_t desc)
450{
451 /* evl = ceil(vl/8) */
452 uint8_t evl = (env->vl + 7) >> 3;
453 vext_ldst_us(vd, base, env, desc, ste_b,
25eae048 454 0, evl, GETPC());
26086aea
FC
455}
456
f732560e
LZ
457/*
458 *** index: access vector element from indexed memory
459 */
460typedef target_ulong vext_get_index_addr(target_ulong base,
461 uint32_t idx, void *vs2);
462
463#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
464static target_ulong NAME(target_ulong base, \
465 uint32_t idx, void *vs2) \
466{ \
467 return (base + *((ETYPE *)vs2 + H(idx))); \
468}
469
83fcd573
FC
470GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
471GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
472GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
473GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
f732560e
LZ
474
475static inline void
476vext_ldst_index(void *vd, void *v0, target_ulong base,
477 void *vs2, CPURISCVState *env, uint32_t desc,
478 vext_get_index_addr get_index_addr,
479 vext_ldst_elem_fn *ldst_elem,
c7b8a421 480 uint32_t log2_esz, uintptr_t ra)
f732560e
LZ
481{
482 uint32_t i, k;
483 uint32_t nf = vext_nf(desc);
484 uint32_t vm = vext_vm(desc);
c7b8a421 485 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 486 uint32_t esz = 1 << log2_esz;
487 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
488 uint32_t vta = vext_vta(desc);
265ecd4c 489 uint32_t vma = vext_vma(desc);
f732560e 490
f732560e 491 /* load bytes from guest memory */
f714361e 492 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
f714361e 493 k = 0;
f732560e 494 while (k < nf) {
265ecd4c
YTC
495 if (!vm && !vext_elem_mask(v0, i)) {
496 /* set masked-off elements to 1s */
497 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
498 (i + k * max_elems + 1) * esz);
499 k++;
500 continue;
501 }
c7b8a421 502 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
d6b9d930 503 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
f732560e
LZ
504 k++;
505 }
506 }
f714361e 507 env->vstart = 0;
752614ca 508 /* set tail elements to 1s */
509 for (k = 0; k < nf; ++k) {
510 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
511 (k * max_elems + max_elems) * esz);
512 }
513 if (nf * max_elems % total_elems != 0) {
514 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
515 uint32_t registers_used =
516 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
517 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
518 registers_used * vlenb);
519 }
f732560e
LZ
520}
521
08b9d0ed 522#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
f732560e
LZ
523void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
524 void *vs2, CPURISCVState *env, uint32_t desc) \
525{ \
526 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
25eae048 527 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
f732560e
LZ
528}
529
08b9d0ed
FC
530GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
531GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
532GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
533GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
534GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
535GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
536GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
537GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
538GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
539GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
540GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
541GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
542GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
543GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
544GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
545GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
546
547#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
f732560e
LZ
548void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
549 void *vs2, CPURISCVState *env, uint32_t desc) \
550{ \
551 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
5a9f8e15 552 STORE_FN, ctzl(sizeof(ETYPE)), \
25eae048 553 GETPC()); \
f732560e
LZ
554}
555
08b9d0ed
FC
556GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
557GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
558GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
559GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
560GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
561GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
562GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
563GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
564GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
565GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
566GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
567GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
568GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
569GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
570GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
571GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
022b4ecf
LZ
572
/*
 *** unit-stride fault-only-first load instructions
 */
576static inline void
577vext_ldff(void *vd, void *v0, target_ulong base,
578 CPURISCVState *env, uint32_t desc,
579 vext_ldst_elem_fn *ldst_elem,
c7b8a421 580 uint32_t log2_esz, uintptr_t ra)
022b4ecf
LZ
581{
582 void *host;
583 uint32_t i, k, vl = 0;
022b4ecf
LZ
584 uint32_t nf = vext_nf(desc);
585 uint32_t vm = vext_vm(desc);
c7b8a421 586 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 587 uint32_t esz = 1 << log2_esz;
588 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
589 uint32_t vta = vext_vta(desc);
265ecd4c 590 uint32_t vma = vext_vma(desc);
022b4ecf
LZ
591 target_ulong addr, offset, remain;
592
    /* probe every access */
f714361e 594 for (i = env->vstart; i < env->vl; i++) {
f9298de5 595 if (!vm && !vext_elem_mask(v0, i)) {
022b4ecf
LZ
596 continue;
597 }
c7b8a421 598 addr = adjust_addr(env, base + i * (nf << log2_esz));
022b4ecf 599 if (i == 0) {
c7b8a421 600 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
022b4ecf
LZ
601 } else {
602 /* if it triggers an exception, no need to check watchpoint */
c7b8a421 603 remain = nf << log2_esz;
022b4ecf
LZ
604 while (remain > 0) {
605 offset = -(addr | TARGET_PAGE_MASK);
606 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
607 cpu_mmu_index(env, false));
608 if (host) {
609#ifdef CONFIG_USER_ONLY
01d09525 610 if (page_check_range(addr, offset, PAGE_READ) < 0) {
022b4ecf
LZ
611 vl = i;
612 goto ProbeSuccess;
613 }
614#else
01d09525 615 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
022b4ecf
LZ
616#endif
617 } else {
618 vl = i;
619 goto ProbeSuccess;
620 }
621 if (remain <= offset) {
622 break;
623 }
624 remain -= offset;
d6b9d930 625 addr = adjust_addr(env, addr + offset);
022b4ecf
LZ
626 }
627 }
628 }
629ProbeSuccess:
630 /* load bytes from guest memory */
631 if (vl != 0) {
632 env->vl = vl;
633 }
f714361e 634 for (i = env->vstart; i < env->vl; i++) {
022b4ecf 635 k = 0;
022b4ecf 636 while (k < nf) {
265ecd4c
YTC
637 if (!vm && !vext_elem_mask(v0, i)) {
638 /* set masked-off elements to 1s */
639 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
640 (i + k * max_elems + 1) * esz);
641 k++;
642 continue;
643 }
c7b8a421 644 target_ulong addr = base + ((i * nf + k) << log2_esz);
d6b9d930 645 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
022b4ecf
LZ
646 k++;
647 }
648 }
f714361e 649 env->vstart = 0;
752614ca 650 /* set tail elements to 1s */
651 for (k = 0; k < nf; ++k) {
652 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
653 (k * max_elems + max_elems) * esz);
654 }
655 if (nf * max_elems % total_elems != 0) {
656 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
657 uint32_t registers_used =
658 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
659 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
660 registers_used * vlenb);
661 }
022b4ecf
LZ
662}
663
d3e5e2ff
FC
664#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
665void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
666 CPURISCVState *env, uint32_t desc) \
667{ \
668 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
5a9f8e15 669 ctzl(sizeof(ETYPE)), GETPC()); \
022b4ecf
LZ
670}
671
d3e5e2ff
FC
672GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
673GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
674GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
675GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
268fcca6 676
268fcca6
LZ
677#define DO_SWAP(N, M) (M)
678#define DO_AND(N, M) (N & M)
679#define DO_XOR(N, M) (N ^ M)
680#define DO_OR(N, M) (N | M)
681#define DO_ADD(N, M) (N + M)
682
268fcca6
LZ
683/* Signed min/max */
684#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
685#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
686
687/* Unsigned min/max */
688#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
689#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
690
30206bd8
FC
691/*
692 *** load and store whole register instructions
693 */
694static void
695vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
c7b8a421 696 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
30206bd8 697{
f714361e 698 uint32_t i, k, off, pos;
30206bd8
FC
699 uint32_t nf = vext_nf(desc);
700 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
c7b8a421 701 uint32_t max_elems = vlenb >> log2_esz;
30206bd8 702
f714361e
FC
703 k = env->vstart / max_elems;
704 off = env->vstart % max_elems;
30206bd8 705
f714361e
FC
706 if (off) {
        /* load/store rest of elements of current segment pointed to by vstart */
708 for (pos = off; pos < max_elems; pos++, env->vstart++) {
c7b8a421 709 target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
d6b9d930 710 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
f714361e
FC
711 }
712 k++;
713 }
714
715 /* load/store elements for rest of segments */
716 for (; k < nf; k++) {
717 for (i = 0; i < max_elems; i++, env->vstart++) {
c7b8a421 718 target_ulong addr = base + ((i + k * max_elems) << log2_esz);
d6b9d930 719 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
30206bd8
FC
720 }
721 }
f714361e
FC
722
723 env->vstart = 0;
30206bd8
FC
724}
725
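/*
 * Worked example (illustration only): with max_elems = 4 and a load that
 * trapped at vstart = 6, k = 6 / 4 = 1 and off = 6 % 4 = 2, so the code
 * above first finishes elements 2..3 of segment 1 and then continues with
 * the remaining whole segments starting at k = 2.
 */
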
726#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
727void HELPER(NAME)(void *vd, target_ulong base, \
728 CPURISCVState *env, uint32_t desc) \
729{ \
730 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
25eae048 731 ctzl(sizeof(ETYPE)), GETPC()); \
30206bd8
FC
732}
733
734GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
735GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
736GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
737GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
738GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
739GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
740GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
741GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
742GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
743GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
744GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
745GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
746GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
747GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
748GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
749GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
750
751#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
752void HELPER(NAME)(void *vd, target_ulong base, \
753 CPURISCVState *env, uint32_t desc) \
754{ \
755 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
25eae048 756 ctzl(sizeof(ETYPE)), GETPC()); \
30206bd8
FC
757}
758
759GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
760GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
761GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
762GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
763
43740e3a
LZ
764/*
765 *** Vector Integer Arithmetic Instructions
766 */
767
768/* expand macro args before macro */
769#define RVVCALL(macro, ...) macro(__VA_ARGS__)
770
771/* (TD, T1, T2, TX1, TX2) */
772#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
773#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
774#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
775#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
558fa779
LZ
776#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
777#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
778#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
779#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
958b85f3
LZ
780#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
781#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
782#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
783#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
97b1cba3
LZ
784#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
785#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
786#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
787#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
788#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
789#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
790#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
791#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
792#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
793#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
794#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
795#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
9ff3d287
LZ
796#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
797#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
798#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
799#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
800#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
801#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
43740e3a
LZ
802
803/* operation of two vector elements */
804typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
805
806#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
807static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
808{ \
809 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
810 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
811 *((TD *)vd + HD(i)) = OP(s2, s1); \
812}
813#define DO_SUB(N, M) (N - M)
814#define DO_RSUB(N, M) (M - N)
815
816RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
817RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
818RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
819RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
820RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
821RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
822RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
823RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
824
825static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
826 CPURISCVState *env, uint32_t desc,
f1eed927 827 opivv2_fn *fn, uint32_t esz)
43740e3a 828{
43740e3a
LZ
829 uint32_t vm = vext_vm(desc);
830 uint32_t vl = env->vl;
f1eed927 831 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
832 uint32_t vta = vext_vta(desc);
355d5584 833 uint32_t vma = vext_vma(desc);
43740e3a
LZ
834 uint32_t i;
835
f714361e 836 for (i = env->vstart; i < vl; i++) {
f9298de5 837 if (!vm && !vext_elem_mask(v0, i)) {
355d5584
YTC
838 /* set masked-off elements to 1s */
839 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
43740e3a
LZ
840 continue;
841 }
842 fn(vd, vs1, vs2, i);
843 }
f714361e 844 env->vstart = 0;
f1eed927 845 /* set tail elements to 1s */
846 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
43740e3a
LZ
847}
848
849/* generate the helpers for OPIVV */
f1eed927 850#define GEN_VEXT_VV(NAME, ESZ) \
43740e3a
LZ
851void HELPER(NAME)(void *vd, void *v0, void *vs1, \
852 void *vs2, CPURISCVState *env, \
853 uint32_t desc) \
854{ \
8a085fb2 855 do_vext_vv(vd, v0, vs1, vs2, env, desc, \
f1eed927 856 do_##NAME, ESZ); \
43740e3a
LZ
857}
858
f1eed927 859GEN_VEXT_VV(vadd_vv_b, 1)
860GEN_VEXT_VV(vadd_vv_h, 2)
861GEN_VEXT_VV(vadd_vv_w, 4)
862GEN_VEXT_VV(vadd_vv_d, 8)
863GEN_VEXT_VV(vsub_vv_b, 1)
864GEN_VEXT_VV(vsub_vv_h, 2)
865GEN_VEXT_VV(vsub_vv_w, 4)
866GEN_VEXT_VV(vsub_vv_d, 8)
43740e3a
LZ
867
868typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
869
/*
 * (T1)s1 gives the real operand type.
 * (TX1)(T1)s1 expands it to the operand type used by widening or
 * narrowing operations.
 */
874#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
875static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
876{ \
877 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
878 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
879}
880
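/*
 * Example of the double cast (illustration only): for vwadd_vx_b,
 * T1 = int8_t and TX1 = int16_t, so (TX1)(T1)s1 first truncates the scalar
 * to 8 bits and then sign-extends it to the 16-bit widened operand type,
 * e.g. s1 = 0x1ff becomes (int16_t)-1.
 */
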
881RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
882RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
883RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
884RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
885RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
886RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
887RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
888RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
889RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
890RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
891RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
892RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
893
894static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
895 CPURISCVState *env, uint32_t desc,
5c19fc15 896 opivx2_fn fn, uint32_t esz)
43740e3a 897{
43740e3a
LZ
898 uint32_t vm = vext_vm(desc);
899 uint32_t vl = env->vl;
5c19fc15 900 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
901 uint32_t vta = vext_vta(desc);
bce9a636 902 uint32_t vma = vext_vma(desc);
43740e3a
LZ
903 uint32_t i;
904
f714361e 905 for (i = env->vstart; i < vl; i++) {
f9298de5 906 if (!vm && !vext_elem_mask(v0, i)) {
bce9a636
YTC
907 /* set masked-off elements to 1s */
908 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
43740e3a
LZ
909 continue;
910 }
911 fn(vd, s1, vs2, i);
912 }
f714361e 913 env->vstart = 0;
5c19fc15 914 /* set tail elements to 1s */
915 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
43740e3a
LZ
916}
917
918/* generate the helpers for OPIVX */
5c19fc15 919#define GEN_VEXT_VX(NAME, ESZ) \
43740e3a
LZ
920void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
921 void *vs2, CPURISCVState *env, \
922 uint32_t desc) \
923{ \
8a085fb2 924 do_vext_vx(vd, v0, s1, vs2, env, desc, \
5c19fc15 925 do_##NAME, ESZ); \
3479a814
FC
926}
927
5c19fc15 928GEN_VEXT_VX(vadd_vx_b, 1)
929GEN_VEXT_VX(vadd_vx_h, 2)
930GEN_VEXT_VX(vadd_vx_w, 4)
931GEN_VEXT_VX(vadd_vx_d, 8)
932GEN_VEXT_VX(vsub_vx_b, 1)
933GEN_VEXT_VX(vsub_vx_h, 2)
934GEN_VEXT_VX(vsub_vx_w, 4)
935GEN_VEXT_VX(vsub_vx_d, 8)
936GEN_VEXT_VX(vrsub_vx_b, 1)
937GEN_VEXT_VX(vrsub_vx_h, 2)
938GEN_VEXT_VX(vrsub_vx_w, 4)
939GEN_VEXT_VX(vrsub_vx_d, 8)
43740e3a
LZ
940
941void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
942{
943 intptr_t oprsz = simd_oprsz(desc);
944 intptr_t i;
945
946 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
947 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
948 }
949}
950
951void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
952{
953 intptr_t oprsz = simd_oprsz(desc);
954 intptr_t i;
955
956 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
957 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
958 }
959}
960
961void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
962{
963 intptr_t oprsz = simd_oprsz(desc);
964 intptr_t i;
965
966 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
967 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
968 }
969}
970
971void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
972{
973 intptr_t oprsz = simd_oprsz(desc);
974 intptr_t i;
975
976 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
977 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
978 }
979}
8fcdf776
LZ
980
981/* Vector Widening Integer Add/Subtract */
982#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
983#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
984#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
985#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
986#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
987#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
988#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
989#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
990#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
991#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
992#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
993#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
994RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
995RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
996RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
997RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
998RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
999RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
1000RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
1001RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
1002RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
1003RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
1004RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
1005RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
1006RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
1007RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
1008RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
1009RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
1010RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
1011RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
1012RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
1013RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
1014RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
1015RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
1016RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
1017RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
f1eed927 1018GEN_VEXT_VV(vwaddu_vv_b, 2)
1019GEN_VEXT_VV(vwaddu_vv_h, 4)
1020GEN_VEXT_VV(vwaddu_vv_w, 8)
1021GEN_VEXT_VV(vwsubu_vv_b, 2)
1022GEN_VEXT_VV(vwsubu_vv_h, 4)
1023GEN_VEXT_VV(vwsubu_vv_w, 8)
1024GEN_VEXT_VV(vwadd_vv_b, 2)
1025GEN_VEXT_VV(vwadd_vv_h, 4)
1026GEN_VEXT_VV(vwadd_vv_w, 8)
1027GEN_VEXT_VV(vwsub_vv_b, 2)
1028GEN_VEXT_VV(vwsub_vv_h, 4)
1029GEN_VEXT_VV(vwsub_vv_w, 8)
1030GEN_VEXT_VV(vwaddu_wv_b, 2)
1031GEN_VEXT_VV(vwaddu_wv_h, 4)
1032GEN_VEXT_VV(vwaddu_wv_w, 8)
1033GEN_VEXT_VV(vwsubu_wv_b, 2)
1034GEN_VEXT_VV(vwsubu_wv_h, 4)
1035GEN_VEXT_VV(vwsubu_wv_w, 8)
1036GEN_VEXT_VV(vwadd_wv_b, 2)
1037GEN_VEXT_VV(vwadd_wv_h, 4)
1038GEN_VEXT_VV(vwadd_wv_w, 8)
1039GEN_VEXT_VV(vwsub_wv_b, 2)
1040GEN_VEXT_VV(vwsub_wv_h, 4)
1041GEN_VEXT_VV(vwsub_wv_w, 8)
8fcdf776
LZ
1042
1043RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
1044RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
1045RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
1046RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
1047RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
1048RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
1049RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
1050RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
1051RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
1052RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
1053RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
1054RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
1055RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
1056RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
1057RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
1058RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
1059RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
1060RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
1061RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
1062RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
1063RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
1064RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
1065RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
1066RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
5c19fc15 1067GEN_VEXT_VX(vwaddu_vx_b, 2)
1068GEN_VEXT_VX(vwaddu_vx_h, 4)
1069GEN_VEXT_VX(vwaddu_vx_w, 8)
1070GEN_VEXT_VX(vwsubu_vx_b, 2)
1071GEN_VEXT_VX(vwsubu_vx_h, 4)
1072GEN_VEXT_VX(vwsubu_vx_w, 8)
1073GEN_VEXT_VX(vwadd_vx_b, 2)
1074GEN_VEXT_VX(vwadd_vx_h, 4)
1075GEN_VEXT_VX(vwadd_vx_w, 8)
1076GEN_VEXT_VX(vwsub_vx_b, 2)
1077GEN_VEXT_VX(vwsub_vx_h, 4)
1078GEN_VEXT_VX(vwsub_vx_w, 8)
1079GEN_VEXT_VX(vwaddu_wx_b, 2)
1080GEN_VEXT_VX(vwaddu_wx_h, 4)
1081GEN_VEXT_VX(vwaddu_wx_w, 8)
1082GEN_VEXT_VX(vwsubu_wx_b, 2)
1083GEN_VEXT_VX(vwsubu_wx_h, 4)
1084GEN_VEXT_VX(vwsubu_wx_w, 8)
1085GEN_VEXT_VX(vwadd_wx_b, 2)
1086GEN_VEXT_VX(vwadd_wx_h, 4)
1087GEN_VEXT_VX(vwadd_wx_w, 8)
1088GEN_VEXT_VX(vwsub_wx_b, 2)
1089GEN_VEXT_VX(vwsub_wx_h, 4)
1090GEN_VEXT_VX(vwsub_wx_w, 8)
3a6f8f68
LZ
1091
1092/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1093#define DO_VADC(N, M, C) (N + M + C)
1094#define DO_VSBC(N, M, C) (N - M - C)
1095
3479a814 1096#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
3a6f8f68
LZ
1097void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1098 CPURISCVState *env, uint32_t desc) \
1099{ \
3a6f8f68 1100 uint32_t vl = env->vl; \
5c19fc15 1101 uint32_t esz = sizeof(ETYPE); \
1102 uint32_t total_elems = \
1103 vext_get_total_elems(env, desc, esz); \
1104 uint32_t vta = vext_vta(desc); \
3a6f8f68
LZ
1105 uint32_t i; \
1106 \
f714361e 1107 for (i = env->vstart; i < vl; i++) { \
3a6f8f68
LZ
1108 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1109 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1110 ETYPE carry = vext_elem_mask(v0, i); \
3a6f8f68
LZ
1111 \
1112 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
1113 } \
f714361e 1114 env->vstart = 0; \
5c19fc15 1115 /* set tail elements to 1s */ \
1116 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
3a6f8f68
LZ
1117}
1118
3479a814
FC
1119GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
1120GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
1121GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
1122GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
3a6f8f68 1123
3479a814
FC
1124GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
1125GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
1126GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
1127GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
3a6f8f68 1128
3479a814 1129#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
3a6f8f68
LZ
1130void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1131 CPURISCVState *env, uint32_t desc) \
1132{ \
3a6f8f68 1133 uint32_t vl = env->vl; \
5c19fc15 1134 uint32_t esz = sizeof(ETYPE); \
1135 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1136 uint32_t vta = vext_vta(desc); \
3a6f8f68
LZ
1137 uint32_t i; \
1138 \
f714361e 1139 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 1140 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1141 ETYPE carry = vext_elem_mask(v0, i); \
3a6f8f68
LZ
1142 \
1143 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1144 } \
f714361e 1145 env->vstart = 0; \
5c19fc15 1146 /* set tail elements to 1s */ \
1147 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
3a6f8f68
LZ
1148}
1149
3479a814
FC
1150GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
1151GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1152GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1153GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
3a6f8f68 1154
3479a814
FC
1155GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
1156GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1157GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1158GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
3a6f8f68
LZ
1159
1160#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
1161 (__typeof(N))(N + M) < N)
1162#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
1163
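/*
 * Worked example (illustration only): DO_MADC computes the unsigned
 * carry-out.  Without carry-in, N + M overflows exactly when the truncated
 * sum is smaller than N, e.g. for uint8_t N = 200, M = 100 the sum wraps to
 * 44 < 200, so the carry bit is 1; with carry-in, N + M + 1 <= N catches the
 * additional wrap caused by the incoming carry.
 */
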
1164#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
1165void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1166 CPURISCVState *env, uint32_t desc) \
1167{ \
3a6f8f68 1168 uint32_t vl = env->vl; \
bb45485a 1169 uint32_t vm = vext_vm(desc); \
5c19fc15 1170 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1171 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
3a6f8f68
LZ
1172 uint32_t i; \
1173 \
f714361e 1174 for (i = env->vstart; i < vl; i++) { \
3a6f8f68
LZ
1175 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1176 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1177 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 1178 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
3a6f8f68 1179 } \
f714361e 1180 env->vstart = 0; \
    /* mask destination register is always tail-agnostic */              \
1182 /* set tail elements to 1s */ \
1183 if (vta_all_1s) { \
1184 for (; i < total_elems; i++) { \
1185 vext_set_elem_mask(vd, i, 1); \
1186 } \
1187 } \
3a6f8f68
LZ
1188}
1189
1190GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1191GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1192GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1193GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1194
1195GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1196GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1197GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1198GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1199
1200#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1201void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1202 void *vs2, CPURISCVState *env, uint32_t desc) \
1203{ \
3a6f8f68 1204 uint32_t vl = env->vl; \
bb45485a 1205 uint32_t vm = vext_vm(desc); \
5c19fc15 1206 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1207 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
3a6f8f68
LZ
1208 uint32_t i; \
1209 \
f714361e 1210 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 1211 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1212 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 1213 vext_set_elem_mask(vd, i, \
3a6f8f68
LZ
1214 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1215 } \
f714361e 1216 env->vstart = 0; \
    /* mask destination register is always tail-agnostic */              \
1218 /* set tail elements to 1s */ \
1219 if (vta_all_1s) { \
1220 for (; i < total_elems; i++) { \
1221 vext_set_elem_mask(vd, i, 1); \
1222 } \
1223 } \
3a6f8f68
LZ
1224}
1225
1226GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1227GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1228GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1229GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1230
1231GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1232GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1233GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1234GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
d3842924
LZ
1235
1236/* Vector Bitwise Logical Instructions */
1237RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1238RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1239RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1240RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1241RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1242RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1243RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1244RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1245RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1246RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1247RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1248RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
f1eed927 1249GEN_VEXT_VV(vand_vv_b, 1)
1250GEN_VEXT_VV(vand_vv_h, 2)
1251GEN_VEXT_VV(vand_vv_w, 4)
1252GEN_VEXT_VV(vand_vv_d, 8)
1253GEN_VEXT_VV(vor_vv_b, 1)
1254GEN_VEXT_VV(vor_vv_h, 2)
1255GEN_VEXT_VV(vor_vv_w, 4)
1256GEN_VEXT_VV(vor_vv_d, 8)
1257GEN_VEXT_VV(vxor_vv_b, 1)
1258GEN_VEXT_VV(vxor_vv_h, 2)
1259GEN_VEXT_VV(vxor_vv_w, 4)
1260GEN_VEXT_VV(vxor_vv_d, 8)
d3842924
LZ
1261
1262RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1263RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1264RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1265RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1266RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1267RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1268RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1269RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1270RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1271RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1272RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1273RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
5c19fc15 1274GEN_VEXT_VX(vand_vx_b, 1)
1275GEN_VEXT_VX(vand_vx_h, 2)
1276GEN_VEXT_VX(vand_vx_w, 4)
1277GEN_VEXT_VX(vand_vx_d, 8)
1278GEN_VEXT_VX(vor_vx_b, 1)
1279GEN_VEXT_VX(vor_vx_h, 2)
1280GEN_VEXT_VX(vor_vx_w, 4)
1281GEN_VEXT_VX(vor_vx_d, 8)
1282GEN_VEXT_VX(vxor_vx_b, 1)
1283GEN_VEXT_VX(vxor_vx_h, 2)
1284GEN_VEXT_VX(vxor_vx_w, 4)
1285GEN_VEXT_VX(vxor_vx_d, 8)
3277d955
LZ
1286
1287/* Vector Single-Width Bit Shift Instructions */
1288#define DO_SLL(N, M) (N << (M))
1289#define DO_SRL(N, M) (N >> (M))
1290
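/*
 * Note (illustration only): vsra reuses DO_SRL but is instantiated with a
 * signed TS2 element type, so ">>" becomes the arithmetic right shift that
 * QEMU's supported compilers implement for signed values, e.g.
 * (int8_t)-8 >> 1 is -4, while vsrl keeps the unsigned type and gets a
 * logical shift.
 */
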
1291/* generate the helpers for shift instructions with two vector operators */
3479a814 1292#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
3277d955
LZ
1293void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1294 void *vs2, CPURISCVState *env, uint32_t desc) \
1295{ \
3277d955
LZ
1296 uint32_t vm = vext_vm(desc); \
1297 uint32_t vl = env->vl; \
7b1bff41 1298 uint32_t esz = sizeof(TS1); \
1299 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1300 uint32_t vta = vext_vta(desc); \
fd93045e 1301 uint32_t vma = vext_vma(desc); \
3277d955
LZ
1302 uint32_t i; \
1303 \
f714361e 1304 for (i = env->vstart; i < vl; i++) { \
f9298de5 1305 if (!vm && !vext_elem_mask(v0, i)) { \
fd93045e
YTC
1306 /* set masked-off elements to 1s */ \
1307 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
3277d955
LZ
1308 continue; \
1309 } \
1310 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1311 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1312 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1313 } \
f714361e 1314 env->vstart = 0; \
7b1bff41 1315 /* set tail elements to 1s */ \
1316 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
3277d955
LZ
1317}
1318
3479a814
FC
1319GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1320GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1321GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1322GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
3277d955 1323
3479a814
FC
1324GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1325GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1326GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1327GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
3277d955 1328
3479a814
FC
1329GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1330GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1331GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1332GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
3277d955
LZ
1333
1334/* generate the helpers for shift instructions with one vector and one scalar */
3479a814
FC
1335#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1336void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1337 void *vs2, CPURISCVState *env, uint32_t desc) \
1338{ \
1339 uint32_t vm = vext_vm(desc); \
1340 uint32_t vl = env->vl; \
7b1bff41 1341 uint32_t esz = sizeof(TD); \
1342 uint32_t total_elems = \
1343 vext_get_total_elems(env, desc, esz); \
1344 uint32_t vta = vext_vta(desc); \
fd93045e 1345 uint32_t vma = vext_vma(desc); \
3479a814
FC
1346 uint32_t i; \
1347 \
f714361e 1348 for (i = env->vstart; i < vl; i++) { \
3479a814 1349 if (!vm && !vext_elem_mask(v0, i)) { \
fd93045e
YTC
1350 /* set masked-off elements to 1s */ \
1351 vext_set_elems_1s(vd, vma, i * esz, \
1352 (i + 1) * esz); \
3479a814
FC
1353 continue; \
1354 } \
1355 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1356 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1357 } \
f714361e 1358 env->vstart = 0; \
7b1bff41 1359 /* set tail elements to 1s */ \
1360 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
3479a814
FC
1361}
1362
1363GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1364GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1365GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1366GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1367
1368GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1369GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1370GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1371GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1372
1373GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1374GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1375GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1376GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
7689b028
LZ
1377
1378/* Vector Narrowing Integer Right Shift Instructions */
7daa5852
FC
1379GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1380GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1381GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1382GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1383GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1384GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1385GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1386GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1387GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1388GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1389GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1390GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1366fc79
LZ
1391
1392/* Vector Integer Comparison Instructions */
1393#define DO_MSEQ(N, M) (N == M)
1394#define DO_MSNE(N, M) (N != M)
1395#define DO_MSLT(N, M) (N < M)
1396#define DO_MSLE(N, M) (N <= M)
1397#define DO_MSGT(N, M) (N > M)
1398
1399#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1400void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1401 CPURISCVState *env, uint32_t desc) \
1402{ \
1366fc79
LZ
1403 uint32_t vm = vext_vm(desc); \
1404 uint32_t vl = env->vl; \
38581e5c 1405 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1406 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
6e11d7ea 1407 uint32_t vma = vext_vma(desc); \
1366fc79
LZ
1408 uint32_t i; \
1409 \
f714361e 1410 for (i = env->vstart; i < vl; i++) { \
1366fc79
LZ
1411 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1412 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1413 if (!vm && !vext_elem_mask(v0, i)) { \
6e11d7ea
YTC
1414 /* set masked-off elements to 1s */ \
1415 if (vma) { \
1416 vext_set_elem_mask(vd, i, 1); \
1417 } \
1366fc79
LZ
1418 continue; \
1419 } \
f9298de5 1420 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1366fc79 1421 } \
f714361e 1422 env->vstart = 0; \
    /* mask destination register is always tail-agnostic */              \
1424 /* set tail elements to 1s */ \
1425 if (vta_all_1s) { \
1426 for (; i < total_elems; i++) { \
1427 vext_set_elem_mask(vd, i, 1); \
1428 } \
1429 } \
1366fc79
LZ
1430}
1431
1432GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1433GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1434GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1435GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1436
1437GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1438GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1439GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1440GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1441
1442GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1443GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1444GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1445GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1446
1447GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1448GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1449GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1450GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1451
1452GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1453GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1454GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1455GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1456
1457GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1458GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1459GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1460GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1461
1462#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1463void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1464 CPURISCVState *env, uint32_t desc) \
1465{ \
1366fc79
LZ
1466 uint32_t vm = vext_vm(desc); \
1467 uint32_t vl = env->vl; \
38581e5c 1468 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1469 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
6e11d7ea 1470 uint32_t vma = vext_vma(desc); \
1366fc79
LZ
1471 uint32_t i; \
1472 \
f714361e 1473 for (i = env->vstart; i < vl; i++) { \
1366fc79 1474 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1475 if (!vm && !vext_elem_mask(v0, i)) { \
6e11d7ea
YTC
1476 /* set masked-off elements to 1s */ \
1477 if (vma) { \
1478 vext_set_elem_mask(vd, i, 1); \
1479 } \
1366fc79
LZ
1480 continue; \
1481 } \
f9298de5 1482 vext_set_elem_mask(vd, i, \
1366fc79
LZ
1483 DO_OP(s2, (ETYPE)(target_long)s1)); \
1484 } \
f714361e 1485 env->vstart = 0; \
38581e5c 1486 /* mask destination register is always tail-agnostic */ \
1487 /* set tail elements to 1s */ \
1488 if (vta_all_1s) { \
1489 for (; i < total_elems; i++) { \
1490 vext_set_elem_mask(vd, i, 1); \
1491 } \
1492 } \
1366fc79
LZ
1493}
1494
1495GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1496GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1497GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1498GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1499
1500GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1501GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1502GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1503GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1504
1505GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1506GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1507GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1508GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1509
1510GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1511GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1512GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1513GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1514
1515GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1516GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1517GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1518GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1519
1520GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1521GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1522GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1523GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1524
1525GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1526GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1527GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1528GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1529
1530GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1531GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1532GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1533GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
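/*
 * Only the greater-than comparisons against a scalar (vmsgt.vx, vmsgtu.vx)
 * need helpers here; the .vv greater-than and the greater-or-equal forms
 * are expected to be synthesized from the comparisons above at translation
 * time, so no separate helpers are defined for them.
 */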
558fa779
LZ
1534
1535/* Vector Integer Min/Max Instructions */
1536RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1537RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1538RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1539RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1540RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1541RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1542RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1543RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1544RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1545RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1546RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1547RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1548RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1549RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1550RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1551RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
f1eed927 1552GEN_VEXT_VV(vminu_vv_b, 1)
1553GEN_VEXT_VV(vminu_vv_h, 2)
1554GEN_VEXT_VV(vminu_vv_w, 4)
1555GEN_VEXT_VV(vminu_vv_d, 8)
1556GEN_VEXT_VV(vmin_vv_b, 1)
1557GEN_VEXT_VV(vmin_vv_h, 2)
1558GEN_VEXT_VV(vmin_vv_w, 4)
1559GEN_VEXT_VV(vmin_vv_d, 8)
1560GEN_VEXT_VV(vmaxu_vv_b, 1)
1561GEN_VEXT_VV(vmaxu_vv_h, 2)
1562GEN_VEXT_VV(vmaxu_vv_w, 4)
1563GEN_VEXT_VV(vmaxu_vv_d, 8)
1564GEN_VEXT_VV(vmax_vv_b, 1)
1565GEN_VEXT_VV(vmax_vv_h, 2)
1566GEN_VEXT_VV(vmax_vv_w, 4)
1567GEN_VEXT_VV(vmax_vv_d, 8)
558fa779
LZ
1568
1569RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1570RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1571RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1572RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1573RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1574RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1575RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1576RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1577RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1578RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1579RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1580RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1581RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1582RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1583RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1584RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
5c19fc15 1585GEN_VEXT_VX(vminu_vx_b, 1)
1586GEN_VEXT_VX(vminu_vx_h, 2)
1587GEN_VEXT_VX(vminu_vx_w, 4)
1588GEN_VEXT_VX(vminu_vx_d, 8)
1589GEN_VEXT_VX(vmin_vx_b, 1)
1590GEN_VEXT_VX(vmin_vx_h, 2)
1591GEN_VEXT_VX(vmin_vx_w, 4)
1592GEN_VEXT_VX(vmin_vx_d, 8)
1593GEN_VEXT_VX(vmaxu_vx_b, 1)
1594GEN_VEXT_VX(vmaxu_vx_h, 2)
1595GEN_VEXT_VX(vmaxu_vx_w, 4)
1596GEN_VEXT_VX(vmaxu_vx_d, 8)
1597GEN_VEXT_VX(vmax_vx_b, 1)
1598GEN_VEXT_VX(vmax_vx_h, 2)
1599GEN_VEXT_VX(vmax_vx_w, 4)
1600GEN_VEXT_VX(vmax_vx_d, 8)
958b85f3
LZ
1601
1602/* Vector Single-Width Integer Multiply Instructions */
1603#define DO_MUL(N, M) (N * M)
1604RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1605RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1606RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1607RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
f1eed927 1608GEN_VEXT_VV(vmul_vv_b, 1)
1609GEN_VEXT_VV(vmul_vv_h, 2)
1610GEN_VEXT_VV(vmul_vv_w, 4)
1611GEN_VEXT_VV(vmul_vv_d, 8)
958b85f3
LZ
1612
1613static int8_t do_mulh_b(int8_t s2, int8_t s1)
1614{
1615 return (int16_t)s2 * (int16_t)s1 >> 8;
1616}
1617
1618static int16_t do_mulh_h(int16_t s2, int16_t s1)
1619{
1620 return (int32_t)s2 * (int32_t)s1 >> 16;
1621}
1622
1623static int32_t do_mulh_w(int32_t s2, int32_t s1)
1624{
1625 return (int64_t)s2 * (int64_t)s1 >> 32;
1626}
1627
1628static int64_t do_mulh_d(int64_t s2, int64_t s1)
1629{
1630 uint64_t hi_64, lo_64;
1631
1632 muls64(&lo_64, &hi_64, s1, s2);
1633 return hi_64;
1634}
1635
1636static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1637{
1638 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1639}
1640
1641static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1642{
1643 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1644}
1645
1646static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1647{
1648 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1649}
1650
1651static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1652{
1653 uint64_t hi_64, lo_64;
1654
1655 mulu64(&lo_64, &hi_64, s2, s1);
1656 return hi_64;
1657}
1658
1659static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1660{
1661 return (int16_t)s2 * (uint16_t)s1 >> 8;
1662}
1663
1664static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1665{
1666 return (int32_t)s2 * (uint32_t)s1 >> 16;
1667}
1668
1669static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1670{
1671 return (int64_t)s2 * (uint64_t)s1 >> 32;
1672}
1673
1674/*
1675 * Let A = signed operand,
1676 * B = unsigned operand
1677 * P = mulu64(A, B), unsigned product
1678 *
1679 * LET X = 2 ** 64 - A, 2's complement of A
1680 * SP = signed product
1681 * THEN
1682 * IF A < 0
1683 * SP = -X * B
1684 * = -(2 ** 64 - A) * B
1685 * = A * B - 2 ** 64 * B
1686 * = P - 2 ** 64 * B
1687 * ELSE
1688 * SP = P
1689 * THEN
1690 * HI_P -= (A < 0 ? B : 0)
1691 */
1692
1693static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1694{
1695 uint64_t hi_64, lo_64;
1696
1697 mulu64(&lo_64, &hi_64, s2, s1);
1698
1699 hi_64 -= s2 < 0 ? s1 : 0;
1700 return hi_64;
1701}
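/*
 * Worked example (for illustration): s2 = -2, s1 = 3.  The unsigned bit
 * pattern of s2 is 2 ** 64 - 2, so
 *     P = (2 ** 64 - 2) * 3 = 2 * 2 ** 64 + (2 ** 64 - 6),
 * i.e. hi_64 = 2.  Since s2 < 0 we subtract s1: 2 - 3 = -1, which is the
 * correct high half of the signed-by-unsigned product -6.
 */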
1702
1703RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1704RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1705RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1706RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1707RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1708RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1709RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1710RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1711RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1712RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1713RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1714RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
f1eed927 1715GEN_VEXT_VV(vmulh_vv_b, 1)
1716GEN_VEXT_VV(vmulh_vv_h, 2)
1717GEN_VEXT_VV(vmulh_vv_w, 4)
1718GEN_VEXT_VV(vmulh_vv_d, 8)
1719GEN_VEXT_VV(vmulhu_vv_b, 1)
1720GEN_VEXT_VV(vmulhu_vv_h, 2)
1721GEN_VEXT_VV(vmulhu_vv_w, 4)
1722GEN_VEXT_VV(vmulhu_vv_d, 8)
1723GEN_VEXT_VV(vmulhsu_vv_b, 1)
1724GEN_VEXT_VV(vmulhsu_vv_h, 2)
1725GEN_VEXT_VV(vmulhsu_vv_w, 4)
1726GEN_VEXT_VV(vmulhsu_vv_d, 8)
958b85f3
LZ
1727
1728RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1729RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1730RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1731RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1732RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1733RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1734RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1735RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1736RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1737RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1738RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1739RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1740RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1741RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1742RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1743RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
5c19fc15 1744GEN_VEXT_VX(vmul_vx_b, 1)
1745GEN_VEXT_VX(vmul_vx_h, 2)
1746GEN_VEXT_VX(vmul_vx_w, 4)
1747GEN_VEXT_VX(vmul_vx_d, 8)
1748GEN_VEXT_VX(vmulh_vx_b, 1)
1749GEN_VEXT_VX(vmulh_vx_h, 2)
1750GEN_VEXT_VX(vmulh_vx_w, 4)
1751GEN_VEXT_VX(vmulh_vx_d, 8)
1752GEN_VEXT_VX(vmulhu_vx_b, 1)
1753GEN_VEXT_VX(vmulhu_vx_h, 2)
1754GEN_VEXT_VX(vmulhu_vx_w, 4)
1755GEN_VEXT_VX(vmulhu_vx_d, 8)
1756GEN_VEXT_VX(vmulhsu_vx_b, 1)
1757GEN_VEXT_VX(vmulhsu_vx_h, 2)
1758GEN_VEXT_VX(vmulhsu_vx_w, 4)
1759GEN_VEXT_VX(vmulhsu_vx_d, 8)
85e6658c
LZ
1760
1761/* Vector Integer Divide Instructions */
1762#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1763#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1764#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\
1765 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1766#define DO_REM(N, M) (unlikely(M == 0) ? N :\
1767 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
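/*
 * These macros fold in the RVV special cases so that division never traps:
 * division by zero yields all ones (-1) for DO_DIVU/DO_DIV and leaves the
 * dividend unchanged for DO_REMU/DO_REM, while signed overflow (the
 * most-negative dividend divided by -1, caught by the N == -N test) yields
 * the dividend itself for DO_DIV and 0 for DO_REM.
 */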
1768
1769RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1770RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1771RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1772RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1773RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1774RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1775RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1776RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1777RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1778RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1779RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1780RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1781RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1782RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1783RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1784RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
f1eed927 1785GEN_VEXT_VV(vdivu_vv_b, 1)
1786GEN_VEXT_VV(vdivu_vv_h, 2)
1787GEN_VEXT_VV(vdivu_vv_w, 4)
1788GEN_VEXT_VV(vdivu_vv_d, 8)
1789GEN_VEXT_VV(vdiv_vv_b, 1)
1790GEN_VEXT_VV(vdiv_vv_h, 2)
1791GEN_VEXT_VV(vdiv_vv_w, 4)
1792GEN_VEXT_VV(vdiv_vv_d, 8)
1793GEN_VEXT_VV(vremu_vv_b, 1)
1794GEN_VEXT_VV(vremu_vv_h, 2)
1795GEN_VEXT_VV(vremu_vv_w, 4)
1796GEN_VEXT_VV(vremu_vv_d, 8)
1797GEN_VEXT_VV(vrem_vv_b, 1)
1798GEN_VEXT_VV(vrem_vv_h, 2)
1799GEN_VEXT_VV(vrem_vv_w, 4)
1800GEN_VEXT_VV(vrem_vv_d, 8)
85e6658c
LZ
1801
1802RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1803RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1804RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1805RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1806RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1807RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1808RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1809RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1810RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1811RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1812RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1813RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1814RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1815RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1816RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1817RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
5c19fc15 1818GEN_VEXT_VX(vdivu_vx_b, 1)
1819GEN_VEXT_VX(vdivu_vx_h, 2)
1820GEN_VEXT_VX(vdivu_vx_w, 4)
1821GEN_VEXT_VX(vdivu_vx_d, 8)
1822GEN_VEXT_VX(vdiv_vx_b, 1)
1823GEN_VEXT_VX(vdiv_vx_h, 2)
1824GEN_VEXT_VX(vdiv_vx_w, 4)
1825GEN_VEXT_VX(vdiv_vx_d, 8)
1826GEN_VEXT_VX(vremu_vx_b, 1)
1827GEN_VEXT_VX(vremu_vx_h, 2)
1828GEN_VEXT_VX(vremu_vx_w, 4)
1829GEN_VEXT_VX(vremu_vx_d, 8)
1830GEN_VEXT_VX(vrem_vx_b, 1)
1831GEN_VEXT_VX(vrem_vx_h, 2)
1832GEN_VEXT_VX(vrem_vx_w, 4)
1833GEN_VEXT_VX(vrem_vx_d, 8)
97b1cba3
LZ
1834
1835/* Vector Widening Integer Multiply Instructions */
1836RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1837RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1838RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1839RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1840RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1841RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1842RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1843RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1844RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
f1eed927 1845GEN_VEXT_VV(vwmul_vv_b, 2)
1846GEN_VEXT_VV(vwmul_vv_h, 4)
1847GEN_VEXT_VV(vwmul_vv_w, 8)
1848GEN_VEXT_VV(vwmulu_vv_b, 2)
1849GEN_VEXT_VV(vwmulu_vv_h, 4)
1850GEN_VEXT_VV(vwmulu_vv_w, 8)
1851GEN_VEXT_VV(vwmulsu_vv_b, 2)
1852GEN_VEXT_VV(vwmulsu_vv_h, 4)
1853GEN_VEXT_VV(vwmulsu_vv_w, 8)
97b1cba3
LZ
1854
1855RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1856RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1857RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1858RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1859RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1860RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1861RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1862RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1863RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
5c19fc15 1864GEN_VEXT_VX(vwmul_vx_b, 2)
1865GEN_VEXT_VX(vwmul_vx_h, 4)
1866GEN_VEXT_VX(vwmul_vx_w, 8)
1867GEN_VEXT_VX(vwmulu_vx_b, 2)
1868GEN_VEXT_VX(vwmulu_vx_h, 4)
1869GEN_VEXT_VX(vwmulu_vx_w, 8)
1870GEN_VEXT_VX(vwmulsu_vx_b, 2)
1871GEN_VEXT_VX(vwmulsu_vx_h, 4)
1872GEN_VEXT_VX(vwmulsu_vx_w, 8)
54df813a
LZ
1873
1874/* Vector Single-Width Integer Multiply-Add Instructions */
1875#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1876static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1877{ \
1878 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1879 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1880 TD d = *((TD *)vd + HD(i)); \
1881 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1882}
1883
1884#define DO_MACC(N, M, D) (M * N + D)
1885#define DO_NMSAC(N, M, D) (-(M * N) + D)
1886#define DO_MADD(N, M, D) (M * D + N)
1887#define DO_NMSUB(N, M, D) (-(M * D) + N)
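/*
 * Operand roles, with OP(s2, s1, d) as invoked by OPIVV3 above:
 * DO_MACC/DO_NMSAC accumulate into the destination,
 *     vd[i] = +/-(vs1[i] * vs2[i]) + vd[i],
 * while DO_MADD/DO_NMSUB multiply the destination,
 *     vd[i] = +/-(vs1[i] * vd[i]) + vs2[i].
 */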
1888RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1889RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1890RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1891RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1892RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1893RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1894RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1895RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1896RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1897RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1898RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1899RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1900RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1901RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1902RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1903RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
f1eed927 1904GEN_VEXT_VV(vmacc_vv_b, 1)
1905GEN_VEXT_VV(vmacc_vv_h, 2)
1906GEN_VEXT_VV(vmacc_vv_w, 4)
1907GEN_VEXT_VV(vmacc_vv_d, 8)
1908GEN_VEXT_VV(vnmsac_vv_b, 1)
1909GEN_VEXT_VV(vnmsac_vv_h, 2)
1910GEN_VEXT_VV(vnmsac_vv_w, 4)
1911GEN_VEXT_VV(vnmsac_vv_d, 8)
1912GEN_VEXT_VV(vmadd_vv_b, 1)
1913GEN_VEXT_VV(vmadd_vv_h, 2)
1914GEN_VEXT_VV(vmadd_vv_w, 4)
1915GEN_VEXT_VV(vmadd_vv_d, 8)
1916GEN_VEXT_VV(vnmsub_vv_b, 1)
1917GEN_VEXT_VV(vnmsub_vv_h, 2)
1918GEN_VEXT_VV(vnmsub_vv_w, 4)
1919GEN_VEXT_VV(vnmsub_vv_d, 8)
54df813a
LZ
1920
1921#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1922static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1923{ \
1924 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1925 TD d = *((TD *)vd + HD(i)); \
1926 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1927}
1928
1929RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1930RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1931RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1932RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1933RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1934RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1935RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1936RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1937RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1938RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1939RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1940RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1941RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1942RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1943RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1944RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
5c19fc15 1945GEN_VEXT_VX(vmacc_vx_b, 1)
1946GEN_VEXT_VX(vmacc_vx_h, 2)
1947GEN_VEXT_VX(vmacc_vx_w, 4)
1948GEN_VEXT_VX(vmacc_vx_d, 8)
1949GEN_VEXT_VX(vnmsac_vx_b, 1)
1950GEN_VEXT_VX(vnmsac_vx_h, 2)
1951GEN_VEXT_VX(vnmsac_vx_w, 4)
1952GEN_VEXT_VX(vnmsac_vx_d, 8)
1953GEN_VEXT_VX(vmadd_vx_b, 1)
1954GEN_VEXT_VX(vmadd_vx_h, 2)
1955GEN_VEXT_VX(vmadd_vx_w, 4)
1956GEN_VEXT_VX(vmadd_vx_d, 8)
1957GEN_VEXT_VX(vnmsub_vx_b, 1)
1958GEN_VEXT_VX(vnmsub_vx_h, 2)
1959GEN_VEXT_VX(vnmsub_vx_w, 4)
1960GEN_VEXT_VX(vnmsub_vx_d, 8)
2b587b33
LZ
1961
1962/* Vector Widening Integer Multiply-Add Instructions */
1963RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1964RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1965RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1966RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1967RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1968RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1969RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1970RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1971RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
f1eed927 1972GEN_VEXT_VV(vwmaccu_vv_b, 2)
1973GEN_VEXT_VV(vwmaccu_vv_h, 4)
1974GEN_VEXT_VV(vwmaccu_vv_w, 8)
1975GEN_VEXT_VV(vwmacc_vv_b, 2)
1976GEN_VEXT_VV(vwmacc_vv_h, 4)
1977GEN_VEXT_VV(vwmacc_vv_w, 8)
1978GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1979GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1980GEN_VEXT_VV(vwmaccsu_vv_w, 8)
2b587b33
LZ
1981
1982RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1983RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1984RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1985RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1986RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1987RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1988RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1989RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1990RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1991RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1992RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1993RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
5c19fc15 1994GEN_VEXT_VX(vwmaccu_vx_b, 2)
1995GEN_VEXT_VX(vwmaccu_vx_h, 4)
1996GEN_VEXT_VX(vwmaccu_vx_w, 8)
1997GEN_VEXT_VX(vwmacc_vx_b, 2)
1998GEN_VEXT_VX(vwmacc_vx_h, 4)
1999GEN_VEXT_VX(vwmacc_vx_w, 8)
2000GEN_VEXT_VX(vwmaccsu_vx_b, 2)
2001GEN_VEXT_VX(vwmaccsu_vx_h, 4)
2002GEN_VEXT_VX(vwmaccsu_vx_w, 8)
2003GEN_VEXT_VX(vwmaccus_vx_b, 2)
2004GEN_VEXT_VX(vwmaccus_vx_h, 4)
2005GEN_VEXT_VX(vwmaccus_vx_w, 8)
f020a7a1
LZ
2006
2007/* Vector Integer Merge and Move Instructions */
3479a814 2008#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
f020a7a1
LZ
2009void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
2010 uint32_t desc) \
2011{ \
2012 uint32_t vl = env->vl; \
89a32de2 2013 uint32_t esz = sizeof(ETYPE); \
2014 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2015 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2016 uint32_t i; \
2017 \
f714361e 2018 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
2019 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
2020 *((ETYPE *)vd + H(i)) = s1; \
2021 } \
f714361e 2022 env->vstart = 0; \
89a32de2 2023 /* set tail elements to 1s */ \
2024 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2025}
2026
3479a814
FC
2027GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
2028GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
2029GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
2030GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
f020a7a1 2031
3479a814 2032#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
f020a7a1
LZ
2033void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
2034 uint32_t desc) \
2035{ \
2036 uint32_t vl = env->vl; \
89a32de2 2037 uint32_t esz = sizeof(ETYPE); \
2038 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2039 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2040 uint32_t i; \
2041 \
f714361e 2042 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
2043 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
2044 } \
f714361e 2045 env->vstart = 0; \
89a32de2 2046 /* set tail elements to 1s */ \
2047 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2048}
2049
3479a814
FC
2050GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
2051GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
2052GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
2053GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
f020a7a1 2054
3479a814 2055#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
f020a7a1
LZ
2056void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2057 CPURISCVState *env, uint32_t desc) \
2058{ \
f020a7a1 2059 uint32_t vl = env->vl; \
89a32de2 2060 uint32_t esz = sizeof(ETYPE); \
2061 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2062 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2063 uint32_t i; \
2064 \
f714361e 2065 for (i = env->vstart; i < vl; i++) { \
f9298de5 2066 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
f020a7a1
LZ
2067 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
2068 } \
f714361e 2069 env->vstart = 0; \
89a32de2 2070 /* set tail elements to 1s */ \
2071 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2072}
2073
3479a814
FC
2074GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
2075GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
2076GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
2077GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
f020a7a1 2078
3479a814 2079#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
f020a7a1
LZ
2080void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2081 void *vs2, CPURISCVState *env, uint32_t desc) \
2082{ \
f020a7a1 2083 uint32_t vl = env->vl; \
89a32de2 2084 uint32_t esz = sizeof(ETYPE); \
2085 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2086 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2087 uint32_t i; \
2088 \
f714361e 2089 for (i = env->vstart; i < vl; i++) { \
f020a7a1 2090 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 2091 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
f020a7a1
LZ
2092 (ETYPE)(target_long)s1); \
2093 *((ETYPE *)vd + H(i)) = d; \
2094 } \
f714361e 2095 env->vstart = 0; \
89a32de2 2096 /* set tail elements to 1s */ \
2097 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2098}
2099
3479a814
FC
2100GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
2101GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
2102GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
2103GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
eb2650e3
LZ
2104
2105/*
2106 *** Vector Fixed-Point Arithmetic Instructions
2107 */
2108
2109/* Vector Single-Width Saturating Add and Subtract */
2110
2111/*
2112 * Fixed-point instructions generally need a rounding mode and saturation,
2113 * so define the common helper macros for fixed-point arithmetic here.
2114 */
2115typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2116 CPURISCVState *env, int vxrm);
2117
2118#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2119static inline void \
2120do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2121 CPURISCVState *env, int vxrm) \
2122{ \
2123 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2124 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2125 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2126}
2127
2128static inline void
2129vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2130 CPURISCVState *env,
f9298de5 2131 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 2132 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 2133{
f714361e 2134 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2135 if (!vm && !vext_elem_mask(v0, i)) {
72e17a9f
YTC
2136 /* set masked-off elements to 1s */
2137 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
eb2650e3
LZ
2138 continue;
2139 }
2140 fn(vd, vs1, vs2, i, env, vxrm);
2141 }
f714361e 2142 env->vstart = 0;
eb2650e3
LZ
2143}
2144
2145static inline void
2146vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2147 CPURISCVState *env,
8a085fb2 2148 uint32_t desc,
09106eed 2149 opivv2_rm_fn *fn, uint32_t esz)
eb2650e3 2150{
eb2650e3
LZ
2151 uint32_t vm = vext_vm(desc);
2152 uint32_t vl = env->vl;
09106eed 2153 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2154 uint32_t vta = vext_vta(desc);
72e17a9f 2155 uint32_t vma = vext_vma(desc);
eb2650e3
LZ
2156
2157 switch (env->vxrm) {
2158 case 0: /* rnu */
2159 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2160 env, vl, vm, 0, fn, vma, esz);
eb2650e3
LZ
2161 break;
2162 case 1: /* rne */
2163 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2164 env, vl, vm, 1, fn, vma, esz);
eb2650e3
LZ
2165 break;
2166 case 2: /* rdn */
2167 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2168 env, vl, vm, 2, fn, vma, esz);
eb2650e3
LZ
2169 break;
2170 default: /* rod */
2171 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2172 env, vl, vm, 3, fn, vma, esz);
eb2650e3
LZ
2173 break;
2174 }
09106eed 2175 /* set tail elements to 1s */
2176 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2177}
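/*
 * The switch above expands vext_vv_rm_1 once per rounding mode with a
 * constant vxrm argument, presumably so the compiler can specialize the
 * get_round() logic for each mode instead of branching on env->vxrm for
 * every element.
 */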
2178
2179/* generate helpers for fixed point instructions with OPIVV format */
09106eed 2180#define GEN_VEXT_VV_RM(NAME, ESZ) \
eb2650e3
LZ
2181void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2182 CPURISCVState *env, uint32_t desc) \
2183{ \
8a085fb2 2184 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
09106eed 2185 do_##NAME, ESZ); \
eb2650e3
LZ
2186}
2187
2188static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2189{
2190 uint8_t res = a + b;
2191 if (res < a) {
2192 res = UINT8_MAX;
2193 env->vxsat = 0x1;
2194 }
2195 return res;
2196}
2197
2198static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2199 uint16_t b)
2200{
2201 uint16_t res = a + b;
2202 if (res < a) {
2203 res = UINT16_MAX;
2204 env->vxsat = 0x1;
2205 }
2206 return res;
2207}
2208
2209static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2210 uint32_t b)
2211{
2212 uint32_t res = a + b;
2213 if (res < a) {
2214 res = UINT32_MAX;
2215 env->vxsat = 0x1;
2216 }
2217 return res;
2218}
2219
2220static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2221 uint64_t b)
2222{
2223 uint64_t res = a + b;
2224 if (res < a) {
2225 res = UINT64_MAX;
2226 env->vxsat = 0x1;
2227 }
2228 return res;
2229}
2230
2231RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2232RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2233RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2234RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
09106eed 2235GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2236GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2237GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2238GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
eb2650e3
LZ
2239
2240typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2241 CPURISCVState *env, int vxrm);
2242
2243#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2244static inline void \
2245do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2246 CPURISCVState *env, int vxrm) \
2247{ \
2248 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2249 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2250}
2251
2252static inline void
2253vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2254 CPURISCVState *env,
f9298de5 2255 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 2256 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 2257{
f714361e 2258 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2259 if (!vm && !vext_elem_mask(v0, i)) {
72e17a9f
YTC
2260 /* set masked-off elements to 1s */
2261 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
eb2650e3
LZ
2262 continue;
2263 }
2264 fn(vd, s1, vs2, i, env, vxrm);
2265 }
f714361e 2266 env->vstart = 0;
eb2650e3
LZ
2267}
2268
2269static inline void
2270vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2271 CPURISCVState *env,
8a085fb2 2272 uint32_t desc,
09106eed 2273 opivx2_rm_fn *fn, uint32_t esz)
eb2650e3 2274{
eb2650e3
LZ
2275 uint32_t vm = vext_vm(desc);
2276 uint32_t vl = env->vl;
09106eed 2277 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2278 uint32_t vta = vext_vta(desc);
72e17a9f 2279 uint32_t vma = vext_vma(desc);
eb2650e3
LZ
2280
2281 switch (env->vxrm) {
2282 case 0: /* rnu */
2283 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2284 env, vl, vm, 0, fn, vma, esz);
eb2650e3
LZ
2285 break;
2286 case 1: /* rne */
2287 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2288 env, vl, vm, 1, fn, vma, esz);
eb2650e3
LZ
2289 break;
2290 case 2: /* rdn */
2291 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2292 env, vl, vm, 2, fn, vma, esz);
eb2650e3
LZ
2293 break;
2294 default: /* rod */
2295 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2296 env, vl, vm, 3, fn, vma, esz);
eb2650e3
LZ
2297 break;
2298 }
09106eed 2299 /* set tail elements to 1s */
2300 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2301}
2302
2303/* generate helpers for fixed point instructions with OPIVX format */
09106eed 2304#define GEN_VEXT_VX_RM(NAME, ESZ) \
eb2650e3
LZ
2305void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2306 void *vs2, CPURISCVState *env, uint32_t desc) \
2307{ \
8a085fb2 2308 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
09106eed 2309 do_##NAME, ESZ); \
eb2650e3
LZ
2310}
2311
2312RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2313RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2314RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2315RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
09106eed 2316GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2317GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2318GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2319GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
eb2650e3
LZ
2320
2321static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2322{
2323 int8_t res = a + b;
2324 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2325 res = a > 0 ? INT8_MAX : INT8_MIN;
2326 env->vxsat = 0x1;
2327 }
2328 return res;
2329}
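/*
 * Overflow check, for reference: (res ^ a) & (res ^ b) has the sign bit set
 * only when a and b share a sign and res has the opposite sign, which is
 * exactly the signed-addition overflow condition.  Example: a = b = 100
 * gives res = -56 for int8_t, so the result saturates to INT8_MAX.
 */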
2330
2331static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2332{
2333 int16_t res = a + b;
2334 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2335 res = a > 0 ? INT16_MAX : INT16_MIN;
2336 env->vxsat = 0x1;
2337 }
2338 return res;
2339}
2340
2341static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2342{
2343 int32_t res = a + b;
2344 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2345 res = a > 0 ? INT32_MAX : INT32_MIN;
2346 env->vxsat = 0x1;
2347 }
2348 return res;
2349}
2350
2351static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2352{
2353 int64_t res = a + b;
2354 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2355 res = a > 0 ? INT64_MAX : INT64_MIN;
2356 env->vxsat = 0x1;
2357 }
2358 return res;
2359}
2360
2361RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2362RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2363RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2364RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
09106eed 2365GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2366GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2367GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2368GEN_VEXT_VV_RM(vsadd_vv_d, 8)
eb2650e3
LZ
2369
2370RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2371RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2372RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2373RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
09106eed 2374GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2375GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2376GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2377GEN_VEXT_VX_RM(vsadd_vx_d, 8)
eb2650e3
LZ
2378
2379static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2380{
2381 uint8_t res = a - b;
2382 if (res > a) {
2383 res = 0;
2384 env->vxsat = 0x1;
2385 }
2386 return res;
2387}
2388
2389static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2390 uint16_t b)
2391{
2392 uint16_t res = a - b;
2393 if (res > a) {
2394 res = 0;
2395 env->vxsat = 0x1;
2396 }
2397 return res;
2398}
2399
2400static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2401 uint32_t b)
2402{
2403 uint32_t res = a - b;
2404 if (res > a) {
2405 res = 0;
2406 env->vxsat = 0x1;
2407 }
2408 return res;
2409}
2410
2411static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2412 uint64_t b)
2413{
2414 uint64_t res = a - b;
2415 if (res > a) {
2416 res = 0;
2417 env->vxsat = 0x1;
2418 }
2419 return res;
2420}
2421
2422RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2423RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2424RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2425RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
09106eed 2426GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2427GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2428GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2429GEN_VEXT_VV_RM(vssubu_vv_d, 8)
eb2650e3
LZ
2430
2431RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2432RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2433RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2434RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
09106eed 2435GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2436GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2437GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2438GEN_VEXT_VX_RM(vssubu_vx_d, 8)
eb2650e3
LZ
2439
2440static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2441{
2442 int8_t res = a - b;
2443 if ((res ^ a) & (a ^ b) & INT8_MIN) {
65606f21 2444 res = a >= 0 ? INT8_MAX : INT8_MIN;
eb2650e3
LZ
2445 env->vxsat = 0x1;
2446 }
2447 return res;
2448}
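/*
 * For subtraction the overflow condition differs: (a ^ b) requires the
 * operands to have opposite signs and (res ^ a) requires the result to
 * change sign relative to the minuend, e.g. INT8_MIN - 1 saturates to
 * INT8_MIN.
 */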
2449
2450static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2451{
2452 int16_t res = a - b;
2453 if ((res ^ a) & (a ^ b) & INT16_MIN) {
65606f21 2454 res = a >= 0 ? INT16_MAX : INT16_MIN;
eb2650e3
LZ
2455 env->vxsat = 0x1;
2456 }
2457 return res;
2458}
2459
2460static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2461{
2462 int32_t res = a - b;
2463 if ((res ^ a) & (a ^ b) & INT32_MIN) {
65606f21 2464 res = a >= 0 ? INT32_MAX : INT32_MIN;
eb2650e3
LZ
2465 env->vxsat = 0x1;
2466 }
2467 return res;
2468}
2469
2470static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2471{
2472 int64_t res = a - b;
2473 if ((res ^ a) & (a ^ b) & INT64_MIN) {
65606f21 2474 res = a >= 0 ? INT64_MAX : INT64_MIN;
eb2650e3
LZ
2475 env->vxsat = 0x1;
2476 }
2477 return res;
2478}
2479
2480RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2481RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2482RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2483RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
09106eed 2484GEN_VEXT_VV_RM(vssub_vv_b, 1)
2485GEN_VEXT_VV_RM(vssub_vv_h, 2)
2486GEN_VEXT_VV_RM(vssub_vv_w, 4)
2487GEN_VEXT_VV_RM(vssub_vv_d, 8)
eb2650e3
LZ
2488
2489RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2490RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2491RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2492RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
09106eed 2493GEN_VEXT_VX_RM(vssub_vx_b, 1)
2494GEN_VEXT_VX_RM(vssub_vx_h, 2)
2495GEN_VEXT_VX_RM(vssub_vx_w, 4)
2496GEN_VEXT_VX_RM(vssub_vx_d, 8)
b7aee481
LZ
2497
2498/* Vector Single-Width Averaging Add and Subtract */
2499static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2500{
2501 uint8_t d = extract64(v, shift, 1);
2502 uint8_t d1;
2503 uint64_t D1, D2;
2504
2505 if (shift == 0 || shift > 64) {
2506 return 0;
2507 }
2508
2509 d1 = extract64(v, shift - 1, 1);
2510 D1 = extract64(v, 0, shift);
2511 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2512 return d1;
2513 } else if (vxrm == 1) { /* round-to-nearest-even */
2514 if (shift > 1) {
2515 D2 = extract64(v, 0, shift - 1);
2516 return d1 & ((D2 != 0) | d);
2517 } else {
2518 return d1 & d;
2519 }
2520 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2521 return !d & (D1 != 0);
2522 }
2523 return 0; /* round-down (truncate) */
2524}
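/*
 * Worked example (for illustration): v = 0b10110, shift = 3, so the kept
 * bits are 0b10 and the discarded bits are 0b110 (d = 0, d1 = 1, D1 = 0b110,
 * D2 = 0b10):
 *   rnu: d1 = 1                    -> round up  (result 0b11)
 *   rne: d1 & ((D2 != 0) | d) = 1  -> round up  (result 0b11)
 *   rdn: 0                         -> truncate  (result 0b10)
 *   rod: !d & (D1 != 0) = 1        -> set LSB   (result 0b11)
 */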
2525
2526static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2527{
2528 int64_t res = (int64_t)a + b;
2529 uint8_t round = get_round(vxrm, res, 1);
2530
2531 return (res >> 1) + round;
2532}
2533
2534static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2535{
2536 int64_t res = a + b;
2537 uint8_t round = get_round(vxrm, res, 1);
2538 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2539
2540 /* With signed overflow, bit 64 is inverse of bit 63. */
2541 return ((res >> 1) ^ over) + round;
2542}
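/*
 * Example of the overflow fix-up: a = b = INT64_MIN wraps the 64-bit add to
 * res = 0; 'over' then has bit 63 set, and (res >> 1) ^ over restores the
 * true bit 63 of the 65-bit sum, giving INT64_MIN as the average.
 */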
2543
2544RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2545RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2546RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2547RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
09106eed 2548GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2549GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2550GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2551GEN_VEXT_VV_RM(vaadd_vv_d, 8)
b7aee481
LZ
2552
2553RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2554RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2555RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2556RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
09106eed 2557GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2558GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2559GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2560GEN_VEXT_VX_RM(vaadd_vx_d, 8)
b7aee481 2561
8b99a110
FC
2562static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2563 uint32_t a, uint32_t b)
2564{
2565 uint64_t res = (uint64_t)a + b;
2566 uint8_t round = get_round(vxrm, res, 1);
2567
2568 return (res >> 1) + round;
2569}
2570
2571static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2572 uint64_t a, uint64_t b)
2573{
2574 uint64_t res = a + b;
2575 uint8_t round = get_round(vxrm, res, 1);
2576 uint64_t over = (uint64_t)(res < a) << 63;
2577
2578 return ((res >> 1) | over) + round;
2579}
2580
2581RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2582RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2583RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2584RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
09106eed 2585GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2586GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2587GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2588GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
8b99a110
FC
2589
2590RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2591RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2592RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2593RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
09106eed 2594GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2595GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2596GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2597GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
8b99a110 2598
b7aee481
LZ
2599static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2600{
2601 int64_t res = (int64_t)a - b;
2602 uint8_t round = get_round(vxrm, res, 1);
2603
2604 return (res >> 1) + round;
2605}
2606
2607static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2608{
2609 int64_t res = (int64_t)a - b;
2610 uint8_t round = get_round(vxrm, res, 1);
2611 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2612
2613 /* With signed overflow, bit 64 is inverse of bit 63. */
2614 return ((res >> 1) ^ over) + round;
2615}
2616
2617RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2618RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2619RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2620RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
09106eed 2621GEN_VEXT_VV_RM(vasub_vv_b, 1)
2622GEN_VEXT_VV_RM(vasub_vv_h, 2)
2623GEN_VEXT_VV_RM(vasub_vv_w, 4)
2624GEN_VEXT_VV_RM(vasub_vv_d, 8)
b7aee481
LZ
2625
2626RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2627RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2628RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2629RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
09106eed 2630GEN_VEXT_VX_RM(vasub_vx_b, 1)
2631GEN_VEXT_VX_RM(vasub_vx_h, 2)
2632GEN_VEXT_VX_RM(vasub_vx_w, 4)
2633GEN_VEXT_VX_RM(vasub_vx_d, 8)
9f0ff9e5 2634
8b99a110
FC
2635static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2636 uint32_t a, uint32_t b)
2637{
2638 int64_t res = (int64_t)a - b;
2639 uint8_t round = get_round(vxrm, res, 1);
2640
2641 return (res >> 1) + round;
2642}
2643
2644static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2645 uint64_t a, uint64_t b)
2646{
2647 uint64_t res = (uint64_t)a - b;
2648 uint8_t round = get_round(vxrm, res, 1);
2649 uint64_t over = (uint64_t)(res > a) << 63;
2650
2651 return ((res >> 1) | over) + round;
2652}
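/*
 * Example of the borrow fix-up: a = 0, b = 2 wraps to res = 2 ** 64 - 2;
 * res > a signals the borrow, so 'over' re-inserts the sign into bit 63 and
 * the averaged result is the wrapped encoding of -1, i.e. (0 - 2) / 2.
 */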
2653
2654RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2655RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2656RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2657RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
09106eed 2658GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2659GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2660GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2661GEN_VEXT_VV_RM(vasubu_vv_d, 8)
8b99a110
FC
2662
2663RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2664RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2665RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2666RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
09106eed 2667GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2668GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2669GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2670GEN_VEXT_VX_RM(vasubu_vx_d, 8)
8b99a110 2671
9f0ff9e5
LZ
2672/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2673static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2674{
2675 uint8_t round;
2676 int16_t res;
2677
2678 res = (int16_t)a * (int16_t)b;
2679 round = get_round(vxrm, res, 7);
2680 res = (res >> 7) + round;
2681
2682 if (res > INT8_MAX) {
2683 env->vxsat = 0x1;
2684 return INT8_MAX;
2685 } else if (res < INT8_MIN) {
2686 env->vxsat = 0x1;
2687 return INT8_MIN;
2688 } else {
2689 return res;
2690 }
2691}
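/*
 * vsmul is effectively a signed fixed-point (Q-format) multiply:
 * (a * b) >> (SEW - 1) with rounding and saturation.  For example, with
 * SEW = 8 and a = b = 0x40 (0.5 in Q7), res = 0x1000 >> 7 = 0x20 (0.25).
 */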
2692
2693static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2694{
2695 uint8_t round;
2696 int32_t res;
2697
2698 res = (int32_t)a * (int32_t)b;
2699 round = get_round(vxrm, res, 15);
2700 res = (res >> 15) + round;
2701
2702 if (res > INT16_MAX) {
2703 env->vxsat = 0x1;
2704 return INT16_MAX;
2705 } else if (res < INT16_MIN) {
2706 env->vxsat = 0x1;
2707 return INT16_MIN;
2708 } else {
2709 return res;
2710 }
2711}
2712
2713static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2714{
2715 uint8_t round;
2716 int64_t res;
2717
2718 res = (int64_t)a * (int64_t)b;
2719 round = get_round(vxrm, res, 31);
2720 res = (res >> 31) + round;
2721
2722 if (res > INT32_MAX) {
2723 env->vxsat = 0x1;
2724 return INT32_MAX;
2725 } else if (res < INT32_MIN) {
2726 env->vxsat = 0x1;
2727 return INT32_MIN;
2728 } else {
2729 return res;
2730 }
2731}
2732
2733static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2734{
2735 uint8_t round;
2736 uint64_t hi_64, lo_64;
2737 int64_t res;
2738
2739 if (a == INT64_MIN && b == INT64_MIN) {
2740 env->vxsat = 1;
2741 return INT64_MAX;
2742 }
2743
2744 muls64(&lo_64, &hi_64, a, b);
2745 round = get_round(vxrm, lo_64, 63);
2746 /*
2747 * Cannot overflow, as there are always
2748 * 2 sign bits after multiply.
2749 */
2750 res = (hi_64 << 1) | (lo_64 >> 63);
2751 if (round) {
2752 if (res == INT64_MAX) {
2753 env->vxsat = 1;
2754 } else {
2755 res += 1;
2756 }
2757 }
2758 return res;
2759}
2760
2761RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2762RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2763RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2764RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
09106eed 2765GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2766GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2767GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2768GEN_VEXT_VV_RM(vsmul_vv_d, 8)
9f0ff9e5
LZ
2769
2770RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2771RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2772RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2773RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
09106eed 2774GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2775GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2776GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2777GEN_VEXT_VX_RM(vsmul_vx_d, 8)
0a1eaf00 2778
04a61406
LZ
2779/* Vector Single-Width Scaling Shift Instructions */
2780static inline uint8_t
2781vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2782{
2783 uint8_t round, shift = b & 0x7;
2784 uint8_t res;
2785
2786 round = get_round(vxrm, a, shift);
2787 res = (a >> shift) + round;
2788 return res;
2789}
2790static inline uint16_t
2791vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2792{
2793 uint8_t round, shift = b & 0xf;
2794 uint16_t res;
2795
2796 round = get_round(vxrm, a, shift);
2797 res = (a >> shift) + round;
2798 return res;
2799}
2800static inline uint32_t
2801vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2802{
2803 uint8_t round, shift = b & 0x1f;
2804 uint32_t res;
2805
2806 round = get_round(vxrm, a, shift);
2807 res = (a >> shift) + round;
2808 return res;
2809}
2810static inline uint64_t
2811vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2812{
2813 uint8_t round, shift = b & 0x3f;
2814 uint64_t res;
2815
2816 round = get_round(vxrm, a, shift);
2817 res = (a >> shift) + round;
2818 return res;
2819}
2820RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2821RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2822RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2823RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
09106eed 2824GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2825GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2826GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2827GEN_VEXT_VV_RM(vssrl_vv_d, 8)
04a61406
LZ
2828
2829RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2830RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2831RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2832RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
09106eed 2833GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2834GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2835GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2836GEN_VEXT_VX_RM(vssrl_vx_d, 8)
04a61406
LZ
2837
2838static inline int8_t
2839vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2840{
2841 uint8_t round, shift = b & 0x7;
2842 int8_t res;
2843
2844 round = get_round(vxrm, a, shift);
2845 res = (a >> shift) + round;
2846 return res;
2847}
2848static inline int16_t
2849vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2850{
2851 uint8_t round, shift = b & 0xf;
2852 int16_t res;
2853
2854 round = get_round(vxrm, a, shift);
2855 res = (a >> shift) + round;
2856 return res;
2857}
2858static inline int32_t
2859vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2860{
2861 uint8_t round, shift = b & 0x1f;
2862 int32_t res;
2863
2864 round = get_round(vxrm, a, shift);
2865 res = (a >> shift) + round;
2866 return res;
2867}
2868static inline int64_t
2869vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2870{
2871 uint8_t round, shift = b & 0x3f;
2872 int64_t res;
2873
2874 round = get_round(vxrm, a, shift);
2875 res = (a >> shift) + round;
2876 return res;
2877}
9ff3d287 2878
04a61406
LZ
2879RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2880RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2881RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2882RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
09106eed 2883GEN_VEXT_VV_RM(vssra_vv_b, 1)
2884GEN_VEXT_VV_RM(vssra_vv_h, 2)
2885GEN_VEXT_VV_RM(vssra_vv_w, 4)
2886GEN_VEXT_VV_RM(vssra_vv_d, 8)
04a61406
LZ
2887
2888RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2889RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2890RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2891RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
09106eed 2892GEN_VEXT_VX_RM(vssra_vx_b, 1)
2893GEN_VEXT_VX_RM(vssra_vx_h, 2)
2894GEN_VEXT_VX_RM(vssra_vx_w, 4)
2895GEN_VEXT_VX_RM(vssra_vx_d, 8)
9ff3d287
LZ
2896
2897/* Vector Narrowing Fixed-Point Clip Instructions */
2898static inline int8_t
2899vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2900{
2901 uint8_t round, shift = b & 0xf;
2902 int16_t res;
2903
2904 round = get_round(vxrm, a, shift);
2905 res = (a >> shift) + round;
2906 if (res > INT8_MAX) {
2907 env->vxsat = 0x1;
2908 return INT8_MAX;
2909 } else if (res < INT8_MIN) {
2910 env->vxsat = 0x1;
2911 return INT8_MIN;
2912 } else {
2913 return res;
2914 }
2915}
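/*
 * Example: for vnclip8 with a = 0x1234 (4660) and shift = 4, the rounded
 * shift gives 0x123 (291), which exceeds INT8_MAX, so the result saturates
 * to 127 and vxsat is set.
 */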
2916
2917static inline int16_t
2918vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2919{
2920 uint8_t round, shift = b & 0x1f;
2921 int32_t res;
2922
2923 round = get_round(vxrm, a, shift);
2924 res = (a >> shift) + round;
2925 if (res > INT16_MAX) {
2926 env->vxsat = 0x1;
2927 return INT16_MAX;
2928 } else if (res < INT16_MIN) {
2929 env->vxsat = 0x1;
2930 return INT16_MIN;
2931 } else {
2932 return res;
2933 }
2934}
2935
2936static inline int32_t
2937vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2938{
2939 uint8_t round, shift = b & 0x3f;
2940 int64_t res;
2941
2942 round = get_round(vxrm, a, shift);
2943 res = (a >> shift) + round;
2944 if (res > INT32_MAX) {
2945 env->vxsat = 0x1;
2946 return INT32_MAX;
2947 } else if (res < INT32_MIN) {
2948 env->vxsat = 0x1;
2949 return INT32_MIN;
2950 } else {
2951 return res;
2952 }
2953}
2954
a70b3a73
FC
2955RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2956RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2957RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
09106eed 2958GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2959GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2960GEN_VEXT_VV_RM(vnclip_wv_w, 4)
a70b3a73
FC
2961
2962RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2963RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2964RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
09106eed 2965GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2966GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2967GEN_VEXT_VX_RM(vnclip_wx_w, 4)
9ff3d287
LZ
2968
2969static inline uint8_t
2970vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2971{
2972 uint8_t round, shift = b & 0xf;
2973 uint16_t res;
2974
2975 round = get_round(vxrm, a, shift);
2976 res = (a >> shift) + round;
2977 if (res > UINT8_MAX) {
2978 env->vxsat = 0x1;
2979 return UINT8_MAX;
2980 } else {
2981 return res;
2982 }
2983}
2984
2985static inline uint16_t
2986vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2987{
2988 uint8_t round, shift = b & 0x1f;
2989 uint32_t res;
2990
2991 round = get_round(vxrm, a, shift);
2992 res = (a >> shift) + round;
2993 if (res > UINT16_MAX) {
2994 env->vxsat = 0x1;
2995 return UINT16_MAX;
2996 } else {
2997 return res;
2998 }
2999}
3000
3001static inline uint32_t
3002vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
3003{
3004 uint8_t round, shift = b & 0x3f;
a70b3a73 3005 uint64_t res;
9ff3d287
LZ
3006
3007 round = get_round(vxrm, a, shift);
3008 res = (a >> shift) + round;
3009 if (res > UINT32_MAX) {
3010 env->vxsat = 0x1;
3011 return UINT32_MAX;
3012 } else {
3013 return res;
3014 }
3015}
3016
a70b3a73
FC
3017RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
3018RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
3019RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
09106eed 3020GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
3021GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
3022GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
9ff3d287 3023
a70b3a73
FC
3024RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
3025RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
3026RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
09106eed 3027GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
3028GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
3029GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
ce2a0343
LZ
3030
3031/*
3032 *** Vector Floating-Point Arithmetic Instructions
3033 */
3034/* Vector Single-Width Floating-Point Add/Subtract Instructions */
3035#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3036static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3037 CPURISCVState *env) \
3038{ \
3039 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3040 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3041 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
3042}
3043
5eacf7d8 3044#define GEN_VEXT_VV_ENV(NAME, ESZ) \
ce2a0343
LZ
3045void HELPER(NAME)(void *vd, void *v0, void *vs1, \
3046 void *vs2, CPURISCVState *env, \
3047 uint32_t desc) \
3048{ \
ce2a0343
LZ
3049 uint32_t vm = vext_vm(desc); \
3050 uint32_t vl = env->vl; \
5eacf7d8 3051 uint32_t total_elems = \
3052 vext_get_total_elems(env, desc, ESZ); \
3053 uint32_t vta = vext_vta(desc); \
5b448f44 3054 uint32_t vma = vext_vma(desc); \
ce2a0343
LZ
3055 uint32_t i; \
3056 \
f714361e 3057 for (i = env->vstart; i < vl; i++) { \
f9298de5 3058 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3059 /* set masked-off elements to 1s */ \
3060 vext_set_elems_1s(vd, vma, i * ESZ, \
3061 (i + 1) * ESZ); \
ce2a0343
LZ
3062 continue; \
3063 } \
3064 do_##NAME(vd, vs1, vs2, i, env); \
3065 } \
f714361e 3066 env->vstart = 0; \
5eacf7d8 3067 /* set tail elements to 1s */ \
3068 vext_set_elems_1s(vd, vta, vl * ESZ, \
3069 total_elems * ESZ); \
ce2a0343
LZ
3070}
3071
3072RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3073RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3074RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
5eacf7d8 3075GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
3076GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
3077GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
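/*
 * Roughly, GEN_VEXT_VV_ENV(vfadd_vv_h, 2) above expands to HELPER(vfadd_vv_h):
 * it walks elements env->vstart..vl-1, skips masked-off elements (filling
 * them with 1s when vma is set), applies do_vfadd_vv_h() to the active ones,
 * resets vstart, and finally fills the tail with 1s when vta is set.
 */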
ce2a0343
LZ
3078
3079#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3080static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3081 CPURISCVState *env) \
3082{ \
3083 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3084 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3085}
3086
5eacf7d8 3087#define GEN_VEXT_VF(NAME, ESZ) \
ce2a0343
LZ
3088void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
3089 void *vs2, CPURISCVState *env, \
3090 uint32_t desc) \
3091{ \
ce2a0343
LZ
3092 uint32_t vm = vext_vm(desc); \
3093 uint32_t vl = env->vl; \
5eacf7d8 3094 uint32_t total_elems = \
3095 vext_get_total_elems(env, desc, ESZ); \
3096 uint32_t vta = vext_vta(desc); \
5b448f44 3097 uint32_t vma = vext_vma(desc); \
ce2a0343
LZ
3098 uint32_t i; \
3099 \
f714361e 3100 for (i = env->vstart; i < vl; i++) { \
f9298de5 3101 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3102 /* set masked-off elements to 1s */ \
3103 vext_set_elems_1s(vd, vma, i * ESZ, \
3104 (i + 1) * ESZ); \
ce2a0343
LZ
3105 continue; \
3106 } \
3107 do_##NAME(vd, s1, vs2, i, env); \
3108 } \
f714361e 3109 env->vstart = 0; \
5eacf7d8 3110 /* set tail elements to 1s */ \
3111 vext_set_elems_1s(vd, vta, vl * ESZ, \
3112 total_elems * ESZ); \
ce2a0343
LZ
3113}
3114
3115RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3116RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3117RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
5eacf7d8 3118GEN_VEXT_VF(vfadd_vf_h, 2)
3119GEN_VEXT_VF(vfadd_vf_w, 4)
3120GEN_VEXT_VF(vfadd_vf_d, 8)
ce2a0343
LZ
3121
3122RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3123RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3124RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
5eacf7d8 3125GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
3126GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
3127GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
ce2a0343
LZ
3128RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3129RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3130RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
5eacf7d8 3131GEN_VEXT_VF(vfsub_vf_h, 2)
3132GEN_VEXT_VF(vfsub_vf_w, 4)
3133GEN_VEXT_VF(vfsub_vf_d, 8)
ce2a0343
LZ
3134
3135static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3136{
3137 return float16_sub(b, a, s);
3138}
3139
3140static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3141{
3142 return float32_sub(b, a, s);
3143}
3144
3145static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3146{
3147 return float64_sub(b, a, s);
3148}
3149
3150RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3151RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3152RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
5eacf7d8 3153GEN_VEXT_VF(vfrsub_vf_h, 2)
3154GEN_VEXT_VF(vfrsub_vf_w, 4)
3155GEN_VEXT_VF(vfrsub_vf_d, 8)
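/*
 * Note: the *_rsub helpers swap their operands, so vfrsub.vf computes
 * f[rs1] - vs2[i] rather than vs2[i] - f[rs1]; OPFVF2 always passes the
 * vector element as the first argument and the scalar as the second.
 */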
eeffab2e
LZ
3156
3157/* Vector Widening Floating-Point Add/Subtract Instructions */
3158static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3159{
3160 return float32_add(float16_to_float32(a, true, s),
3161 float16_to_float32(b, true, s), s);
3162}
3163
3164static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3165{
3166 return float64_add(float32_to_float64(a, s),
3167 float32_to_float64(b, s), s);
3168
3169}
3170
3171RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3172RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
5eacf7d8 3173GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3174GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
eeffab2e
LZ
3175RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3176RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
5eacf7d8 3177GEN_VEXT_VF(vfwadd_vf_h, 4)
3178GEN_VEXT_VF(vfwadd_vf_w, 8)
eeffab2e
LZ
3179
3180static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3181{
3182 return float32_sub(float16_to_float32(a, true, s),
3183 float16_to_float32(b, true, s), s);
3184}
3185
3186static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3187{
3188 return float64_sub(float32_to_float64(a, s),
3189 float32_to_float64(b, s), s);
3190
3191}
3192
3193RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3194RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
5eacf7d8 3195GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3196GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
eeffab2e
LZ
3197RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3198RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
5eacf7d8 3199GEN_VEXT_VF(vfwsub_vf_h, 4)
3200GEN_VEXT_VF(vfwsub_vf_w, 8)
eeffab2e
LZ
3201
3202static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3203{
3204 return float32_add(a, float16_to_float32(b, true, s), s);
3205}
3206
3207static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3208{
3209 return float64_add(a, float32_to_float64(b, s), s);
3210}
3211
3212RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3213RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
5eacf7d8 3214GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3215GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
eeffab2e
LZ
3216RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3217RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
5eacf7d8 3218GEN_VEXT_VF(vfwadd_wf_h, 4)
3219GEN_VEXT_VF(vfwadd_wf_w, 8)
eeffab2e
LZ
3220
3221static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3222{
3223 return float32_sub(a, float16_to_float32(b, true, s), s);
3224}
3225
3226static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3227{
3228 return float64_sub(a, float32_to_float64(b, s), s);
3229}
3230
3231RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3232RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
5eacf7d8 3233GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3234GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
eeffab2e
LZ
3235RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3236RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
5eacf7d8 3237GEN_VEXT_VF(vfwsub_wf_h, 4)
3238GEN_VEXT_VF(vfwsub_wf_w, 8)
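/*
 * The .vv/.vf widening forms (WOP_UUU_*) convert both sources up before
 * operating, while the .wv/.wf forms (WOP_WUUU_*) take an operand that is
 * already double-width and only convert the narrow source, as vfwaddw16()
 * and vfwsubw16() above show.
 */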
0e0057cb
LZ
3239
3240/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3241RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3242RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3243RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
5eacf7d8 3244GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3245GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3246GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
0e0057cb
LZ
3247RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3248RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3249RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
5eacf7d8 3250GEN_VEXT_VF(vfmul_vf_h, 2)
3251GEN_VEXT_VF(vfmul_vf_w, 4)
3252GEN_VEXT_VF(vfmul_vf_d, 8)
0e0057cb
LZ
3253
3254RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3255RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3256RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
5eacf7d8 3257GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3258GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3259GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
0e0057cb
LZ
3260RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3261RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3262RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
5eacf7d8 3263GEN_VEXT_VF(vfdiv_vf_h, 2)
3264GEN_VEXT_VF(vfdiv_vf_w, 4)
3265GEN_VEXT_VF(vfdiv_vf_d, 8)
0e0057cb
LZ
3266
3267static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3268{
3269 return float16_div(b, a, s);
3270}
3271
3272static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3273{
3274 return float32_div(b, a, s);
3275}
3276
3277static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3278{
3279 return float64_div(b, a, s);
3280}
3281
3282RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3283RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3284RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
5eacf7d8 3285GEN_VEXT_VF(vfrdiv_vf_h, 2)
3286GEN_VEXT_VF(vfrdiv_vf_w, 4)
3287GEN_VEXT_VF(vfrdiv_vf_d, 8)
f7c7b7cd
LZ
3288
3289/* Vector Widening Floating-Point Multiply */
3290static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3291{
3292 return float32_mul(float16_to_float32(a, true, s),
3293 float16_to_float32(b, true, s), s);
3294}
3295
3296static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3297{
3298 return float64_mul(float32_to_float64(a, s),
3299 float32_to_float64(b, s), s);
3300
3301}
3302RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3303RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
5eacf7d8 3304GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3305GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
f7c7b7cd
LZ
3306RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3307RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
5eacf7d8 3308GEN_VEXT_VF(vfwmul_vf_h, 4)
3309GEN_VEXT_VF(vfwmul_vf_w, 8)
4aa5a8fe
LZ
3310
3311/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3312#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3313static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3314 CPURISCVState *env) \
3315{ \
3316 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3317 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3318 TD d = *((TD *)vd + HD(i)); \
3319 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3320}
3321
3322static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3323{
3324 return float16_muladd(a, b, d, 0, s);
3325}
3326
3327static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3328{
3329 return float32_muladd(a, b, d, 0, s);
3330}
3331
3332static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3333{
3334 return float64_muladd(a, b, d, 0, s);
3335}
3336
3337RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3338RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3339RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
5eacf7d8 3340GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3341GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3342GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
4aa5a8fe
LZ
3343
3344#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3345static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3346 CPURISCVState *env) \
3347{ \
3348 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3349 TD d = *((TD *)vd + HD(i)); \
3350 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3351}
3352
3353RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3354RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3355RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
5eacf7d8 3356GEN_VEXT_VF(vfmacc_vf_h, 2)
3357GEN_VEXT_VF(vfmacc_vf_w, 4)
3358GEN_VEXT_VF(vfmacc_vf_d, 8)
4aa5a8fe
LZ
3359
3360static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3361{
3362 return float16_muladd(a, b, d,
3363 float_muladd_negate_c | float_muladd_negate_product, s);
3364}
3365
3366static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3367{
3368 return float32_muladd(a, b, d,
3369 float_muladd_negate_c | float_muladd_negate_product, s);
3370}
3371
3372static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3373{
3374 return float64_muladd(a, b, d,
3375 float_muladd_negate_c | float_muladd_negate_product, s);
3376}
3377
3378RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3379RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3380RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
5eacf7d8 3381GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3382GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3383GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
4aa5a8fe
LZ
3384RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3385RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3386RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
5eacf7d8 3387GEN_VEXT_VF(vfnmacc_vf_h, 2)
3388GEN_VEXT_VF(vfnmacc_vf_w, 4)
3389GEN_VEXT_VF(vfnmacc_vf_d, 8)
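/*
 * The fused forms map directly onto softfloat's float*_muladd() flags:
 * float_muladd_negate_product negates a * b and float_muladd_negate_c
 * negates the addend, so fnmacc computes -(vs1 * vs2) - vd in a single
 * rounding step.
 */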
4aa5a8fe
LZ
3390
3391static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3392{
3393 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3394}
3395
3396static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3397{
3398 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3399}
3400
3401static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3402{
3403 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3404}
3405
3406RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3407RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3408RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
5eacf7d8 3409GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3410GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3411GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
4aa5a8fe
LZ
3412RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3413RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3414RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
5eacf7d8 3415GEN_VEXT_VF(vfmsac_vf_h, 2)
3416GEN_VEXT_VF(vfmsac_vf_w, 4)
3417GEN_VEXT_VF(vfmsac_vf_d, 8)
4aa5a8fe
LZ
3418
3419static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3420{
3421 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3422}
3423
3424static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3425{
3426 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3427}
3428
3429static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3430{
3431 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3432}
3433
3434RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3435RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3436RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
5eacf7d8 3437GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3438GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3439GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
4aa5a8fe
LZ
3440RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3441RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3442RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
5eacf7d8 3443GEN_VEXT_VF(vfnmsac_vf_h, 2)
3444GEN_VEXT_VF(vfnmsac_vf_w, 4)
3445GEN_VEXT_VF(vfnmsac_vf_d, 8)
4aa5a8fe
LZ
3446
3447static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3448{
3449 return float16_muladd(d, b, a, 0, s);
3450}
3451
3452static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3453{
3454 return float32_muladd(d, b, a, 0, s);
3455}
3456
3457static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3458{
3459 return float64_muladd(d, b, a, 0, s);
3460}
3461
3462RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3463RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3464RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
5eacf7d8 3465GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3466GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3467GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
4aa5a8fe
LZ
3468RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3469RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3470RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
5eacf7d8 3471GEN_VEXT_VF(vfmadd_vf_h, 2)
3472GEN_VEXT_VF(vfmadd_vf_w, 4)
3473GEN_VEXT_VF(vfmadd_vf_d, 8)
4aa5a8fe
LZ
3474
3475static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3476{
3477 return float16_muladd(d, b, a,
3478 float_muladd_negate_c | float_muladd_negate_product, s);
3479}
3480
3481static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3482{
3483 return float32_muladd(d, b, a,
3484 float_muladd_negate_c | float_muladd_negate_product, s);
3485}
3486
3487static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3488{
3489 return float64_muladd(d, b, a,
3490 float_muladd_negate_c | float_muladd_negate_product, s);
3491}
3492
3493RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3494RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3495RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
5eacf7d8 3496GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3497GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3498GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
4aa5a8fe
LZ
3499RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3500RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3501RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
5eacf7d8 3502GEN_VEXT_VF(vfnmadd_vf_h, 2)
3503GEN_VEXT_VF(vfnmadd_vf_w, 4)
3504GEN_VEXT_VF(vfnmadd_vf_d, 8)
4aa5a8fe
LZ
3505
3506static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3507{
3508 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3509}
3510
3511static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3512{
3513 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3514}
3515
3516static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3517{
3518 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3519}
3520
3521RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3522RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3523RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
5eacf7d8 3524GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3525GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3526GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
4aa5a8fe
LZ
3527RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3528RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3529RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
5eacf7d8 3530GEN_VEXT_VF(vfmsub_vf_h, 2)
3531GEN_VEXT_VF(vfmsub_vf_w, 4)
3532GEN_VEXT_VF(vfmsub_vf_d, 8)
4aa5a8fe
LZ
3533
3534static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3535{
3536 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3537}
3538
3539static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3540{
3541 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3542}
3543
3544static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3545{
3546 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3547}
3548
3549RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3550RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3551RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
5eacf7d8 3552GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3553GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3554GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
4aa5a8fe
LZ
3555RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3556RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3557RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
5eacf7d8 3558GEN_VEXT_VF(vfnmsub_vf_h, 2)
3559GEN_VEXT_VF(vfnmsub_vf_w, 4)
3560GEN_VEXT_VF(vfnmsub_vf_d, 8)
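/*
 * Operand-ordering summary for the single-width fused ops above:
 * vf[n]macc/vf[n]msac multiply the two sources and accumulate into vd
 * (vd = +/-(vs1 * vs2) +/- vd), while vf[n]madd/vf[n]msub multiply vd by
 * vs1 (or f[rs1]) and add or subtract vs2 (vd = +/-(vd * vs1) +/- vs2).
 */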
0dd50959
LZ
3561
3562/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3563static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3564{
3565 return float32_muladd(float16_to_float32(a, true, s),
3566 float16_to_float32(b, true, s), d, 0, s);
3567}
3568
3569static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3570{
3571 return float64_muladd(float32_to_float64(a, s),
3572 float32_to_float64(b, s), d, 0, s);
3573}
3574
3575RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3576RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
5eacf7d8 3577GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3578GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
0dd50959
LZ
3579RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3580RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
5eacf7d8 3581GEN_VEXT_VF(vfwmacc_vf_h, 4)
3582GEN_VEXT_VF(vfwmacc_vf_w, 8)
0dd50959
LZ
3583
3584static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3585{
3586 return float32_muladd(float16_to_float32(a, true, s),
3587 float16_to_float32(b, true, s), d,
3588 float_muladd_negate_c | float_muladd_negate_product, s);
3589}
3590
3591static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3592{
3593 return float64_muladd(float32_to_float64(a, s),
3594 float32_to_float64(b, s), d,
3595 float_muladd_negate_c | float_muladd_negate_product, s);
3596}
3597
3598RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3599RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
5eacf7d8 3600GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3601GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
0dd50959
LZ
3602RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3603RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
5eacf7d8 3604GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3605GEN_VEXT_VF(vfwnmacc_vf_w, 8)
0dd50959
LZ
3606
3607static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3608{
3609 return float32_muladd(float16_to_float32(a, true, s),
3610 float16_to_float32(b, true, s), d,
3611 float_muladd_negate_c, s);
3612}
3613
3614static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3615{
3616 return float64_muladd(float32_to_float64(a, s),
3617 float32_to_float64(b, s), d,
3618 float_muladd_negate_c, s);
3619}
3620
3621RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3622RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
5eacf7d8 3623GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3624GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
0dd50959
LZ
3625RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3626RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
5eacf7d8 3627GEN_VEXT_VF(vfwmsac_vf_h, 4)
3628GEN_VEXT_VF(vfwmsac_vf_w, 8)
0dd50959
LZ
3629
3630static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3631{
3632 return float32_muladd(float16_to_float32(a, true, s),
3633 float16_to_float32(b, true, s), d,
3634 float_muladd_negate_product, s);
3635}
3636
3637static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3638{
3639 return float64_muladd(float32_to_float64(a, s),
3640 float32_to_float64(b, s), d,
3641 float_muladd_negate_product, s);
3642}
3643
3644RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3645RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
5eacf7d8 3646GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3647GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
0dd50959
LZ
3648RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3649RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
5eacf7d8 3650GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3651GEN_VEXT_VF(vfwnmsac_vf_w, 8)
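/*
 * In the widening fused ops the float16_to_float32()/float32_to_float64()
 * up-conversions are exact, so the whole operation still rounds only once,
 * in the final wide muladd.
 */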
d9e4ce72
LZ
3652
3653/* Vector Floating-Point Square-Root Instruction */
3654/* (TD, T2, TX2) */
3655#define OP_UU_H uint16_t, uint16_t, uint16_t
3656#define OP_UU_W uint32_t, uint32_t, uint32_t
3657#define OP_UU_D uint64_t, uint64_t, uint64_t
3658
3659#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3660static void do_##NAME(void *vd, void *vs2, int i, \
3661 CPURISCVState *env) \
3662{ \
3663 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3664 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3665}
3666
5eacf7d8 3667#define GEN_VEXT_V_ENV(NAME, ESZ) \
d9e4ce72
LZ
3668void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3669 CPURISCVState *env, uint32_t desc) \
3670{ \
d9e4ce72
LZ
3671 uint32_t vm = vext_vm(desc); \
3672 uint32_t vl = env->vl; \
5eacf7d8 3673 uint32_t total_elems = \
3674 vext_get_total_elems(env, desc, ESZ); \
3675 uint32_t vta = vext_vta(desc); \
5b448f44 3676 uint32_t vma = vext_vma(desc); \
d9e4ce72
LZ
3677 uint32_t i; \
3678 \
3679 if (vl == 0) { \
3680 return; \
3681 } \
f714361e 3682 for (i = env->vstart; i < vl; i++) { \
f9298de5 3683 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3684 /* set masked-off elements to 1s */ \
3685 vext_set_elems_1s(vd, vma, i * ESZ, \
3686 (i + 1) * ESZ); \
d9e4ce72
LZ
3687 continue; \
3688 } \
3689 do_##NAME(vd, vs2, i, env); \
3690 } \
f714361e 3691 env->vstart = 0; \
5eacf7d8 3692 vext_set_elems_1s(vd, vta, vl * ESZ, \
3693 total_elems * ESZ); \
d9e4ce72
LZ
3694}
3695
3696RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3697RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3698RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
5eacf7d8 3699GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3700GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3701GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
230b53dd 3702
e848a1e5
FC
3703/*
3704 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3705 *
3706 * Adapted from riscv-v-spec recip.c:
3707 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3708 */
3709static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3710{
3711 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3712 uint64_t exp = extract64(f, frac_size, exp_size);
3713 uint64_t frac = extract64(f, 0, frac_size);
3714
3715 const uint8_t lookup_table[] = {
3716 52, 51, 50, 48, 47, 46, 44, 43,
3717 42, 41, 40, 39, 38, 36, 35, 34,
3718 33, 32, 31, 30, 30, 29, 28, 27,
3719 26, 25, 24, 23, 23, 22, 21, 20,
3720 19, 19, 18, 17, 16, 16, 15, 14,
3721 14, 13, 12, 12, 11, 10, 10, 9,
3722 9, 8, 7, 7, 6, 6, 5, 4,
3723 4, 3, 3, 2, 2, 1, 1, 0,
3724 127, 125, 123, 121, 119, 118, 116, 114,
3725 113, 111, 109, 108, 106, 105, 103, 102,
3726 100, 99, 97, 96, 95, 93, 92, 91,
3727 90, 88, 87, 86, 85, 84, 83, 82,
3728 80, 79, 78, 77, 76, 75, 74, 73,
3729 72, 71, 70, 70, 69, 68, 67, 66,
3730 65, 64, 63, 63, 62, 61, 60, 59,
3731 59, 58, 57, 56, 56, 55, 54, 53
3732 };
3733 const int precision = 7;
3734
3735 if (exp == 0 && frac != 0) { /* subnormal */
3736 /* Normalize the subnormal. */
3737 while (extract64(frac, frac_size - 1, 1) == 0) {
3738 exp--;
3739 frac <<= 1;
3740 }
3741
3742 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3743 }
3744
3745 int idx = ((exp & 1) << (precision - 1)) |
3746 (frac >> (frac_size - precision + 1));
3747 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3748 (frac_size - precision);
3749 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3750
3751 uint64_t val = 0;
3752 val = deposit64(val, 0, frac_size, out_frac);
3753 val = deposit64(val, frac_size, exp_size, out_exp);
3754 val = deposit64(val, frac_size + exp_size, 1, sign);
3755 return val;
3756}
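/*
 * In short: the 7-bit estimate is looked up with the exponent's parity bit
 * concatenated with the top six fraction bits, and the result exponent is
 * roughly (3 * bias - 1 - exp) / 2, i.e. the negated, halved unbiased
 * exponent re-biased for the reciprocal square root.
 */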
3757
3758static float16 frsqrt7_h(float16 f, float_status *s)
3759{
3760 int exp_size = 5, frac_size = 10;
3761 bool sign = float16_is_neg(f);
3762
3763 /*
3764 * frsqrt7(sNaN) = canonical NaN
3765 * frsqrt7(-inf) = canonical NaN
3766 * frsqrt7(-normal) = canonical NaN
3767 * frsqrt7(-subnormal) = canonical NaN
3768 */
3769 if (float16_is_signaling_nan(f, s) ||
3770 (float16_is_infinity(f) && sign) ||
3771 (float16_is_normal(f) && sign) ||
3772 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3773 s->float_exception_flags |= float_flag_invalid;
3774 return float16_default_nan(s);
3775 }
3776
3777 /* frsqrt7(qNaN) = canonical NaN */
3778 if (float16_is_quiet_nan(f, s)) {
3779 return float16_default_nan(s);
3780 }
3781
3782 /* frsqrt7(+-0) = +-inf */
3783 if (float16_is_zero(f)) {
3784 s->float_exception_flags |= float_flag_divbyzero;
3785 return float16_set_sign(float16_infinity, sign);
3786 }
3787
3788 /* frsqrt7(+inf) = +0 */
3789 if (float16_is_infinity(f) && !sign) {
3790 return float16_set_sign(float16_zero, sign);
3791 }
3792
3793 /* +normal, +subnormal */
3794 uint64_t val = frsqrt7(f, exp_size, frac_size);
3795 return make_float16(val);
3796}
3797
3798static float32 frsqrt7_s(float32 f, float_status *s)
3799{
3800 int exp_size = 8, frac_size = 23;
3801 bool sign = float32_is_neg(f);
3802
3803 /*
3804 * frsqrt7(sNaN) = canonical NaN
3805 * frsqrt7(-inf) = canonical NaN
3806 * frsqrt7(-normal) = canonical NaN
3807 * frsqrt7(-subnormal) = canonical NaN
3808 */
3809 if (float32_is_signaling_nan(f, s) ||
3810 (float32_is_infinity(f) && sign) ||
3811 (float32_is_normal(f) && sign) ||
3812 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3813 s->float_exception_flags |= float_flag_invalid;
3814 return float32_default_nan(s);
3815 }
3816
3817 /* frsqrt7(qNaN) = canonical NaN */
3818 if (float32_is_quiet_nan(f, s)) {
3819 return float32_default_nan(s);
3820 }
3821
3822 /* frsqrt7(+-0) = +-inf */
3823 if (float32_is_zero(f)) {
3824 s->float_exception_flags |= float_flag_divbyzero;
3825 return float32_set_sign(float32_infinity, sign);
3826 }
3827
3828 /* frsqrt7(+inf) = +0 */
3829 if (float32_is_infinity(f) && !sign) {
3830 return float32_set_sign(float32_zero, sign);
3831 }
3832
3833 /* +normal, +subnormal */
3834 uint64_t val = frsqrt7(f, exp_size, frac_size);
3835 return make_float32(val);
3836}
3837
3838static float64 frsqrt7_d(float64 f, float_status *s)
3839{
3840 int exp_size = 11, frac_size = 52;
3841 bool sign = float64_is_neg(f);
3842
3843 /*
3844 * frsqrt7(sNaN) = canonical NaN
3845 * frsqrt7(-inf) = canonical NaN
3846 * frsqrt7(-normal) = canonical NaN
3847 * frsqrt7(-subnormal) = canonical NaN
3848 */
3849 if (float64_is_signaling_nan(f, s) ||
3850 (float64_is_infinity(f) && sign) ||
3851 (float64_is_normal(f) && sign) ||
3852 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3853 s->float_exception_flags |= float_flag_invalid;
3854 return float64_default_nan(s);
3855 }
3856
3857 /* frsqrt7(qNaN) = canonical NaN */
3858 if (float64_is_quiet_nan(f, s)) {
3859 return float64_default_nan(s);
3860 }
3861
3862 /* frsqrt7(+-0) = +-inf */
3863 if (float64_is_zero(f)) {
3864 s->float_exception_flags |= float_flag_divbyzero;
3865 return float64_set_sign(float64_infinity, sign);
3866 }
3867
3868 /* frsqrt7(+inf) = +0 */
3869 if (float64_is_infinity(f) && !sign) {
3870 return float64_set_sign(float64_zero, sign);
3871 }
3872
3873 /* +normal, +subnormal */
3874 uint64_t val = frsqrt7(f, exp_size, frac_size);
3875 return make_float64(val);
3876}
3877
3878RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3879RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3880RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
5eacf7d8 3881GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3882GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3883GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
e848a1e5 3884
55c35407
FC
3885/*
3886 * Vector Floating-Point Reciprocal Estimate Instruction
3887 *
3888 * Adapted from riscv-v-spec recip.c:
3889 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3890 */
3891static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3892 float_status *s)
3893{
3894 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3895 uint64_t exp = extract64(f, frac_size, exp_size);
3896 uint64_t frac = extract64(f, 0, frac_size);
3897
3898 const uint8_t lookup_table[] = {
3899 127, 125, 123, 121, 119, 117, 116, 114,
3900 112, 110, 109, 107, 105, 104, 102, 100,
3901 99, 97, 96, 94, 93, 91, 90, 88,
3902 87, 85, 84, 83, 81, 80, 79, 77,
3903 76, 75, 74, 72, 71, 70, 69, 68,
3904 66, 65, 64, 63, 62, 61, 60, 59,
3905 58, 57, 56, 55, 54, 53, 52, 51,
3906 50, 49, 48, 47, 46, 45, 44, 43,
3907 42, 41, 40, 40, 39, 38, 37, 36,
3908 35, 35, 34, 33, 32, 31, 31, 30,
3909 29, 28, 28, 27, 26, 25, 25, 24,
3910 23, 23, 22, 21, 21, 20, 19, 19,
3911 18, 17, 17, 16, 15, 15, 14, 14,
3912 13, 12, 12, 11, 11, 10, 9, 9,
3913 8, 8, 7, 7, 6, 5, 5, 4,
3914 4, 3, 3, 2, 2, 1, 1, 0
3915 };
3916 const int precision = 7;
3917
3918 if (exp == 0 && frac != 0) { /* subnormal */
3919 /* Normalize the subnormal. */
3920 while (extract64(frac, frac_size - 1, 1) == 0) {
3921 exp--;
3922 frac <<= 1;
3923 }
3924
3925 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3926
3927 if (exp != 0 && exp != UINT64_MAX) {
3928 /*
3929 * Overflow to inf or max value of same sign,
3930 * depending on sign and rounding mode.
3931 */
3932 s->float_exception_flags |= (float_flag_inexact |
3933 float_flag_overflow);
3934
3935 if ((s->float_rounding_mode == float_round_to_zero) ||
3936 ((s->float_rounding_mode == float_round_down) && !sign) ||
3937 ((s->float_rounding_mode == float_round_up) && sign)) {
3938 /* Return greatest/negative finite value. */
3939 return (sign << (exp_size + frac_size)) |
3940 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3941 } else {
3942 /* Return +-inf. */
3943 return (sign << (exp_size + frac_size)) |
3944 MAKE_64BIT_MASK(frac_size, exp_size);
3945 }
3946 }
3947 }
3948
3949 int idx = frac >> (frac_size - precision);
3950 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3951 (frac_size - precision);
3952 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3953
3954 if (out_exp == 0 || out_exp == UINT64_MAX) {
3955 /*
3956 * The result is subnormal, but don't raise the underflow exception,
3957 * because there's no additional loss of precision.
3958 */
3959 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3960 if (out_exp == UINT64_MAX) {
3961 out_frac >>= 1;
3962 out_exp = 0;
3963 }
3964 }
3965
3966 uint64_t val = 0;
3967 val = deposit64(val, 0, frac_size, out_frac);
3968 val = deposit64(val, frac_size, exp_size, out_exp);
3969 val = deposit64(val, frac_size + exp_size, 1, sign);
3970 return val;
3971}
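/*
 * In short: frec7 indexes the table with the top seven fraction bits and
 * uses 2 * bias - 1 - exp as the result exponent (the reciprocal's biased
 * exponent); when that result is 0, or underflows past 0 and wraps to
 * UINT64_MAX, the significand is shifted right instead and the value is
 * returned as a subnormal.
 */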
3972
3973static float16 frec7_h(float16 f, float_status *s)
3974{
3975 int exp_size = 5, frac_size = 10;
3976 bool sign = float16_is_neg(f);
3977
3978 /* frec7(+-inf) = +-0 */
3979 if (float16_is_infinity(f)) {
3980 return float16_set_sign(float16_zero, sign);
3981 }
3982
3983 /* frec7(+-0) = +-inf */
3984 if (float16_is_zero(f)) {
3985 s->float_exception_flags |= float_flag_divbyzero;
3986 return float16_set_sign(float16_infinity, sign);
3987 }
3988
3989 /* frec7(sNaN) = canonical NaN */
3990 if (float16_is_signaling_nan(f, s)) {
3991 s->float_exception_flags |= float_flag_invalid;
3992 return float16_default_nan(s);
3993 }
3994
3995 /* frec7(qNaN) = canonical NaN */
3996 if (float16_is_quiet_nan(f, s)) {
3997 return float16_default_nan(s);
3998 }
3999
4000 /* +-normal, +-subnormal */
4001 uint64_t val = frec7(f, exp_size, frac_size, s);
4002 return make_float16(val);
4003}
4004
4005static float32 frec7_s(float32 f, float_status *s)
4006{
4007 int exp_size = 8, frac_size = 23;
4008 bool sign = float32_is_neg(f);
4009
4010 /* frec7(+-inf) = +-0 */
4011 if (float32_is_infinity(f)) {
4012 return float32_set_sign(float32_zero, sign);
4013 }
4014
4015 /* frec7(+-0) = +-inf */
4016 if (float32_is_zero(f)) {
4017 s->float_exception_flags |= float_flag_divbyzero;
4018 return float32_set_sign(float32_infinity, sign);
4019 }
4020
4021 /* frec7(sNaN) = canonical NaN */
4022 if (float32_is_signaling_nan(f, s)) {
4023 s->float_exception_flags |= float_flag_invalid;
4024 return float32_default_nan(s);
4025 }
4026
4027 /* frec7(qNaN) = canonical NaN */
4028 if (float32_is_quiet_nan(f, s)) {
4029 return float32_default_nan(s);
4030 }
4031
4032 /* +-normal, +-subnormal */
4033 uint64_t val = frec7(f, exp_size, frac_size, s);
4034 return make_float32(val);
4035}
4036
4037static float64 frec7_d(float64 f, float_status *s)
4038{
4039 int exp_size = 11, frac_size = 52;
4040 bool sign = float64_is_neg(f);
4041
4042 /* frec7(+-inf) = +-0 */
4043 if (float64_is_infinity(f)) {
4044 return float64_set_sign(float64_zero, sign);
4045 }
4046
4047 /* frec7(+-0) = +-inf */
4048 if (float64_is_zero(f)) {
4049 s->float_exception_flags |= float_flag_divbyzero;
4050 return float64_set_sign(float64_infinity, sign);
4051 }
4052
4053 /* frec7(sNaN) = canonical NaN */
4054 if (float64_is_signaling_nan(f, s)) {
4055 s->float_exception_flags |= float_flag_invalid;
4056 return float64_default_nan(s);
4057 }
4058
4059 /* frec7(qNaN) = canonical NaN */
4060 if (float64_is_quiet_nan(f, s)) {
4061 return float64_default_nan(s);
4062 }
4063
4064 /* +-normal, +-subnormal */
4065 uint64_t val = frec7(f, exp_size, frac_size, s);
4066 return make_float64(val);
4067}
4068
4069RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
4070RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
4071RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
5eacf7d8 4072GEN_VEXT_V_ENV(vfrec7_v_h, 2)
4073GEN_VEXT_V_ENV(vfrec7_v_w, 4)
4074GEN_VEXT_V_ENV(vfrec7_v_d, 8)
55c35407 4075
230b53dd 4076/* Vector Floating-Point MIN/MAX Instructions */
49c5611a
FC
4077RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
4078RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
4079RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
5eacf7d8 4080GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
4081GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
4082GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
49c5611a
FC
4083RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
4084RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
4085RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
5eacf7d8 4086GEN_VEXT_VF(vfmin_vf_h, 2)
4087GEN_VEXT_VF(vfmin_vf_w, 4)
4088GEN_VEXT_VF(vfmin_vf_d, 8)
230b53dd 4089
49c5611a
FC
4090RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
4091RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
4092RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
5eacf7d8 4093GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
4094GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
4095GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
49c5611a
FC
4096RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
4097RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
4098RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
5eacf7d8 4099GEN_VEXT_VF(vfmax_vf_h, 2)
4100GEN_VEXT_VF(vfmax_vf_w, 4)
4101GEN_VEXT_VF(vfmax_vf_d, 8)
1d426b81
LZ
4102
4103/* Vector Floating-Point Sign-Injection Instructions */
4104static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4105{
4106 return deposit64(b, 0, 15, a);
4107}
4108
4109static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4110{
4111 return deposit64(b, 0, 31, a);
4112}
4113
4114static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4115{
4116 return deposit64(b, 0, 63, a);
4117}
4118
4119RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4120RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4121RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
5eacf7d8 4122GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
4123GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
4124GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
1d426b81
LZ
4125RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4126RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4127RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
5eacf7d8 4128GEN_VEXT_VF(vfsgnj_vf_h, 2)
4129GEN_VEXT_VF(vfsgnj_vf_w, 4)
4130GEN_VEXT_VF(vfsgnj_vf_d, 8)
1d426b81
LZ
4131
4132static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4133{
4134 return deposit64(~b, 0, 15, a);
4135}
4136
4137static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4138{
4139 return deposit64(~b, 0, 31, a);
4140}
4141
4142static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4143{
4144 return deposit64(~b, 0, 63, a);
4145}
4146
4147RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4148RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4149RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
5eacf7d8 4150GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
4151GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
4152GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
1d426b81
LZ
4153RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4154RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4155RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
5eacf7d8 4156GEN_VEXT_VF(vfsgnjn_vf_h, 2)
4157GEN_VEXT_VF(vfsgnjn_vf_w, 4)
4158GEN_VEXT_VF(vfsgnjn_vf_d, 8)
1d426b81
LZ
4159
4160static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4161{
4162 return deposit64(b ^ a, 0, 15, a);
4163}
4164
4165static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4166{
4167 return deposit64(b ^ a, 0, 31, a);
4168}
4169
4170static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4171{
4172 return deposit64(b ^ a, 0, 63, a);
4173}
4174
4175RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4176RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4177RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
5eacf7d8 4178GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4179GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4180GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
1d426b81
LZ
4181RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4182RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4183RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
5eacf7d8 4184GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4185GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4186GEN_VEXT_VF(vfsgnjx_vf_d, 8)
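/*
 * For all three variants the magnitude bits come from the first argument
 * (the vs2 element) and only the sign is taken from the second (vs1 or
 * f[rs1]): vfsgnj copies it, vfsgnjn inverts it, and vfsgnjx XORs it with
 * vs2's own sign.
 */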
2a68e9e5
LZ
4187
4188/* Vector Floating-Point Compare Instructions */
4189#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4190void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4191 CPURISCVState *env, uint32_t desc) \
4192{ \
2a68e9e5
LZ
4193 uint32_t vm = vext_vm(desc); \
4194 uint32_t vl = env->vl; \
5eacf7d8 4195 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
4196 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 4197 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
4198 uint32_t i; \
4199 \
f714361e 4200 for (i = env->vstart; i < vl; i++) { \
2a68e9e5
LZ
4201 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4202 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4203 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4204 /* set masked-off elements to 1s */ \
4205 if (vma) { \
4206 vext_set_elem_mask(vd, i, 1); \
4207 } \
2a68e9e5
LZ
4208 continue; \
4209 } \
f9298de5 4210 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4211 DO_OP(s2, s1, &env->fp_status)); \
4212 } \
f714361e 4213 env->vstart = 0; \
5eacf7d8 4214 /* mask destination registers are always tail-agnostic */ \
4215 /* set tail elements to 1s */ \
4216 if (vta_all_1s) { \
4217 for (; i < total_elems; i++) { \
4218 vext_set_elem_mask(vd, i, 1); \
4219 } \
4220 } \
2a68e9e5
LZ
4221}
4222
2a68e9e5
LZ
4223GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4224GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4225GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4226
4227#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4228void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4229 CPURISCVState *env, uint32_t desc) \
4230{ \
2a68e9e5
LZ
4231 uint32_t vm = vext_vm(desc); \
4232 uint32_t vl = env->vl; \
5eacf7d8 4233 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
4234 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 4235 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
4236 uint32_t i; \
4237 \
f714361e 4238 for (i = env->vstart; i < vl; i++) { \
2a68e9e5 4239 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4240 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4241 /* set masked-off elements to 1s */ \
4242 if (vma) { \
4243 vext_set_elem_mask(vd, i, 1); \
4244 } \
2a68e9e5
LZ
4245 continue; \
4246 } \
f9298de5 4247 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4248 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4249 } \
f714361e 4250 env->vstart = 0; \
5eacf7d8 4251 /* mask destination registers are always tail-agnostic */ \
4252 /* set tail elements to 1s */ \
4253 if (vta_all_1s) { \
4254 for (; i < total_elems; i++) { \
4255 vext_set_elem_mask(vd, i, 1); \
4256 } \
4257 } \
2a68e9e5
LZ
4258}
4259
4260GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4261GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4262GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4263
4264static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4265{
4266 FloatRelation compare = float16_compare_quiet(a, b, s);
4267 return compare != float_relation_equal;
4268}
4269
4270static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4271{
4272 FloatRelation compare = float32_compare_quiet(a, b, s);
4273 return compare != float_relation_equal;
4274}
4275
4276static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4277{
4278 FloatRelation compare = float64_compare_quiet(a, b, s);
4279 return compare != float_relation_equal;
4280}
4281
4282GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4283GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4284GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4285GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4286GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4287GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4288
2a68e9e5
LZ
4289GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4290GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4291GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4292GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4293GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4294GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4295
2a68e9e5
LZ
4296GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4297GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4298GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4299GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4300GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4301GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4302
4303static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4304{
4305 FloatRelation compare = float16_compare(a, b, s);
4306 return compare == float_relation_greater;
4307}
4308
4309static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4310{
4311 FloatRelation compare = float32_compare(a, b, s);
4312 return compare == float_relation_greater;
4313}
4314
4315static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4316{
4317 FloatRelation compare = float64_compare(a, b, s);
4318 return compare == float_relation_greater;
4319}
4320
4321GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4322GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4323GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4324
4325static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4326{
4327 FloatRelation compare = float16_compare(a, b, s);
4328 return compare == float_relation_greater ||
4329 compare == float_relation_equal;
4330}
4331
4332static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4333{
4334 FloatRelation compare = float32_compare(a, b, s);
4335 return compare == float_relation_greater ||
4336 compare == float_relation_equal;
4337}
4338
4339static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4340{
4341 FloatRelation compare = float64_compare(a, b, s);
4342 return compare == float_relation_greater ||
4343 compare == float_relation_equal;
4344}
4345
4346GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4347GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4348GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
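/*
 * NaN handling mirrors the scalar F/D instructions: vmfeq/vmfne use the
 * quiet compare helpers (invalid is raised only for signaling NaNs), while
 * vmflt/vmfle/vmfgt/vmfge use the signaling variants, which raise invalid
 * for any NaN operand.
 */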
4349
121ddbb3
LZ
4350/* Vector Floating-Point Classify Instruction */
4351#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
4352static void do_##NAME(void *vd, void *vs2, int i) \
4353{ \
4354 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
4355 *((TD *)vd + HD(i)) = OP(s2); \
4356}
4357
5eacf7d8 4358#define GEN_VEXT_V(NAME, ESZ) \
121ddbb3
LZ
4359void HELPER(NAME)(void *vd, void *v0, void *vs2, \
4360 CPURISCVState *env, uint32_t desc) \
4361{ \
121ddbb3
LZ
4362 uint32_t vm = vext_vm(desc); \
4363 uint32_t vl = env->vl; \
5eacf7d8 4364 uint32_t total_elems = \
4365 vext_get_total_elems(env, desc, ESZ); \
4366 uint32_t vta = vext_vta(desc); \
5b448f44 4367 uint32_t vma = vext_vma(desc); \
121ddbb3
LZ
4368 uint32_t i; \
4369 \
f714361e 4370 for (i = env->vstart; i < vl; i++) { \
f9298de5 4371 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4372 /* set masked-off elements to 1s */ \
4373 vext_set_elems_1s(vd, vma, i * ESZ, \
4374 (i + 1) * ESZ); \
121ddbb3
LZ
4375 continue; \
4376 } \
4377 do_##NAME(vd, vs2, i); \
4378 } \
f714361e 4379 env->vstart = 0; \
5eacf7d8 4380 /* set tail elements to 1s */ \
4381 vext_set_elems_1s(vd, vta, vl * ESZ, \
4382 total_elems * ESZ); \
121ddbb3
LZ
4383}
4384
4385target_ulong fclass_h(uint64_t frs1)
4386{
4387 float16 f = frs1;
4388 bool sign = float16_is_neg(f);
4389
4390 if (float16_is_infinity(f)) {
4391 return sign ? 1 << 0 : 1 << 7;
4392 } else if (float16_is_zero(f)) {
4393 return sign ? 1 << 3 : 1 << 4;
4394 } else if (float16_is_zero_or_denormal(f)) {
4395 return sign ? 1 << 2 : 1 << 5;
4396 } else if (float16_is_any_nan(f)) {
4397 float_status s = { }; /* for snan_bit_is_one */
4398 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4399 } else {
4400 return sign ? 1 << 1 : 1 << 6;
4401 }
4402}
4403
4404target_ulong fclass_s(uint64_t frs1)
4405{
4406 float32 f = frs1;
4407 bool sign = float32_is_neg(f);
4408
4409 if (float32_is_infinity(f)) {
4410 return sign ? 1 << 0 : 1 << 7;
4411 } else if (float32_is_zero(f)) {
4412 return sign ? 1 << 3 : 1 << 4;
4413 } else if (float32_is_zero_or_denormal(f)) {
4414 return sign ? 1 << 2 : 1 << 5;
4415 } else if (float32_is_any_nan(f)) {
4416 float_status s = { }; /* for snan_bit_is_one */
4417 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4418 } else {
4419 return sign ? 1 << 1 : 1 << 6;
4420 }
4421}
4422
4423target_ulong fclass_d(uint64_t frs1)
4424{
4425 float64 f = frs1;
4426 bool sign = float64_is_neg(f);
4427
4428 if (float64_is_infinity(f)) {
4429 return sign ? 1 << 0 : 1 << 7;
4430 } else if (float64_is_zero(f)) {
4431 return sign ? 1 << 3 : 1 << 4;
4432 } else if (float64_is_zero_or_denormal(f)) {
4433 return sign ? 1 << 2 : 1 << 5;
4434 } else if (float64_is_any_nan(f)) {
4435 float_status s = { }; /* for snan_bit_is_one */
4436 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4437 } else {
4438 return sign ? 1 << 1 : 1 << 6;
4439 }
4440}
4441
4442RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4443RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4444RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
5eacf7d8 4445GEN_VEXT_V(vfclass_v_h, 2)
4446GEN_VEXT_V(vfclass_v_w, 4)
4447GEN_VEXT_V(vfclass_v_d, 8)
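/*
 * The returned mask uses the standard fclass encoding: bit 0 -inf,
 * bit 1 negative normal, bit 2 negative subnormal, bit 3 -0, bit 4 +0,
 * bit 5 positive subnormal, bit 6 positive normal, bit 7 +inf,
 * bit 8 signaling NaN, bit 9 quiet NaN.
 */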
64ab5846
LZ
4448
4449/* Vector Floating-Point Merge Instruction */
5eacf7d8 4450
3479a814 4451#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
64ab5846
LZ
4452void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4453 CPURISCVState *env, uint32_t desc) \
4454{ \
64ab5846
LZ
4455 uint32_t vm = vext_vm(desc); \
4456 uint32_t vl = env->vl; \
5eacf7d8 4457 uint32_t esz = sizeof(ETYPE); \
4458 uint32_t total_elems = \
4459 vext_get_total_elems(env, desc, esz); \
4460 uint32_t vta = vext_vta(desc); \
64ab5846
LZ
4461 uint32_t i; \
4462 \
f714361e 4463 for (i = env->vstart; i < vl; i++) { \
64ab5846
LZ
4464 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4465 *((ETYPE *)vd + H(i)) \
f9298de5 4466 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
64ab5846 4467 } \
f714361e 4468 env->vstart = 0; \
5eacf7d8 4469 /* set tail elements to 1s */ \
4470 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
64ab5846
LZ
4471}
4472
3479a814
FC
4473GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4474GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4475GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
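/*
 * vfmerge.vfm is always masked: element i of vd receives f[rs1] where
 * v0.mask[i] is set and vs2[i] where it is clear, so only the tail
 * (when vta is set) is filled with 1s afterwards.
 */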
92100973
LZ
4476
4477/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4478/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4479RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4480RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4481RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
5eacf7d8 4482GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4483GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4484GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
92100973
LZ
4485
4486/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4487RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4488RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4489RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
5eacf7d8 4490GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4491GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4492GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
92100973
LZ
4493
4494/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4495RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4496RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4497RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
5eacf7d8 4498GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4499GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4500GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
92100973
LZ
4501
4502/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4503RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4504RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4505RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
5eacf7d8 4506GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4507GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4508GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4514b7b1
LZ
4509
4510/* Widening Floating-Point/Integer Type-Convert Instructions */
4511/* (TD, T2, TX2) */
3ce4c09d 4512#define WOP_UU_B uint16_t, uint8_t, uint8_t
4514b7b1
LZ
4513#define WOP_UU_H uint32_t, uint16_t, uint16_t
4514#define WOP_UU_W uint64_t, uint32_t, uint32_t
4515/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
4516RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4517RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
5eacf7d8 4518GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4519GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4514b7b1
LZ
4520
4521/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4522RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4523RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
5eacf7d8 4524GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4525GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4514b7b1
LZ
4526
4527/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
3ce4c09d 4528RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4514b7b1
LZ
4529RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4530RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
5eacf7d8 4531GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4532GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4533GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4514b7b1
LZ
4534
4535/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3ce4c09d 4536RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4514b7b1
LZ
4537RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4538RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
5eacf7d8 4539GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4540GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4541GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4514b7b1
LZ
4542
4543/*
3ce4c09d 4544 * vfwcvt.f.f.v vd, vs2, vm
4514b7b1
LZ
4545 * Convert single-width float to double-width float.
4546 */
4547static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4548{
4549 return float16_to_float32(a, true, s);
4550}
4551
4552RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4553RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
5eacf7d8 4554GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4555GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
878d406e
LZ
4556
4557/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4558/* (TD, T2, TX2) */
ff679b58 4559#define NOP_UU_B uint8_t, uint16_t, uint32_t
878d406e
LZ
4560#define NOP_UU_H uint16_t, uint32_t, uint32_t
4561#define NOP_UU_W uint32_t, uint64_t, uint64_t
4562/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
ff679b58
FC
4563RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4564RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4565RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
5eacf7d8 4566GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4567GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4568GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
878d406e
LZ
4569
4570/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
ff679b58
FC
4571RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4572RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4573RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
5eacf7d8 4574GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4575GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4576GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
878d406e
LZ
4577
4578/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
ff679b58
FC
4579RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4580RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
5eacf7d8 4581GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4582GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
878d406e
LZ
4583
4584/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
ff679b58
FC
4585RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4586RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
5eacf7d8 4587GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4588GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
878d406e
LZ
4589
4590/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4591static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4592{
4593 return float32_to_float16(a, true, s);
4594}
4595
ff679b58
FC
4596RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4597RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
5eacf7d8 4598GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4599GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
fe5c9ab1
LZ
4600
4601/*
4602 *** Vector Reduction Operations
4603 */
4604/* Vector Single-Width Integer Reduction Instructions */
3479a814 4605#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
fe5c9ab1
LZ
4606void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4607 void *vs2, CPURISCVState *env, uint32_t desc) \
4608{ \
fe5c9ab1
LZ
4609 uint32_t vm = vext_vm(desc); \
4610 uint32_t vl = env->vl; \
df4f52a7 4611 uint32_t esz = sizeof(TD); \
4612 uint32_t vlenb = simd_maxsz(desc); \
4613 uint32_t vta = vext_vta(desc); \
fe5c9ab1 4614 uint32_t i; \
fe5c9ab1
LZ
4615 TD s1 = *((TD *)vs1 + HD(0)); \
4616 \
f714361e 4617 for (i = env->vstart; i < vl; i++) { \
fe5c9ab1 4618 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4619 if (!vm && !vext_elem_mask(v0, i)) { \
fe5c9ab1
LZ
4620 continue; \
4621 } \
4622 s1 = OP(s1, (TD)s2); \
4623 } \
4624 *((TD *)vd + HD(0)) = s1; \
f714361e 4625 env->vstart = 0; \
df4f52a7 4626 /* set tail elements to 1s */ \
4627 vext_set_elems_1s(vd, vta, esz, vlenb); \
fe5c9ab1
LZ
4628}
4629
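/*
 * Editor's sketch, not part of the original helpers: GEN_VEXT_RED seeds
 * the accumulator with vs1[0], folds in every active element of vs2 and
 * writes the result only to vd[0] (the tail is then set according to
 * vta).  A standalone scalar model of a 32-bit vredsum.vs, using a
 * hypothetical byte-per-element mask instead of the packed v0 layout.
 */
static inline int32_t example_vredsum_w(int32_t s1_0, const int32_t *vs2,
                                        const uint8_t *mask, bool vm,
                                        uint32_t vl)
{
    int32_t acc = s1_0;                  /* accumulator starts at vs1[0] */

    for (uint32_t i = 0; i < vl; i++) {
        if (!vm && !mask[i]) {           /* skip masked-off elements */
            continue;
        }
        acc += vs2[i];
    }
    return acc;                          /* the caller stores this in vd[0] */
}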
4630/* vd[0] = sum(vs1[0], vs2[*]) */
3479a814
FC
4631GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4632GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4633GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4634GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
fe5c9ab1
LZ
4635
4636/* vd[0] = maxu(vs1[0], vs2[*]) */
3479a814
FC
4637GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4638GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4639GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4640GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4641
4642/* vd[0] = max(vs1[0], vs2[*]) */
3479a814
FC
4643GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4644GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4645GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4646GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4647
4648/* vd[0] = minu(vs1[0], vs2[*]) */
3479a814
FC
4649GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4650GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4651GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4652GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4653
4654/* vd[0] = min(vs1[0], vs2[*]) */
3479a814
FC
4655GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4656GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4657GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4658GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4659
4660/* vd[0] = and(vs1[0], vs2[*]) */
3479a814
FC
4661GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4662GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4663GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4664GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
fe5c9ab1
LZ
4665
4666/* vd[0] = or(vs1[0], vs2[*]) */
3479a814
FC
4667GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4668GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4669GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4670GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
fe5c9ab1
LZ
4671
4672/* vd[0] = xor(vs1[0], vs2[*]) */
3479a814
FC
4673GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4674GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4675GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4676GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
bba71820
LZ
4677
4678/* Vector Widening Integer Reduction Instructions */
 4679/* Signed sum reduction into double-width accumulator */
3479a814
FC
4680GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4681GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4682GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
bba71820
LZ
4683
4684/* Unsigned sum reduction into double-width accumulator */
3479a814
FC
4685GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4686GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4687GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
523547f1
LZ
4688
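/*
 * Editor's sketch, not part of the original helpers: the widening
 * reductions above reuse GEN_VEXT_RED with TD twice as wide as TS2, so
 * each source element is promoted by the (TD)s2 cast before the add.
 * A plain-C model of vwredsum.vs at SEW=8 (hypothetical name, masking
 * omitted).
 */
static inline int16_t example_vwredsum_b(int16_t s1_0, const int8_t *vs2,
                                         uint32_t vl)
{
    int16_t acc = s1_0;

    for (uint32_t i = 0; i < vl; i++) {
        acc = (int16_t)(acc + (int16_t)vs2[i]);  /* promote, then accumulate */
    }
    return acc;
}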
4689/* Vector Single-Width Floating-Point Reduction Instructions */
3479a814 4690#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
523547f1
LZ
4691void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4692 void *vs2, CPURISCVState *env, \
4693 uint32_t desc) \
4694{ \
523547f1
LZ
4695 uint32_t vm = vext_vm(desc); \
4696 uint32_t vl = env->vl; \
df4f52a7 4697 uint32_t esz = sizeof(TD); \
4698 uint32_t vlenb = simd_maxsz(desc); \
4699 uint32_t vta = vext_vta(desc); \
523547f1 4700 uint32_t i; \
523547f1
LZ
4701 TD s1 = *((TD *)vs1 + HD(0)); \
4702 \
f714361e 4703 for (i = env->vstart; i < vl; i++) { \
523547f1 4704 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4705 if (!vm && !vext_elem_mask(v0, i)) { \
523547f1
LZ
4706 continue; \
4707 } \
4708 s1 = OP(s1, (TD)s2, &env->fp_status); \
4709 } \
4710 *((TD *)vd + HD(0)) = s1; \
f714361e 4711 env->vstart = 0; \
df4f52a7 4712 /* set tail elements to 1s */ \
4713 vext_set_elems_1s(vd, vta, esz, vlenb); \
523547f1
LZ
4714}
4715
4716/* Unordered sum */
a3ab69f9
YL
4717GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4718GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4719GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4720
4721/* Ordered sum */
4722GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4723GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4724GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
523547f1
LZ
4725
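/*
 * Editor's note, not part of the original helpers: GEN_VEXT_FRED
 * accumulates strictly in element order, which is exactly what
 * vfredosum requires; reusing the same sequential loop for vfredusum is
 * also acceptable, since the unordered form merely permits (it does not
 * mandate) other association orders.  A scalar model at SEW=32 showing
 * the sequential data dependence, with plain float arithmetic standing
 * in for softfloat and masking omitted.
 */
static inline float example_vfredosum_w(float s1_0, const float *vs2,
                                        uint32_t vl)
{
    float acc = s1_0;

    for (uint32_t i = 0; i < vl; i++) {
        acc += vs2[i];          /* each step depends on the previous sum */
    }
    return acc;
}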
4726/* Maximum value */
08b60eeb
FC
4727GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4728GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4729GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
523547f1
LZ
4730
4731/* Minimum value */
08b60eeb
FC
4732GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4733GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4734GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
696b0c26 4735
5bda21c0
YL
4736/* Vector Widening Floating-Point Add Instructions */
4737static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
696b0c26 4738{
5bda21c0 4739 return float32_add(a, float16_to_float32(b, true, s), s);
696b0c26
LZ
4740}
4741
5bda21c0 4742static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
696b0c26 4743{
5bda21c0 4744 return float64_add(a, float32_to_float64(b, s), s);
696b0c26 4745}
c21f34ae 4746
5bda21c0 4747/* Vector Widening Floating-Point Reduction Instructions */
a3ab69f9
YL
4748/* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4749GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4750GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4751GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4752GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
5bda21c0 4753
c21f34ae
LZ
4754/*
4755 *** Vector Mask Operations
4756 */
4757/* Vector Mask-Register Logical Instructions */
4758#define GEN_VEXT_MASK_VV(NAME, OP) \
4759void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4760 void *vs2, CPURISCVState *env, \
4761 uint32_t desc) \
4762{ \
c21f34ae 4763 uint32_t vl = env->vl; \
acc6ffd4 4764 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
4765 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
c21f34ae
LZ
4766 uint32_t i; \
4767 int a, b; \
4768 \
f714361e 4769 for (i = env->vstart; i < vl; i++) { \
f9298de5
FC
4770 a = vext_elem_mask(vs1, i); \
4771 b = vext_elem_mask(vs2, i); \
4772 vext_set_elem_mask(vd, i, OP(b, a)); \
c21f34ae 4773 } \
f714361e 4774 env->vstart = 0; \
acc6ffd4 4775 /* the mask destination register is always \
 4776 * tail-agnostic \
 4777 */ \
4778 /* set tail elements to 1s */ \
4779 if (vta_all_1s) { \
4780 for (; i < total_elems; i++) { \
4781 vext_set_elem_mask(vd, i, 1); \
4782 } \
4783 } \
c21f34ae
LZ
4784}
4785
4786#define DO_NAND(N, M) (!(N & M))
4787#define DO_ANDNOT(N, M) (N & !M)
4788#define DO_NOR(N, M) (!(N | M))
4789#define DO_ORNOT(N, M) (N | !M)
4790#define DO_XNOR(N, M) (!(N ^ M))
4791
4792GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4793GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
9c0d2559 4794GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
c21f34ae
LZ
4795GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4796GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4797GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
9c0d2559 4798GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
c21f34ae 4799GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
2e88f551 4800
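/*
 * Editor's sketch, not part of the original helpers: each DO_* macro
 * above receives the two mask bits as 0/1 integers, so plain C bitwise
 * and logical operators produce the required single-bit result.  A
 * standalone model of vmnand.mm over a hypothetical byte-per-bit mask;
 * the real code packs the bits and goes through vext_elem_mask() /
 * vext_set_elem_mask().
 */
static inline void example_vmnand_mm(uint8_t *vd, const uint8_t *vs1,
                                     const uint8_t *vs2, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        int a = vs1[i] & 1;
        int b = vs2[i] & 1;
        vd[i] = !(b & a);        /* the same operation as DO_NAND(N, M) */
    }
}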
0014aa74
FC
4801/* Vector count population in mask vcpop */
4802target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4803 uint32_t desc)
2e88f551
LZ
4804{
4805 target_ulong cnt = 0;
2e88f551
LZ
4806 uint32_t vm = vext_vm(desc);
4807 uint32_t vl = env->vl;
4808 int i;
4809
f714361e 4810 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4811 if (vm || vext_elem_mask(v0, i)) {
4812 if (vext_elem_mask(vs2, i)) {
2e88f551
LZ
4813 cnt++;
4814 }
4815 }
4816 }
f714361e 4817 env->vstart = 0;
2e88f551
LZ
4818 return cnt;
4819}
0db67e1c 4820
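/*
 * Editor's sketch, not part of the original helpers: vcpop.m counts the
 * set bits of vs2 among the active elements below vl.  Standalone model
 * with hypothetical byte-per-bit masks.
 */
static inline uint32_t example_vcpop_m(const uint8_t *v0, const uint8_t *vs2,
                                       bool vm, uint32_t vl)
{
    uint32_t cnt = 0;

    for (uint32_t i = 0; i < vl; i++) {
        if ((vm || v0[i]) && vs2[i]) {
            cnt++;
        }
    }
    return cnt;
}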
d71a24fc
FC
 4821/* vfirst find-first-set mask bit */
4822target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4823 uint32_t desc)
0db67e1c 4824{
0db67e1c
LZ
4825 uint32_t vm = vext_vm(desc);
4826 uint32_t vl = env->vl;
4827 int i;
4828
f714361e 4829 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4830 if (vm || vext_elem_mask(v0, i)) {
4831 if (vext_elem_mask(vs2, i)) {
0db67e1c
LZ
4832 return i;
4833 }
4834 }
4835 }
f714361e 4836 env->vstart = 0;
0db67e1c
LZ
4837 return -1LL;
4838}
81fbf7da
LZ
4839
4840enum set_mask_type {
4841 ONLY_FIRST = 1,
4842 INCLUDE_FIRST,
4843 BEFORE_FIRST,
4844};
4845
4846static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4847 uint32_t desc, enum set_mask_type type)
4848{
81fbf7da
LZ
4849 uint32_t vm = vext_vm(desc);
4850 uint32_t vl = env->vl;
acc6ffd4 4851 uint32_t total_elems = env_archcpu(env)->cfg.vlen;
4852 uint32_t vta_all_1s = vext_vta_all_1s(desc);
35f2d795 4853 uint32_t vma = vext_vma(desc);
81fbf7da
LZ
4854 int i;
4855 bool first_mask_bit = false;
4856
f714361e 4857 for (i = env->vstart; i < vl; i++) {
f9298de5 4858 if (!vm && !vext_elem_mask(v0, i)) {
35f2d795
YTC
4859 /* set masked-off elements to 1s */
4860 if (vma) {
4861 vext_set_elem_mask(vd, i, 1);
4862 }
81fbf7da
LZ
4863 continue;
4864 }
4865 /* write a zero to all following active elements */
4866 if (first_mask_bit) {
f9298de5 4867 vext_set_elem_mask(vd, i, 0);
81fbf7da
LZ
4868 continue;
4869 }
f9298de5 4870 if (vext_elem_mask(vs2, i)) {
81fbf7da
LZ
4871 first_mask_bit = true;
4872 if (type == BEFORE_FIRST) {
f9298de5 4873 vext_set_elem_mask(vd, i, 0);
81fbf7da 4874 } else {
f9298de5 4875 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4876 }
4877 } else {
4878 if (type == ONLY_FIRST) {
f9298de5 4879 vext_set_elem_mask(vd, i, 0);
81fbf7da 4880 } else {
f9298de5 4881 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4882 }
4883 }
4884 }
f714361e 4885 env->vstart = 0;
acc6ffd4 4886 /* the mask destination register is always tail-agnostic */
4887 /* set tail elements to 1s */
4888 if (vta_all_1s) {
4889 for (; i < total_elems; i++) {
4890 vext_set_elem_mask(vd, i, 1);
4891 }
4892 }
81fbf7da
LZ
4893}
4894
4895void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4896 uint32_t desc)
4897{
4898 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4899}
4900
4901void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4902 uint32_t desc)
4903{
4904 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4905}
4906
4907void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4908 uint32_t desc)
4909{
4910 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4911}
78d90cfe
LZ
4912
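/*
 * Editor's note, not part of the original helpers: the three set-mask
 * helpers differ only in what they write around the first set bit of
 * vs2.  For an active source mask 0 0 1 0 1, vmsbf.m yields 1 1 0 0 0,
 * vmsif.m yields 1 1 1 0 0 and vmsof.m yields 0 0 1 0 0.  A compact
 * standalone model over a hypothetical byte-per-bit mask, with v0
 * masking and tail handling omitted; type is 0 for vmsbf, 1 for vmsif,
 * 2 for vmsof.
 */
static inline void example_vmset_m(uint8_t *vd, const uint8_t *vs2,
                                   uint32_t vl, int type)
{
    bool seen = false;

    for (uint32_t i = 0; i < vl; i++) {
        if (seen) {
            vd[i] = 0;               /* everything after the first set bit */
        } else if (vs2[i]) {
            seen = true;
            vd[i] = (type != 0);     /* the first set bit: 0 only for vmsbf */
        } else {
            vd[i] = (type != 2);     /* before it: 1 except for vmsof */
        }
    }
}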
4913/* Vector Iota Instruction */
3479a814 4914#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
78d90cfe
LZ
4915void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4916 uint32_t desc) \
4917{ \
78d90cfe
LZ
4918 uint32_t vm = vext_vm(desc); \
4919 uint32_t vl = env->vl; \
acc6ffd4 4920 uint32_t esz = sizeof(ETYPE); \
4921 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4922 uint32_t vta = vext_vta(desc); \
35f2d795 4923 uint32_t vma = vext_vma(desc); \
78d90cfe
LZ
4924 uint32_t sum = 0; \
4925 int i; \
4926 \
f714361e 4927 for (i = env->vstart; i < vl; i++) { \
f9298de5 4928 if (!vm && !vext_elem_mask(v0, i)) { \
35f2d795
YTC
4929 /* set masked-off elements to 1s */ \
4930 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
78d90cfe
LZ
4931 continue; \
4932 } \
4933 *((ETYPE *)vd + H(i)) = sum; \
f9298de5 4934 if (vext_elem_mask(vs2, i)) { \
78d90cfe
LZ
4935 sum++; \
4936 } \
4937 } \
f714361e 4938 env->vstart = 0; \
acc6ffd4 4939 /* set tail elements to 1s */ \
4940 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
78d90cfe
LZ
4941}
4942
3479a814
FC
4943GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4944GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4945GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4946GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
126bec3f
LZ
4947
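/*
 * Editor's sketch, not part of the original helpers: viota.m writes,
 * for each active destination element, the number of set vs2 mask bits
 * strictly below it (an exclusive prefix count).  Standalone SEW=32
 * model with a hypothetical byte-per-bit mask and masking omitted.
 */
static inline void example_viota_m_w(uint32_t *vd, const uint8_t *vs2,
                                     uint32_t vl)
{
    uint32_t sum = 0;

    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = sum;                 /* set bits of vs2 before element i */
        if (vs2[i]) {
            sum++;
        }
    }
}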
4948/* Vector Element Index Instruction */
3479a814 4949#define GEN_VEXT_VID_V(NAME, ETYPE, H) \
126bec3f
LZ
4950void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4951{ \
126bec3f
LZ
4952 uint32_t vm = vext_vm(desc); \
4953 uint32_t vl = env->vl; \
acc6ffd4 4954 uint32_t esz = sizeof(ETYPE); \
4955 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4956 uint32_t vta = vext_vta(desc); \
35f2d795 4957 uint32_t vma = vext_vma(desc); \
126bec3f
LZ
4958 int i; \
4959 \
f714361e 4960 for (i = env->vstart; i < vl; i++) { \
f9298de5 4961 if (!vm && !vext_elem_mask(v0, i)) { \
35f2d795
YTC
4962 /* set masked-off elements to 1s */ \
4963 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
126bec3f
LZ
4964 continue; \
4965 } \
4966 *((ETYPE *)vd + H(i)) = i; \
4967 } \
f714361e 4968 env->vstart = 0; \
acc6ffd4 4969 /* set tail elements to 1s */ \
4970 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
126bec3f
LZ
4971}
4972
3479a814
FC
4973GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4974GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4975GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4976GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
ec17e036
LZ
4977
4978/*
4979 *** Vector Permutation Instructions
4980 */
4981
4982/* Vector Slide Instructions */
3479a814 4983#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
ec17e036
LZ
4984void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4985 CPURISCVState *env, uint32_t desc) \
4986{ \
ec17e036
LZ
4987 uint32_t vm = vext_vm(desc); \
4988 uint32_t vl = env->vl; \
803963f7 4989 uint32_t esz = sizeof(ETYPE); \
4990 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4991 uint32_t vta = vext_vta(desc); \
edabcd0e 4992 uint32_t vma = vext_vma(desc); \
f714361e 4993 target_ulong offset = s1, i_min, i; \
ec17e036 4994 \
f714361e
FC
4995 i_min = MAX(env->vstart, offset); \
4996 for (i = i_min; i < vl; i++) { \
f9298de5 4997 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
4998 /* set masked-off elements to 1s */ \
4999 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
ec17e036
LZ
5000 continue; \
5001 } \
5002 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
5003 } \
803963f7 5004 /* set tail elements to 1s */ \
5005 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
ec17e036
LZ
5006}
5007
5008/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
3479a814
FC
5009GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
5010GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
5011GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
5012GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
ec17e036 5013
3479a814 5014#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
ec17e036
LZ
5015void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5016 CPURISCVState *env, uint32_t desc) \
5017{ \
6438ed61 5018 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
ec17e036
LZ
5019 uint32_t vm = vext_vm(desc); \
5020 uint32_t vl = env->vl; \
803963f7 5021 uint32_t esz = sizeof(ETYPE); \
5022 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5023 uint32_t vta = vext_vta(desc); \
edabcd0e 5024 uint32_t vma = vext_vma(desc); \
6438ed61 5025 target_ulong i_max, i; \
ec17e036 5026 \
f714361e
FC
5027 i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
5028 for (i = env->vstart; i < i_max; ++i) { \
edabcd0e
YTC
5029 if (!vm && !vext_elem_mask(v0, i)) { \
5030 /* set masked-off elements to 1s */ \
5031 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5032 continue; \
6438ed61 5033 } \
edabcd0e 5034 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
6438ed61
FC
5035 } \
5036 \
5037 for (i = i_max; i < vl; ++i) { \
5038 if (vm || vext_elem_mask(v0, i)) { \
5039 *((ETYPE *)vd + H(i)) = 0; \
ec17e036 5040 } \
ec17e036 5041 } \
f714361e
FC
5042 \
5043 env->vstart = 0; \
803963f7 5044 /* set tail elements to 1s */ \
5045 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
ec17e036
LZ
5046}
5047
5048/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
3479a814
FC
5049GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
5050GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
5051GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
5052GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
ec17e036 5053
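/*
 * Editor's sketch, not part of the original helpers: vslideup.vx writes
 * vs2[i - OFFSET] to vd[i] for i >= OFFSET and leaves the elements
 * below the offset untouched, while vslidedown.vx writes vs2[i + OFFSET]
 * and zeroes elements whose source index falls at or beyond vlmax.
 * Plain-C models at SEW=32 with masking and tail handling omitted;
 * names are hypothetical.
 */
static inline void example_vslideup_w(uint32_t *vd, const uint32_t *vs2,
                                      uint32_t offset, uint32_t vl)
{
    for (uint32_t i = offset; i < vl; i++) {
        vd[i] = vs2[i - offset];             /* vd[i + offset] = vs2[i] */
    }
}

static inline void example_vslidedown_w(uint32_t *vd, const uint32_t *vs2,
                                        uint32_t offset, uint32_t vl,
                                        uint32_t vlmax)
{
    for (uint32_t i = 0; i < vl; i++) {
        /* the 64-bit sum avoids wrap-around for large offsets */
        vd[i] = ((uint64_t)i + offset < vlmax) ? vs2[i + offset] : 0;
    }
}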
c7b8a421 5054#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
5055static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
5056 void *vs2, CPURISCVState *env, uint32_t desc) \
8500d4ab 5057{ \
c7b8a421 5058 typedef uint##BITWIDTH##_t ETYPE; \
8500d4ab
FC
5059 uint32_t vm = vext_vm(desc); \
5060 uint32_t vl = env->vl; \
803963f7 5061 uint32_t esz = sizeof(ETYPE); \
5062 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5063 uint32_t vta = vext_vta(desc); \
edabcd0e 5064 uint32_t vma = vext_vma(desc); \
8500d4ab
FC
5065 uint32_t i; \
5066 \
f714361e 5067 for (i = env->vstart; i < vl; i++) { \
8500d4ab 5068 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5069 /* set masked-off elements to 1s */ \
5070 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
8500d4ab
FC
5071 continue; \
5072 } \
5073 if (i == 0) { \
5074 *((ETYPE *)vd + H(i)) = s1; \
5075 } else { \
5076 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
5077 } \
5078 } \
f714361e 5079 env->vstart = 0; \
803963f7 5080 /* set tail elements to 1s */ \
5081 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
8500d4ab
FC
5082}
5083
5084GEN_VEXT_VSLIE1UP(8, H1)
5085GEN_VEXT_VSLIE1UP(16, H2)
5086GEN_VEXT_VSLIE1UP(32, H4)
5087GEN_VEXT_VSLIE1UP(64, H8)
5088
c7b8a421 5089#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
8500d4ab
FC
5090void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5091 CPURISCVState *env, uint32_t desc) \
5092{ \
c7b8a421 5093 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
ec17e036
LZ
5094}
5095
5096/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
8500d4ab
FC
5097GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
5098GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
5099GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
5100GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
5101
c7b8a421 5102#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
5103static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
5104 void *vs2, CPURISCVState *env, uint32_t desc) \
8500d4ab 5105{ \
c7b8a421 5106 typedef uint##BITWIDTH##_t ETYPE; \
8500d4ab
FC
5107 uint32_t vm = vext_vm(desc); \
5108 uint32_t vl = env->vl; \
803963f7 5109 uint32_t esz = sizeof(ETYPE); \
5110 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5111 uint32_t vta = vext_vta(desc); \
edabcd0e 5112 uint32_t vma = vext_vma(desc); \
8500d4ab
FC
5113 uint32_t i; \
5114 \
f714361e 5115 for (i = env->vstart; i < vl; i++) { \
8500d4ab 5116 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5117 /* set masked-off elements to 1s */ \
5118 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
8500d4ab
FC
5119 continue; \
5120 } \
5121 if (i == vl - 1) { \
5122 *((ETYPE *)vd + H(i)) = s1; \
5123 } else { \
5124 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
5125 } \
5126 } \
f714361e 5127 env->vstart = 0; \
803963f7 5128 /* set tail elements to 1s */ \
5129 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
8500d4ab
FC
5130}
5131
5132GEN_VEXT_VSLIDE1DOWN(8, H1)
5133GEN_VEXT_VSLIDE1DOWN(16, H2)
5134GEN_VEXT_VSLIDE1DOWN(32, H4)
5135GEN_VEXT_VSLIDE1DOWN(64, H8)
5136
c7b8a421 5137#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
8500d4ab
FC
5138void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5139 CPURISCVState *env, uint32_t desc) \
5140{ \
c7b8a421 5141 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
ec17e036
LZ
5142}
5143
5144/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
8500d4ab
FC
5145GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
5146GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
5147GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
5148GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
5149
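/*
 * Editor's sketch, not part of the original helpers: vslide1up.vx is a
 * slide by one with the scalar filling vd[0], and vslide1down.vx fills
 * vd[vl - 1] instead; the vfslide1up.vf / vfslide1down.vf helpers below
 * reuse exactly the same vslide1up_* / vslide1down_* functions with the
 * scalar taken from an FP register.  Standalone SEW=32 models, masking
 * and tail handling omitted.
 */
static inline void example_vslide1up_w(uint32_t *vd, const uint32_t *vs2,
                                       uint32_t s1, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = (i == 0) ? s1 : vs2[i - 1];
    }
}

static inline void example_vslide1down_w(uint32_t *vd, const uint32_t *vs2,
                                         uint32_t s1, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = (i == vl - 1) ? s1 : vs2[i + 1];
    }
}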
5150/* Vector Floating-Point Slide Instructions */
c7b8a421 5151#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
8500d4ab
FC
5152void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5153 CPURISCVState *env, uint32_t desc) \
5154{ \
c7b8a421 5155 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
8500d4ab
FC
5156}
5157
5158/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
5159GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
5160GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
5161GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5162
c7b8a421 5163#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
8500d4ab
FC
5164void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5165 CPURISCVState *env, uint32_t desc) \
5166{ \
c7b8a421 5167 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
8500d4ab
FC
5168}
5169
5170/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
5171GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
5172GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
5173GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
e4b83d5c
LZ
5174
5175/* Vector Register Gather Instruction */
50bfb45b 5176#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
e4b83d5c
LZ
5177void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5178 CPURISCVState *env, uint32_t desc) \
5179{ \
f714361e 5180 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
e4b83d5c
LZ
5181 uint32_t vm = vext_vm(desc); \
5182 uint32_t vl = env->vl; \
803963f7 5183 uint32_t esz = sizeof(TS2); \
5184 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5185 uint32_t vta = vext_vta(desc); \
edabcd0e 5186 uint32_t vma = vext_vma(desc); \
b11e84b8
FC
5187 uint64_t index; \
5188 uint32_t i; \
e4b83d5c 5189 \
f714361e 5190 for (i = env->vstart; i < vl; i++) { \
f9298de5 5191 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5192 /* set masked-off elements to 1s */ \
5193 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
e4b83d5c
LZ
5194 continue; \
5195 } \
50bfb45b 5196 index = *((TS1 *)vs1 + HS1(i)); \
e4b83d5c 5197 if (index >= vlmax) { \
50bfb45b 5198 *((TS2 *)vd + HS2(i)) = 0; \
e4b83d5c 5199 } else { \
50bfb45b 5200 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
e4b83d5c
LZ
5201 } \
5202 } \
f714361e 5203 env->vstart = 0; \
803963f7 5204 /* set tail elements to 1s */ \
5205 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
e4b83d5c
LZ
5206}
5207
5208/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
50bfb45b
FC
5209GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5210GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5211GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5212GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5213
5214GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5215GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5216GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5217GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
e4b83d5c 5218
3479a814 5219#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
e4b83d5c
LZ
5220void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5221 CPURISCVState *env, uint32_t desc) \
5222{ \
5a9f8e15 5223 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
e4b83d5c
LZ
5224 uint32_t vm = vext_vm(desc); \
5225 uint32_t vl = env->vl; \
803963f7 5226 uint32_t esz = sizeof(ETYPE); \
5227 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5228 uint32_t vta = vext_vta(desc); \
edabcd0e 5229 uint32_t vma = vext_vma(desc); \
b11e84b8
FC
5230 uint64_t index = s1; \
5231 uint32_t i; \
e4b83d5c 5232 \
f714361e 5233 for (i = env->vstart; i < vl; i++) { \
f9298de5 5234 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5235 /* set masked-off elements to 1s */ \
5236 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
e4b83d5c
LZ
5237 continue; \
5238 } \
5239 if (index >= vlmax) { \
5240 *((ETYPE *)vd + H(i)) = 0; \
5241 } else { \
5242 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5243 } \
5244 } \
f714361e 5245 env->vstart = 0; \
803963f7 5246 /* set tail elements to 1s */ \
5247 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
e4b83d5c
LZ
5248}
5249
 5250/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
3479a814
FC
5251GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5252GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5253GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5254GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
31bf42a2
LZ
5255
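/*
 * Editor's sketch, not part of the original helpers: vrgather reads an
 * index per destination element from vs1 (or uses the scalar rs1 for
 * every element in the .vx form) and gathers vs2[index], writing zero
 * whenever the index is out of range (>= vlmax); vrgatherei16 differs
 * only in always taking 16-bit indices.  Standalone SEW=32 model,
 * masking and tail handling omitted.
 */
static inline void example_vrgather_vv_w(uint32_t *vd, const uint32_t *vs1,
                                         const uint32_t *vs2, uint32_t vl,
                                         uint32_t vlmax)
{
    for (uint32_t i = 0; i < vl; i++) {
        uint32_t index = vs1[i];
        vd[i] = (index >= vlmax) ? 0 : vs2[index];
    }
}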
5256/* Vector Compress Instruction */
3479a814 5257#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
31bf42a2
LZ
5258void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5259 CPURISCVState *env, uint32_t desc) \
5260{ \
31bf42a2 5261 uint32_t vl = env->vl; \
803963f7 5262 uint32_t esz = sizeof(ETYPE); \
5263 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5264 uint32_t vta = vext_vta(desc); \
31bf42a2
LZ
5265 uint32_t num = 0, i; \
5266 \
f714361e 5267 for (i = env->vstart; i < vl; i++) { \
f9298de5 5268 if (!vext_elem_mask(vs1, i)) { \
31bf42a2
LZ
5269 continue; \
5270 } \
5271 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5272 num++; \
5273 } \
f714361e 5274 env->vstart = 0; \
803963f7 5275 /* set tail elements to 1s */ \
5276 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
31bf42a2
LZ
5277}
5278
5279/* Compress into vd elements of vs2 where vs1 is enabled */
3479a814
FC
5280GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5281GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5282GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5283GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
cd01340e 5284
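/*
 * Editor's sketch, not part of the original helpers: vcompress.vm packs
 * the vs2 elements whose vs1 mask bit is set into the low-numbered
 * elements of vd, preserving order; it takes no v0 mask.  Standalone
 * SEW=32 model with a hypothetical byte-per-bit mask.
 */
static inline uint32_t example_vcompress_w(uint32_t *vd, const uint8_t *vs1,
                                           const uint32_t *vs2, uint32_t vl)
{
    uint32_t num = 0;

    for (uint32_t i = 0; i < vl; i++) {
        if (vs1[i]) {
            vd[num++] = vs2[i];      /* next free slot in vd */
        }
    }
    return num;                      /* number of elements written */
}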
f714361e 5285/* Vector Whole Register Move */
f32d82f6
WL
5286void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5287{
f06193c4 5288 /* EEW = SEW */
f32d82f6 5289 uint32_t maxsz = simd_maxsz(desc);
f06193c4
WL
5290 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5291 uint32_t startb = env->vstart * sewb;
5292 uint32_t i = startb;
f32d82f6
WL
5293
5294 memcpy((uint8_t *)vd + H1(i),
5295 (uint8_t *)vs2 + H1(i),
f06193c4 5296 maxsz - startb);
f714361e 5297
f32d82f6
WL
5298 env->vstart = 0;
5299}
f714361e 5300
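/*
 * Editor's note, not part of the original helpers: the whole-register
 * move treats the register group as raw bytes, copying from the byte
 * offset of the vstart element (vstart * SEW/8) up to the end of the
 * group (maxsz bytes in total) in a single memcpy, since source and
 * destination share the same element layout.  A minimal byte-level
 * model over a hypothetical flat register file; memcpy() comes from
 * <string.h>, already pulled in via qemu/osdep.h.
 */
static inline void example_vmvr_v(uint8_t *vd, const uint8_t *vs2,
                                  uint32_t startb, uint32_t maxsz)
{
    memcpy(vd + startb, vs2 + startb, maxsz - startb);
}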
cd01340e
FC
5301/* Vector Integer Extension */
5302#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5303void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5304 CPURISCVState *env, uint32_t desc) \
5305{ \
5306 uint32_t vl = env->vl; \
5307 uint32_t vm = vext_vm(desc); \
803963f7 5308 uint32_t esz = sizeof(ETYPE); \
5309 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5310 uint32_t vta = vext_vta(desc); \
edabcd0e 5311 uint32_t vma = vext_vma(desc); \
cd01340e
FC
5312 uint32_t i; \
5313 \
f714361e 5314 for (i = env->vstart; i < vl; i++) { \
cd01340e 5315 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5316 /* set masked-off elements to 1s */ \
5317 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
cd01340e
FC
5318 continue; \
5319 } \
5320 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5321 } \
f714361e 5322 env->vstart = 0; \
803963f7 5323 /* set tail elements to 1s */ \
5324 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
cd01340e
FC
5325}
5326
5327GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5328GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5329GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5330GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5331GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5332GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5333
5334GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5335GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5336GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5337GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5338GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5339GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
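/*
 * Editor's sketch, not part of the original helpers: the vzext/vsext
 * helpers above load a narrower DTYPE element and store it into a wider
 * ETYPE slot, letting the C assignment perform the zero or sign
 * extension.  Standalone model of vsext.vf2 with a 16-bit destination,
 * masking and tail handling omitted.
 */
static inline void example_vsext_vf2_h(int16_t *vd, const int8_t *vs2,
                                       uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = vs2[i];    /* implicit sign extension from int8_t */
    }
}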