/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}
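
/*
 * Worked example (illustrative, not part of the upstream file): assume
 * VLEN = 128 and ELEN = 64, and a vsetvli requesting SEW = 32 with
 * LMUL = 2.  Then vlmax = LMUL * VLEN / SEW = 2 * 128 / 32 = 8, so a
 * requested AVL of s1 = 10 is clamped to vl = 8, while s1 = 5 gives
 * vl = 5.  An unsupported vtype (e.g. ediv != 0 or a reserved bit set)
 * instead takes the vill path above and zeroes vl and vtype.
 */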

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif
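
/*
 * Illustrative example (assumption, not from the upstream source): on a
 * big-endian host, byte element 0 lives at offset H1(0) = 0 ^ 7 = 7 within
 * its 64-bit chunk and element 1 at H1(1) = 6, while 16-bit elements use
 * H2(0) = 3 and H2(1) = 2.  On little-endian hosts the macros are the
 * identity, so *((int8_t *)vd + H1(i)) names the same guest element on
 * either kind of host.
 */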

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}
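
/*
 * Worked example (illustrative): a fractional LMUL of 1/2 is encoded as
 * vlmul = 111, and sextract32(0b111, 0, 3) sign-extends it to -1.  An
 * integral LMUL of 4 (vlmul = 010) decodes to +2.  In both cases
 * lmul = log2(LMUL).
 */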

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
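
/*
 * Worked example (illustrative): with vlenb = 16 (VLEN = 128), LMUL = 2
 * (lmul = 1) and SEW = 32 (esz = 2), scale = 1 - 2 = -1 and
 * VLMAX = 16 >> 1 = 8, which matches LMUL * VLEN / SEW = 2 * 128 / 32.
 */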

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support for now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}
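
/*
 * Illustrative example (assumption, not from the upstream file): for a
 * three-field segment load (nf = 3) of 32-bit elements (esz = 2) with
 * stride = 16, element i of field k is read from
 * base + 16 * i + (k << 2) and written to in-register position
 * i + k * max_elems, i.e. field 0 fills the first register group,
 * field 1 the next one, and so on.
 */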

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void * v0, target_ulong base,               \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                         \
{                                                                        \
    uint32_t vm = vext_vm(desc);                                         \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,       \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);       \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
             uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

/*
 * Masked unit-stride load and store operations are a special case of
 * strided operations, with stride = NF * sizeof(MTYPE).
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
                         CPURISCVState *env, uint32_t desc)              \
{                                                                        \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);      \
}                                                                        \
                                                                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 *** unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC(), MMU_DATA_LOAD);
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC(), MMU_DATA_STORE);
}

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}
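
/*
 * Illustrative example (assumption, not from the upstream file): for
 * vlxei16_32_v, the offsets in vs2 are 16-bit (idx_h) while the data
 * elements are 32-bit, so element i is loaded from
 * base + (uint16_t)offset[i] + (k << 2) for each field k of a segment.
 * The data EEW and the index EEW are decoupled, which is why each index
 * width gets its own helper below.
 */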

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
                  void *vs2, CPURISCVState *env, uint32_t desc)             \
{                                                                           \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                 \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);  \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC(), MMU_DATA_STORE);                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << esz));
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}
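
/*
 * Illustrative note (not from the upstream file): fault-only-first means
 * only element 0 is allowed to fault.  If, say, element 5 would touch an
 * unmapped page during the probe loop above, the helper truncates vl to 5
 * and loads elements 0..4; the guest can then retry with the reduced vl
 * instead of taking a trap mid-vector.
 */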

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

/*
 *** load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
                MMUAccessType access_type)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
    uint32_t max_elems = vlenb >> esz;

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store rest of elements of current segment pointed by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
        }
        k++;
    }

    /* load/store elements for rest of segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}
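
/*
 * Worked example (illustrative): with VLEN = 128 (vlenb = 16), a vl2re32
 * access (nf = 2, esz = 2) has max_elems = 4 elements per register.  If it
 * was interrupted at vstart = 6, then k = 1 and off = 2, so the code above
 * first finishes elements 2..3 of the second register; k++ then makes
 * k == nf, leaving nothing for the remaining loop to do.
 */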

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
                    ctzl(sizeof(ETYPE)), GETPC(),    \
                    MMU_DATA_LOAD);                  \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
                    ctzl(sizeof(ETYPE)), GETPC(),    \
                    MMU_DATA_STORE);                 \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)

/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
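
/*
 * Illustrative expansion (not part of the upstream file):
 * RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) first expands
 * OP_SSS_B into its five types and then instantiates OPIVV2, yielding
 * roughly:
 *
 *   static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(i));
 *       int8_t s2 = *((int8_t *)vs2 + H1(i));
 *       *((int8_t *)vd + H1(i)) = s2 + s1;
 *   }
 *
 * RVVCALL exists purely so that OP_SSS_B is expanded into separate
 * arguments before OPIVV2 consumes them.
 */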

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivv2_fn *fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    env->vstart = 0;
}

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc)                 \
{                                                \
    do_vext_vv(vd, v0, vs1, vs2, env, desc,      \
               do_##NAME);                       \
}

GEN_VEXT_VV(vadd_vv_b)
GEN_VEXT_VV(vadd_vv_h)
GEN_VEXT_VV(vadd_vv_w)
GEN_VEXT_VV(vadd_vv_d)
GEN_VEXT_VV(vsub_vv_b)
GEN_VEXT_VV(vsub_vv_h)
GEN_VEXT_VV(vsub_vv_w)
GEN_VEXT_VV(vsub_vv_d)

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operand type.
 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
}

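/*
 * Illustrative expansion (not from the upstream file): for a widening op
 * such as RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD), T1 is
 * int8_t and TX1 is int16_t, so (TX1)(T1)s1 first truncates the scalar to
 * the source element width and then sign-extends it to the double-width
 * type before the addition:
 *
 *   *((int16_t *)vd + H2(i)) = s2 + (int16_t)(int8_t)s1;
 */
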
RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivx2_fn fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, s1, vs2, i);
    }
    env->vstart = 0;
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env,       \
                  uint32_t desc)                       \
{                                                      \
    do_vext_vx(vd, v0, s1, vs2, env, desc,             \
               do_##NAME);                             \
}

GEN_VEXT_VX(vadd_vx_b)
GEN_VEXT_VX(vadd_vx_h)
GEN_VEXT_VX(vadd_vx_w)
GEN_VEXT_VX(vadd_vx_d)
GEN_VEXT_VX(vsub_vx_b)
GEN_VEXT_VX(vsub_vx_h)
GEN_VEXT_VX(vsub_vx_w)
GEN_VEXT_VX(vsub_vx_d)
GEN_VEXT_VX(vrsub_vx_b)
GEN_VEXT_VX(vrsub_vx_h)
GEN_VEXT_VX(vrsub_vx_w)
GEN_VEXT_VX(vrsub_vx_d)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b)
GEN_VEXT_VV(vwaddu_vv_h)
GEN_VEXT_VV(vwaddu_vv_w)
GEN_VEXT_VV(vwsubu_vv_b)
GEN_VEXT_VV(vwsubu_vv_h)
GEN_VEXT_VV(vwsubu_vv_w)
GEN_VEXT_VV(vwadd_vv_b)
GEN_VEXT_VV(vwadd_vv_h)
GEN_VEXT_VV(vwadd_vv_w)
GEN_VEXT_VV(vwsub_vv_b)
GEN_VEXT_VV(vwsub_vv_h)
GEN_VEXT_VV(vwsub_vv_w)
GEN_VEXT_VV(vwaddu_wv_b)
GEN_VEXT_VV(vwaddu_wv_h)
GEN_VEXT_VV(vwaddu_wv_w)
GEN_VEXT_VV(vwsubu_wv_b)
GEN_VEXT_VV(vwsubu_wv_h)
GEN_VEXT_VV(vwsubu_wv_w)
GEN_VEXT_VV(vwadd_wv_b)
GEN_VEXT_VV(vwadd_wv_h)
GEN_VEXT_VV(vwadd_wv_w)
GEN_VEXT_VV(vwsub_wv_b)
GEN_VEXT_VV(vwsub_wv_h)
GEN_VEXT_VV(vwsub_wv_w)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b)
GEN_VEXT_VX(vwaddu_vx_h)
GEN_VEXT_VX(vwaddu_vx_w)
GEN_VEXT_VX(vwsubu_vx_b)
GEN_VEXT_VX(vwsubu_vx_h)
GEN_VEXT_VX(vwsubu_vx_w)
GEN_VEXT_VX(vwadd_vx_b)
GEN_VEXT_VX(vwadd_vx_h)
GEN_VEXT_VX(vwadd_vx_w)
GEN_VEXT_VX(vwsub_vx_b)
GEN_VEXT_VX(vwsub_vx_h)
GEN_VEXT_VX(vwsub_vx_w)
GEN_VEXT_VX(vwaddu_wx_b)
GEN_VEXT_VX(vwaddu_wx_h)
GEN_VEXT_VX(vwaddu_wx_w)
GEN_VEXT_VX(vwsubu_wx_b)
GEN_VEXT_VX(vwsubu_wx_h)
GEN_VEXT_VX(vwsubu_wx_w)
GEN_VEXT_VX(vwadd_wx_b)
GEN_VEXT_VX(vwadd_wx_h)
GEN_VEXT_VX(vwadd_wx_w)
GEN_VEXT_VX(vwsub_wx_b)
GEN_VEXT_VX(vwsub_wx_h)
GEN_VEXT_VX(vwsub_wx_w)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t i;                                                          \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

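/*
 * Illustrative note (not from the upstream file): DO_MADC computes the
 * carry-out of an unsigned add by checking for wrap-around.  E.g. for
 * uint8_t with N = 200, M = 100, C = 0: (uint8_t)(200 + 100) = 44 < 200,
 * so the carry-out is 1.  With C = 1 the "+ 1" variant uses <= so that a
 * sum wrapping exactly back to N still reports a carry.
 */
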
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vl = env->vl;                                      \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t i;                                                 \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
        ETYPE carry = !vm && vext_elem_mask(v0, i);             \
        vext_set_elem_mask(vd, i,                               \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
    }                                                           \
    env->vstart = 0;                                            \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b)
GEN_VEXT_VV(vand_vv_h)
GEN_VEXT_VV(vand_vv_w)
GEN_VEXT_VV(vand_vv_d)
GEN_VEXT_VV(vor_vv_b)
GEN_VEXT_VV(vor_vv_h)
GEN_VEXT_VV(vor_vv_w)
GEN_VEXT_VV(vor_vv_d)
GEN_VEXT_VV(vxor_vv_b)
GEN_VEXT_VV(vxor_vv_h)
GEN_VEXT_VV(vxor_vv_w)
GEN_VEXT_VV(vxor_vv_d)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b)
GEN_VEXT_VX(vand_vx_h)
GEN_VEXT_VX(vand_vx_w)
GEN_VEXT_VX(vand_vx_d)
GEN_VEXT_VX(vor_vx_b)
GEN_VEXT_VX(vor_vx_h)
GEN_VEXT_VX(vor_vx_w)
GEN_VEXT_VX(vor_vx_d)
GEN_VEXT_VX(vxor_vx_b)
GEN_VEXT_VX(vxor_vx_h)
GEN_VEXT_VX(vxor_vx_w)
GEN_VEXT_VX(vxor_vx_d)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

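/*
 * Illustrative note (not from the upstream file): the MASK argument of the
 * shift helpers below restricts the shift amount to log2(SEW) bits, as the
 * vector spec requires.  For a 32-bit element, MASK = 0x1f, so a shift
 * amount of 35 behaves like a shift by 35 & 0x1f = 3.
 */
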
/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
                  void *vs2, CPURISCVState *env, uint32_t desc)           \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
    }                                                                     \
    env->vstart = 0;                                                      \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* generate the helpers for shift instructions with one vector and one scalar */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
                  void *vs2, CPURISCVState *env,            \
                  uint32_t desc)                            \
{                                                           \
    uint32_t vm = vext_vm(desc);                            \
    uint32_t vl = env->vl;                                  \
    uint32_t i;                                             \
                                                            \
    for (i = env->vstart; i < vl; i++) {                    \
        if (!vm && !vext_elem_mask(v0, i)) {                \
            continue;                                       \
        }                                                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
    }                                                       \
    env->vstart = 0;                                        \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t i;                                                     \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                DO_OP(s2, (ETYPE)(target_long)s1));                 \
    }                                                               \
    env->vstart = 0;                                                \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b)
GEN_VEXT_VV(vminu_vv_h)
GEN_VEXT_VV(vminu_vv_w)
GEN_VEXT_VV(vminu_vv_d)
GEN_VEXT_VV(vmin_vv_b)
GEN_VEXT_VV(vmin_vv_h)
GEN_VEXT_VV(vmin_vv_w)
GEN_VEXT_VV(vmin_vv_d)
GEN_VEXT_VV(vmaxu_vv_b)
GEN_VEXT_VV(vmaxu_vv_h)
GEN_VEXT_VV(vmaxu_vv_w)
GEN_VEXT_VV(vmaxu_vv_d)
GEN_VEXT_VV(vmax_vv_b)
GEN_VEXT_VV(vmax_vv_h)
GEN_VEXT_VV(vmax_vv_w)
GEN_VEXT_VV(vmax_vv_d)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b)
GEN_VEXT_VX(vminu_vx_h)
GEN_VEXT_VX(vminu_vx_w)
GEN_VEXT_VX(vminu_vx_d)
GEN_VEXT_VX(vmin_vx_b)
GEN_VEXT_VX(vmin_vx_h)
GEN_VEXT_VX(vmin_vx_w)
GEN_VEXT_VX(vmin_vx_d)
GEN_VEXT_VX(vmaxu_vx_b)
GEN_VEXT_VX(vmaxu_vx_h)
GEN_VEXT_VX(vmaxu_vx_w)
GEN_VEXT_VX(vmaxu_vx_d)
GEN_VEXT_VX(vmax_vx_b)
GEN_VEXT_VX(vmax_vx_h)
GEN_VEXT_VX(vmax_vx_w)
GEN_VEXT_VX(vmax_vx_d)

/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b)
GEN_VEXT_VV(vmul_vv_h)
GEN_VEXT_VV(vmul_vv_w)
GEN_VEXT_VV(vmul_vv_d)

static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    return (int32_t)s2 * (int32_t)s1 >> 16;
}

static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    return (int64_t)s2 * (int64_t)s1 >> 32;
}

static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t hi_64, lo_64;

    muls64(&lo_64, &hi_64, s1, s2);
    return hi_64;
}

static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    return (uint16_t)s2 * (uint16_t)s1 >> 8;
}

static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    return (uint32_t)s2 * (uint32_t)s1 >> 16;
}

static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    return (uint64_t)s2 * (uint64_t)s1 >> 32;
}

static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);
    return hi_64;
}

static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    return (int16_t)s2 * (uint16_t)s1 >> 8;
}

static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    return (int32_t)s2 * (uint32_t)s1 >> 16;
}

static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    return (int64_t)s2 * (uint64_t)s1 >> 32;
}

/*
 * Let  A = signed operand,
 *      B = unsigned operand
 *      P = mulu64(A, B), unsigned product
 *
 * LET  X = 2 ** 64  - A, 2's complement of A
 *      SP = signed product
 * THEN
 *      IF A < 0
 *          SP = -X * B
 *             = -(2 ** 64 - A) * B
 *             = A * B - 2 ** 64 * B
 *             = P - 2 ** 64 * B
 *      ELSE
 *          SP = P
 * THEN
 *      HI_P -= (A < 0 ? B : 0)
 */

static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);

    hi_64 -= s2 < 0 ? s1 : 0;
    return hi_64;
}
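
/*
 * Worked example (illustrative, 8-bit analogue of the derivation above):
 * A = -2 (0xfe), B = 3.  The unsigned product is 0xfe * 3 = 0x02fa, whose
 * high byte is P_hi = 2.  The signed result -6 = 0xfffa has high byte
 * 0xff = 2 - 3 (mod 256), i.e. P_hi - B, which is exactly the
 * "hi_64 -= s2 < 0 ? s1 : 0" adjustment applied by do_mulhsu_d.
 */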
1499
1500RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1501RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1502RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1503RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1504RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1505RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1506RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1507RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1508RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1509RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1510RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1511RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
8a085fb2 1512GEN_VEXT_VV(vmulh_vv_b)
1513GEN_VEXT_VV(vmulh_vv_h)
1514GEN_VEXT_VV(vmulh_vv_w)
1515GEN_VEXT_VV(vmulh_vv_d)
1516GEN_VEXT_VV(vmulhu_vv_b)
1517GEN_VEXT_VV(vmulhu_vv_h)
1518GEN_VEXT_VV(vmulhu_vv_w)
1519GEN_VEXT_VV(vmulhu_vv_d)
1520GEN_VEXT_VV(vmulhsu_vv_b)
1521GEN_VEXT_VV(vmulhsu_vv_h)
1522GEN_VEXT_VV(vmulhsu_vv_w)
1523GEN_VEXT_VV(vmulhsu_vv_d)
958b85f3
LZ
1524
1525RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1526RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1527RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1528RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1529RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1530RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1531RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1532RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1533RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1534RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1535RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1536RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1537RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1538RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1539RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1540RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
8a085fb2 1541GEN_VEXT_VX(vmul_vx_b)
1542GEN_VEXT_VX(vmul_vx_h)
1543GEN_VEXT_VX(vmul_vx_w)
1544GEN_VEXT_VX(vmul_vx_d)
1545GEN_VEXT_VX(vmulh_vx_b)
1546GEN_VEXT_VX(vmulh_vx_h)
1547GEN_VEXT_VX(vmulh_vx_w)
1548GEN_VEXT_VX(vmulh_vx_d)
1549GEN_VEXT_VX(vmulhu_vx_b)
1550GEN_VEXT_VX(vmulhu_vx_h)
1551GEN_VEXT_VX(vmulhu_vx_w)
1552GEN_VEXT_VX(vmulhu_vx_d)
1553GEN_VEXT_VX(vmulhsu_vx_b)
1554GEN_VEXT_VX(vmulhsu_vx_h)
1555GEN_VEXT_VX(vmulhsu_vx_w)
1556GEN_VEXT_VX(vmulhsu_vx_d)
85e6658c
LZ
1557
1558/* Vector Integer Divide Instructions */
1559#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1560#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1561#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\
1562 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1563#define DO_REM(N, M) (unlikely(M == 0) ? N :\
1564 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
1565
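/*
 * Illustrative sketch, not part of the original file: a hypothetical check
 * of the edge cases the macros above encode, for 8-bit lanes (assert() is
 * available via qemu/osdep.h).  For the narrow element types the (N == -N)
 * overflow guard never fires because of integer promotion; the generated
 * helpers still produce the RVV-mandated value because the quotient is
 * truncated back to the element type, which the casts below mimic.
 */
static inline void vdiv_edge_case_demo(void)
{
    uint8_t u = 7, z = 0;
    int8_t min = INT8_MIN, m1 = -1;

    assert(DO_DIVU(u, z) == UINT8_MAX);     /* divide by zero -> all ones */
    assert(DO_REMU(u, z) == 7);             /* remainder by zero -> dividend */
    assert((int8_t)DO_DIV(min, m1) == min); /* signed overflow -> -2^(SEW-1) */
    assert((int8_t)DO_REM(min, m1) == 0);   /* signed overflow -> zero */
}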
1566RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1567RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1568RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1569RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1570RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1571RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1572RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1573RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1574RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1575RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1576RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1577RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1578RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1579RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1580RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1581RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
8a085fb2 1582GEN_VEXT_VV(vdivu_vv_b)
1583GEN_VEXT_VV(vdivu_vv_h)
1584GEN_VEXT_VV(vdivu_vv_w)
1585GEN_VEXT_VV(vdivu_vv_d)
1586GEN_VEXT_VV(vdiv_vv_b)
1587GEN_VEXT_VV(vdiv_vv_h)
1588GEN_VEXT_VV(vdiv_vv_w)
1589GEN_VEXT_VV(vdiv_vv_d)
1590GEN_VEXT_VV(vremu_vv_b)
1591GEN_VEXT_VV(vremu_vv_h)
1592GEN_VEXT_VV(vremu_vv_w)
1593GEN_VEXT_VV(vremu_vv_d)
1594GEN_VEXT_VV(vrem_vv_b)
1595GEN_VEXT_VV(vrem_vv_h)
1596GEN_VEXT_VV(vrem_vv_w)
1597GEN_VEXT_VV(vrem_vv_d)
85e6658c
LZ
1598
1599RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1600RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1601RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1602RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1603RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1604RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1605RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1606RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1607RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1608RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1609RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1610RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1611RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1612RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1613RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1614RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
8a085fb2 1615GEN_VEXT_VX(vdivu_vx_b)
1616GEN_VEXT_VX(vdivu_vx_h)
1617GEN_VEXT_VX(vdivu_vx_w)
1618GEN_VEXT_VX(vdivu_vx_d)
1619GEN_VEXT_VX(vdiv_vx_b)
1620GEN_VEXT_VX(vdiv_vx_h)
1621GEN_VEXT_VX(vdiv_vx_w)
1622GEN_VEXT_VX(vdiv_vx_d)
1623GEN_VEXT_VX(vremu_vx_b)
1624GEN_VEXT_VX(vremu_vx_h)
1625GEN_VEXT_VX(vremu_vx_w)
1626GEN_VEXT_VX(vremu_vx_d)
1627GEN_VEXT_VX(vrem_vx_b)
1628GEN_VEXT_VX(vrem_vx_h)
1629GEN_VEXT_VX(vrem_vx_w)
1630GEN_VEXT_VX(vrem_vx_d)
97b1cba3
LZ
1631
1632/* Vector Widening Integer Multiply Instructions */
1633RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1634RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1635RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1636RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1637RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1638RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1639RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1640RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1641RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
8a085fb2 1642GEN_VEXT_VV(vwmul_vv_b)
1643GEN_VEXT_VV(vwmul_vv_h)
1644GEN_VEXT_VV(vwmul_vv_w)
1645GEN_VEXT_VV(vwmulu_vv_b)
1646GEN_VEXT_VV(vwmulu_vv_h)
1647GEN_VEXT_VV(vwmulu_vv_w)
1648GEN_VEXT_VV(vwmulsu_vv_b)
1649GEN_VEXT_VV(vwmulsu_vv_h)
1650GEN_VEXT_VV(vwmulsu_vv_w)
97b1cba3
LZ
1651
1652RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1653RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1654RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1655RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1656RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1657RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1658RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1659RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1660RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
8a085fb2 1661GEN_VEXT_VX(vwmul_vx_b)
1662GEN_VEXT_VX(vwmul_vx_h)
1663GEN_VEXT_VX(vwmul_vx_w)
1664GEN_VEXT_VX(vwmulu_vx_b)
1665GEN_VEXT_VX(vwmulu_vx_h)
1666GEN_VEXT_VX(vwmulu_vx_w)
1667GEN_VEXT_VX(vwmulsu_vx_b)
1668GEN_VEXT_VX(vwmulsu_vx_h)
1669GEN_VEXT_VX(vwmulsu_vx_w)
54df813a
LZ
1670
1671/* Vector Single-Width Integer Multiply-Add Instructions */
1672#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1673static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1674{ \
1675 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1676 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1677 TD d = *((TD *)vd + HD(i)); \
1678 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1679}
1680
1681#define DO_MACC(N, M, D) (M * N + D)
1682#define DO_NMSAC(N, M, D) (-(M * N) + D)
1683#define DO_MADD(N, M, D) (M * D + N)
1684#define DO_NMSUB(N, M, D) (-(M * D) + N)
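/*
 * Note added for clarity: in the OP(N, M, D) callbacks (OPIVV3 above,
 * OPIVX3 below), N is the vs2 element, M is the vs1 element or rs1 scalar,
 * and D is the current destination element.  Hence DO_MACC computes
 * vd[i] = vs1 * vs2 + vd[i], while DO_MADD computes vd[i] = vs1 * vd[i] + vs2.
 */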
1685RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1686RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1687RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1688RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1689RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1690RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1691RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1692RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1693RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1694RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1695RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1696RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1697RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1698RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1699RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1700RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
8a085fb2 1701GEN_VEXT_VV(vmacc_vv_b)
1702GEN_VEXT_VV(vmacc_vv_h)
1703GEN_VEXT_VV(vmacc_vv_w)
1704GEN_VEXT_VV(vmacc_vv_d)
1705GEN_VEXT_VV(vnmsac_vv_b)
1706GEN_VEXT_VV(vnmsac_vv_h)
1707GEN_VEXT_VV(vnmsac_vv_w)
1708GEN_VEXT_VV(vnmsac_vv_d)
1709GEN_VEXT_VV(vmadd_vv_b)
1710GEN_VEXT_VV(vmadd_vv_h)
1711GEN_VEXT_VV(vmadd_vv_w)
1712GEN_VEXT_VV(vmadd_vv_d)
1713GEN_VEXT_VV(vnmsub_vv_b)
1714GEN_VEXT_VV(vnmsub_vv_h)
1715GEN_VEXT_VV(vnmsub_vv_w)
1716GEN_VEXT_VV(vnmsub_vv_d)
54df813a
LZ
1717
1718#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1719static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1720{ \
1721 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1722 TD d = *((TD *)vd + HD(i)); \
1723 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1724}
1725
1726RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1727RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1728RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1729RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1730RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1731RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1732RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1733RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1734RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1735RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1736RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1737RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1738RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1739RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1740RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1741RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
8a085fb2 1742GEN_VEXT_VX(vmacc_vx_b)
1743GEN_VEXT_VX(vmacc_vx_h)
1744GEN_VEXT_VX(vmacc_vx_w)
1745GEN_VEXT_VX(vmacc_vx_d)
1746GEN_VEXT_VX(vnmsac_vx_b)
1747GEN_VEXT_VX(vnmsac_vx_h)
1748GEN_VEXT_VX(vnmsac_vx_w)
1749GEN_VEXT_VX(vnmsac_vx_d)
1750GEN_VEXT_VX(vmadd_vx_b)
1751GEN_VEXT_VX(vmadd_vx_h)
1752GEN_VEXT_VX(vmadd_vx_w)
1753GEN_VEXT_VX(vmadd_vx_d)
1754GEN_VEXT_VX(vnmsub_vx_b)
1755GEN_VEXT_VX(vnmsub_vx_h)
1756GEN_VEXT_VX(vnmsub_vx_w)
1757GEN_VEXT_VX(vnmsub_vx_d)
2b587b33
LZ
1758
1759/* Vector Widening Integer Multiply-Add Instructions */
1760RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1761RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1762RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1763RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1764RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1765RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1766RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1767RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1768RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
8a085fb2 1769GEN_VEXT_VV(vwmaccu_vv_b)
1770GEN_VEXT_VV(vwmaccu_vv_h)
1771GEN_VEXT_VV(vwmaccu_vv_w)
1772GEN_VEXT_VV(vwmacc_vv_b)
1773GEN_VEXT_VV(vwmacc_vv_h)
1774GEN_VEXT_VV(vwmacc_vv_w)
1775GEN_VEXT_VV(vwmaccsu_vv_b)
1776GEN_VEXT_VV(vwmaccsu_vv_h)
1777GEN_VEXT_VV(vwmaccsu_vv_w)
2b587b33
LZ
1778
1779RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1780RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1781RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1782RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1783RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1784RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1785RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1786RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1787RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1788RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1789RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1790RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
8a085fb2 1791GEN_VEXT_VX(vwmaccu_vx_b)
1792GEN_VEXT_VX(vwmaccu_vx_h)
1793GEN_VEXT_VX(vwmaccu_vx_w)
1794GEN_VEXT_VX(vwmacc_vx_b)
1795GEN_VEXT_VX(vwmacc_vx_h)
1796GEN_VEXT_VX(vwmacc_vx_w)
1797GEN_VEXT_VX(vwmaccsu_vx_b)
1798GEN_VEXT_VX(vwmaccsu_vx_h)
1799GEN_VEXT_VX(vwmaccsu_vx_w)
1800GEN_VEXT_VX(vwmaccus_vx_b)
1801GEN_VEXT_VX(vwmaccus_vx_h)
1802GEN_VEXT_VX(vwmaccus_vx_w)
f020a7a1
LZ
1803
1804/* Vector Integer Merge and Move Instructions */
3479a814 1805#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
f020a7a1
LZ
1806void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1807 uint32_t desc) \
1808{ \
1809 uint32_t vl = env->vl; \
f020a7a1
LZ
1810 uint32_t i; \
1811 \
f714361e 1812 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
1813 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1814 *((ETYPE *)vd + H(i)) = s1; \
1815 } \
f714361e 1816 env->vstart = 0; \
f020a7a1
LZ
1817}
1818
3479a814
FC
1819GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1820GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1821GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1822GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
f020a7a1 1823
3479a814 1824#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
f020a7a1
LZ
1825void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1826 uint32_t desc) \
1827{ \
1828 uint32_t vl = env->vl; \
f020a7a1
LZ
1829 uint32_t i; \
1830 \
f714361e 1831 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
1832 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
1833 } \
f714361e 1834 env->vstart = 0; \
f020a7a1
LZ
1835}
1836
3479a814
FC
1837GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
1838GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1839GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1840GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
f020a7a1 1841
3479a814 1842#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
f020a7a1
LZ
1843void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1844 CPURISCVState *env, uint32_t desc) \
1845{ \
f020a7a1 1846 uint32_t vl = env->vl; \
f020a7a1
LZ
1847 uint32_t i; \
1848 \
f714361e 1849 for (i = env->vstart; i < vl; i++) { \
f9298de5 1850 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
f020a7a1
LZ
1851 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
1852 } \
f714361e 1853 env->vstart = 0; \
f020a7a1
LZ
1854}
1855
3479a814
FC
1856GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
1857GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1858GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1859GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
f020a7a1 1860
3479a814 1861#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
f020a7a1
LZ
1862void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1863 void *vs2, CPURISCVState *env, uint32_t desc) \
1864{ \
f020a7a1 1865 uint32_t vl = env->vl; \
f020a7a1
LZ
1866 uint32_t i; \
1867 \
f714361e 1868 for (i = env->vstart; i < vl; i++) { \
f020a7a1 1869 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1870 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
f020a7a1
LZ
1871 (ETYPE)(target_long)s1); \
1872 *((ETYPE *)vd + H(i)) = d; \
1873 } \
f714361e 1874 env->vstart = 0; \
f020a7a1
LZ
1875}
1876
3479a814
FC
1877GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
1878GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1879GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1880GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
eb2650e3
LZ
1881
1882/*
1883 *** Vector Fixed-Point Arithmetic Instructions
1884 */
1885
1886/* Vector Single-Width Saturating Add and Subtract */
1887
1888/*
1889 * As fixed-point instructions generally involve a rounding mode and
1890 * saturation, define the common macros for fixed-point arithmetic here.
1891 */
1892typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1893 CPURISCVState *env, int vxrm);
1894
1895#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1896static inline void \
1897do_##NAME(void *vd, void *vs1, void *vs2, int i, \
1898 CPURISCVState *env, int vxrm) \
1899{ \
1900 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1901 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1902 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
1903}
1904
1905static inline void
1906vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1907 CPURISCVState *env,
f9298de5 1908 uint32_t vl, uint32_t vm, int vxrm,
eb2650e3
LZ
1909 opivv2_rm_fn *fn)
1910{
f714361e 1911 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 1912 if (!vm && !vext_elem_mask(v0, i)) {
eb2650e3
LZ
1913 continue;
1914 }
1915 fn(vd, vs1, vs2, i, env, vxrm);
1916 }
f714361e 1917 env->vstart = 0;
eb2650e3
LZ
1918}
1919
1920static inline void
1921vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1922 CPURISCVState *env,
8a085fb2 1923 uint32_t desc,
3479a814 1924 opivv2_rm_fn *fn)
eb2650e3 1925{
eb2650e3
LZ
1926 uint32_t vm = vext_vm(desc);
1927 uint32_t vl = env->vl;
1928
1929 switch (env->vxrm) {
1930 case 0: /* rnu */
1931 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 1932 env, vl, vm, 0, fn);
eb2650e3
LZ
1933 break;
1934 case 1: /* rne */
1935 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 1936 env, vl, vm, 1, fn);
eb2650e3
LZ
1937 break;
1938 case 2: /* rdn */
1939 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 1940 env, vl, vm, 2, fn);
eb2650e3
LZ
1941 break;
1942 default: /* rod */
1943 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 1944 env, vl, vm, 3, fn);
eb2650e3
LZ
1945 break;
1946 }
eb2650e3
LZ
1947}
1948
1949/* generate helpers for fixed point instructions with OPIVV format */
8a085fb2 1950#define GEN_VEXT_VV_RM(NAME) \
eb2650e3
LZ
1951void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1952 CPURISCVState *env, uint32_t desc) \
1953{ \
8a085fb2 1954 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
3479a814 1955 do_##NAME); \
eb2650e3
LZ
1956}
1957
1958static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1959{
1960 uint8_t res = a + b;
1961 if (res < a) {
1962 res = UINT8_MAX;
1963 env->vxsat = 0x1;
1964 }
1965 return res;
1966}
1967
1968static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1969 uint16_t b)
1970{
1971 uint16_t res = a + b;
1972 if (res < a) {
1973 res = UINT16_MAX;
1974 env->vxsat = 0x1;
1975 }
1976 return res;
1977}
1978
1979static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1980 uint32_t b)
1981{
1982 uint32_t res = a + b;
1983 if (res < a) {
1984 res = UINT32_MAX;
1985 env->vxsat = 0x1;
1986 }
1987 return res;
1988}
1989
1990static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1991 uint64_t b)
1992{
1993 uint64_t res = a + b;
1994 if (res < a) {
1995 res = UINT64_MAX;
1996 env->vxsat = 0x1;
1997 }
1998 return res;
1999}
2000
2001RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2002RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2003RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2004RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
8a085fb2 2005GEN_VEXT_VV_RM(vsaddu_vv_b)
2006GEN_VEXT_VV_RM(vsaddu_vv_h)
2007GEN_VEXT_VV_RM(vsaddu_vv_w)
2008GEN_VEXT_VV_RM(vsaddu_vv_d)
eb2650e3
LZ
2009
2010typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2011 CPURISCVState *env, int vxrm);
2012
2013#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2014static inline void \
2015do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2016 CPURISCVState *env, int vxrm) \
2017{ \
2018 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2019 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2020}
2021
2022static inline void
2023vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2024 CPURISCVState *env,
f9298de5 2025 uint32_t vl, uint32_t vm, int vxrm,
eb2650e3
LZ
2026 opivx2_rm_fn *fn)
2027{
f714361e 2028 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2029 if (!vm && !vext_elem_mask(v0, i)) {
eb2650e3
LZ
2030 continue;
2031 }
2032 fn(vd, s1, vs2, i, env, vxrm);
2033 }
f714361e 2034 env->vstart = 0;
eb2650e3
LZ
2035}
2036
2037static inline void
2038vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2039 CPURISCVState *env,
8a085fb2 2040 uint32_t desc,
3479a814 2041 opivx2_rm_fn *fn)
eb2650e3 2042{
eb2650e3
LZ
2043 uint32_t vm = vext_vm(desc);
2044 uint32_t vl = env->vl;
2045
2046 switch (env->vxrm) {
2047 case 0: /* rnu */
2048 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2049 env, vl, vm, 0, fn);
eb2650e3
LZ
2050 break;
2051 case 1: /* rne */
2052 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2053 env, vl, vm, 1, fn);
eb2650e3
LZ
2054 break;
2055 case 2: /* rdn */
2056 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2057 env, vl, vm, 2, fn);
eb2650e3
LZ
2058 break;
2059 default: /* rod */
2060 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2061 env, vl, vm, 3, fn);
eb2650e3
LZ
2062 break;
2063 }
eb2650e3
LZ
2064}
2065
2066/* generate helpers for fixed point instructions with OPIVX format */
8a085fb2 2067#define GEN_VEXT_VX_RM(NAME) \
eb2650e3
LZ
2068void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2069 void *vs2, CPURISCVState *env, uint32_t desc) \
2070{ \
8a085fb2 2071 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
3479a814 2072 do_##NAME); \
eb2650e3
LZ
2073}
2074
2075RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2076RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2077RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2078RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
8a085fb2 2079GEN_VEXT_VX_RM(vsaddu_vx_b)
2080GEN_VEXT_VX_RM(vsaddu_vx_h)
2081GEN_VEXT_VX_RM(vsaddu_vx_w)
2082GEN_VEXT_VX_RM(vsaddu_vx_d)
eb2650e3
LZ
2083
2084static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2085{
2086 int8_t res = a + b;
2087 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2088 res = a > 0 ? INT8_MAX : INT8_MIN;
2089 env->vxsat = 0x1;
2090 }
2091 return res;
2092}
2093
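/*
 * Note added for clarity: (res ^ a) & (res ^ b) has the sign bit set exactly
 * when a and b share a sign and res has the opposite one, i.e. only on
 * signed overflow.  E.g. sadd8 of 0x7f and 0x01 yields res = 0x80, both
 * XORs have bit 7 set, and the result saturates to INT8_MAX.
 */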
2094static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2095{
2096 int16_t res = a + b;
2097 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2098 res = a > 0 ? INT16_MAX : INT16_MIN;
2099 env->vxsat = 0x1;
2100 }
2101 return res;
2102}
2103
2104static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2105{
2106 int32_t res = a + b;
2107 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2108 res = a > 0 ? INT32_MAX : INT32_MIN;
2109 env->vxsat = 0x1;
2110 }
2111 return res;
2112}
2113
2114static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2115{
2116 int64_t res = a + b;
2117 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2118 res = a > 0 ? INT64_MAX : INT64_MIN;
2119 env->vxsat = 0x1;
2120 }
2121 return res;
2122}
2123
2124RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2125RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2126RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2127RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
8a085fb2 2128GEN_VEXT_VV_RM(vsadd_vv_b)
2129GEN_VEXT_VV_RM(vsadd_vv_h)
2130GEN_VEXT_VV_RM(vsadd_vv_w)
2131GEN_VEXT_VV_RM(vsadd_vv_d)
eb2650e3
LZ
2132
2133RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2134RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2135RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2136RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
8a085fb2 2137GEN_VEXT_VX_RM(vsadd_vx_b)
2138GEN_VEXT_VX_RM(vsadd_vx_h)
2139GEN_VEXT_VX_RM(vsadd_vx_w)
2140GEN_VEXT_VX_RM(vsadd_vx_d)
eb2650e3
LZ
2141
2142static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2143{
2144 uint8_t res = a - b;
2145 if (res > a) {
2146 res = 0;
2147 env->vxsat = 0x1;
2148 }
2149 return res;
2150}
2151
2152static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2153 uint16_t b)
2154{
2155 uint16_t res = a - b;
2156 if (res > a) {
2157 res = 0;
2158 env->vxsat = 0x1;
2159 }
2160 return res;
2161}
2162
2163static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2164 uint32_t b)
2165{
2166 uint32_t res = a - b;
2167 if (res > a) {
2168 res = 0;
2169 env->vxsat = 0x1;
2170 }
2171 return res;
2172}
2173
2174static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2175 uint64_t b)
2176{
2177 uint64_t res = a - b;
2178 if (res > a) {
2179 res = 0;
2180 env->vxsat = 0x1;
2181 }
2182 return res;
2183}
2184
2185RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2186RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2187RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2188RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
8a085fb2 2189GEN_VEXT_VV_RM(vssubu_vv_b)
2190GEN_VEXT_VV_RM(vssubu_vv_h)
2191GEN_VEXT_VV_RM(vssubu_vv_w)
2192GEN_VEXT_VV_RM(vssubu_vv_d)
eb2650e3
LZ
2193
2194RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2195RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2196RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2197RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
8a085fb2 2198GEN_VEXT_VX_RM(vssubu_vx_b)
2199GEN_VEXT_VX_RM(vssubu_vx_h)
2200GEN_VEXT_VX_RM(vssubu_vx_w)
2201GEN_VEXT_VX_RM(vssubu_vx_d)
eb2650e3
LZ
2202
2203static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2204{
2205 int8_t res = a - b;
2206 if ((res ^ a) & (a ^ b) & INT8_MIN) {
65606f21 2207 res = a >= 0 ? INT8_MAX : INT8_MIN;
eb2650e3
LZ
2208 env->vxsat = 0x1;
2209 }
2210 return res;
2211}
2212
2213static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2214{
2215 int16_t res = a - b;
2216 if ((res ^ a) & (a ^ b) & INT16_MIN) {
65606f21 2217 res = a >= 0 ? INT16_MAX : INT16_MIN;
eb2650e3
LZ
2218 env->vxsat = 0x1;
2219 }
2220 return res;
2221}
2222
2223static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2224{
2225 int32_t res = a - b;
2226 if ((res ^ a) & (a ^ b) & INT32_MIN) {
65606f21 2227 res = a >= 0 ? INT32_MAX : INT32_MIN;
eb2650e3
LZ
2228 env->vxsat = 0x1;
2229 }
2230 return res;
2231}
2232
2233static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2234{
2235 int64_t res = a - b;
2236 if ((res ^ a) & (a ^ b) & INT64_MIN) {
65606f21 2237 res = a >= 0 ? INT64_MAX : INT64_MIN;
eb2650e3
LZ
2238 env->vxsat = 0x1;
2239 }
2240 return res;
2241}
2242
2243RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2244RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2245RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2246RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
8a085fb2 2247GEN_VEXT_VV_RM(vssub_vv_b)
2248GEN_VEXT_VV_RM(vssub_vv_h)
2249GEN_VEXT_VV_RM(vssub_vv_w)
2250GEN_VEXT_VV_RM(vssub_vv_d)
eb2650e3
LZ
2251
2252RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2253RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2254RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2255RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
8a085fb2 2256GEN_VEXT_VX_RM(vssub_vx_b)
2257GEN_VEXT_VX_RM(vssub_vx_h)
2258GEN_VEXT_VX_RM(vssub_vx_w)
2259GEN_VEXT_VX_RM(vssub_vx_d)
b7aee481
LZ
2260
2261/* Vector Single-Width Averaging Add and Subtract */
2262static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2263{
2264 uint8_t d = extract64(v, shift, 1);
2265 uint8_t d1;
2266 uint64_t D1, D2;
2267
2268 if (shift == 0 || shift > 64) {
2269 return 0;
2270 }
2271
2272 d1 = extract64(v, shift - 1, 1);
2273 D1 = extract64(v, 0, shift);
2274 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2275 return d1;
2276 } else if (vxrm == 1) { /* round-to-nearest-even */
2277 if (shift > 1) {
2278 D2 = extract64(v, 0, shift - 1);
2279 return d1 & ((D2 != 0) | d);
2280 } else {
2281 return d1 & d;
2282 }
2283 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2284 return !d & (D1 != 0);
2285 }
2286 return 0; /* round-down (truncate) */
2287}
2288
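/*
 * Worked example, added for illustration (the function name below is
 * hypothetical): averaging 3 and 4 gives the intermediate sum 7; with
 * shift = 1 the discarded bit is 1, so round-to-nearest-up adds it back
 * while round-down truncates.
 */
static inline void get_round_demo(void)
{
    uint64_t sum = 3 + 4;                             /* 0b111 */

    assert(get_round(0, sum, 1) == 1);                /* rnu: round up */
    assert(get_round(2, sum, 1) == 0);                /* rdn: truncate */
    assert(((sum >> 1) + get_round(0, sum, 1)) == 4); /* aadd-style average */
}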
2289static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2290{
2291 int64_t res = (int64_t)a + b;
2292 uint8_t round = get_round(vxrm, res, 1);
2293
2294 return (res >> 1) + round;
2295}
2296
2297static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2298{
2299 int64_t res = a + b;
2300 uint8_t round = get_round(vxrm, res, 1);
2301 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2302
2303 /* With signed overflow, bit 64 is the inverse of bit 63. */
2304 return ((res >> 1) ^ over) + round;
2305}
2306
2307RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2308RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2309RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2310RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
8a085fb2 2311GEN_VEXT_VV_RM(vaadd_vv_b)
2312GEN_VEXT_VV_RM(vaadd_vv_h)
2313GEN_VEXT_VV_RM(vaadd_vv_w)
2314GEN_VEXT_VV_RM(vaadd_vv_d)
b7aee481
LZ
2315
2316RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2317RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2318RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2319RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
8a085fb2 2320GEN_VEXT_VX_RM(vaadd_vx_b)
2321GEN_VEXT_VX_RM(vaadd_vx_h)
2322GEN_VEXT_VX_RM(vaadd_vx_w)
2323GEN_VEXT_VX_RM(vaadd_vx_d)
b7aee481 2324
8b99a110
FC
2325static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2326 uint32_t a, uint32_t b)
2327{
2328 uint64_t res = (uint64_t)a + b;
2329 uint8_t round = get_round(vxrm, res, 1);
2330
2331 return (res >> 1) + round;
2332}
2333
2334static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2335 uint64_t a, uint64_t b)
2336{
2337 uint64_t res = a + b;
2338 uint8_t round = get_round(vxrm, res, 1);
2339 uint64_t over = (uint64_t)(res < a) << 63;
2340
2341 return ((res >> 1) | over) + round;
2342}
2343
2344RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2345RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2346RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2347RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
8a085fb2 2348GEN_VEXT_VV_RM(vaaddu_vv_b)
2349GEN_VEXT_VV_RM(vaaddu_vv_h)
2350GEN_VEXT_VV_RM(vaaddu_vv_w)
2351GEN_VEXT_VV_RM(vaaddu_vv_d)
8b99a110
FC
2352
2353RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2354RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2355RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2356RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
8a085fb2 2357GEN_VEXT_VX_RM(vaaddu_vx_b)
2358GEN_VEXT_VX_RM(vaaddu_vx_h)
2359GEN_VEXT_VX_RM(vaaddu_vx_w)
2360GEN_VEXT_VX_RM(vaaddu_vx_d)
8b99a110 2361
b7aee481
LZ
2362static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2363{
2364 int64_t res = (int64_t)a - b;
2365 uint8_t round = get_round(vxrm, res, 1);
2366
2367 return (res >> 1) + round;
2368}
2369
2370static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2371{
2372 int64_t res = (int64_t)a - b;
2373 uint8_t round = get_round(vxrm, res, 1);
2374 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2375
2376 /* With signed overflow, bit 64 is the inverse of bit 63. */
2377 return ((res >> 1) ^ over) + round;
2378}
2379
2380RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2381RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2382RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2383RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
8a085fb2 2384GEN_VEXT_VV_RM(vasub_vv_b)
2385GEN_VEXT_VV_RM(vasub_vv_h)
2386GEN_VEXT_VV_RM(vasub_vv_w)
2387GEN_VEXT_VV_RM(vasub_vv_d)
b7aee481
LZ
2388
2389RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2390RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2391RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2392RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
8a085fb2 2393GEN_VEXT_VX_RM(vasub_vx_b)
2394GEN_VEXT_VX_RM(vasub_vx_h)
2395GEN_VEXT_VX_RM(vasub_vx_w)
2396GEN_VEXT_VX_RM(vasub_vx_d)
9f0ff9e5 2397
8b99a110
FC
2398static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2399 uint32_t a, uint32_t b)
2400{
2401 int64_t res = (int64_t)a - b;
2402 uint8_t round = get_round(vxrm, res, 1);
2403
2404 return (res >> 1) + round;
2405}
2406
2407static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2408 uint64_t a, uint64_t b)
2409{
2410 uint64_t res = (uint64_t)a - b;
2411 uint8_t round = get_round(vxrm, res, 1);
2412 uint64_t over = (uint64_t)(res > a) << 63;
2413
2414 return ((res >> 1) | over) + round;
2415}
2416
2417RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2418RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2419RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2420RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
8a085fb2 2421GEN_VEXT_VV_RM(vasubu_vv_b)
2422GEN_VEXT_VV_RM(vasubu_vv_h)
2423GEN_VEXT_VV_RM(vasubu_vv_w)
2424GEN_VEXT_VV_RM(vasubu_vv_d)
8b99a110
FC
2425
2426RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2427RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2428RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2429RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
8a085fb2 2430GEN_VEXT_VX_RM(vasubu_vx_b)
2431GEN_VEXT_VX_RM(vasubu_vx_h)
2432GEN_VEXT_VX_RM(vasubu_vx_w)
2433GEN_VEXT_VX_RM(vasubu_vx_d)
8b99a110 2434
9f0ff9e5
LZ
2435/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2436static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2437{
2438 uint8_t round;
2439 int16_t res;
2440
2441 res = (int16_t)a * (int16_t)b;
2442 round = get_round(vxrm, res, 7);
2443 res = (res >> 7) + round;
2444
2445 if (res > INT8_MAX) {
2446 env->vxsat = 0x1;
2447 return INT8_MAX;
2448 } else if (res < INT8_MIN) {
2449 env->vxsat = 0x1;
2450 return INT8_MIN;
2451 } else {
2452 return res;
2453 }
2454}
2455
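/*
 * Worked example, added for illustration: in Q7, 0x40 (0.5) * 0x40 (0.5)
 * gives the 16-bit product 0x1000; shifting right by 7 with rounding yields
 * 0x20 (0.25).  0x80 * 0x80 (-1.0 * -1.0) would give +1.0, which is not
 * representable, so the result saturates to INT8_MAX and vxsat is set.
 */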
2456static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2457{
2458 uint8_t round;
2459 int32_t res;
2460
2461 res = (int32_t)a * (int32_t)b;
2462 round = get_round(vxrm, res, 15);
2463 res = (res >> 15) + round;
2464
2465 if (res > INT16_MAX) {
2466 env->vxsat = 0x1;
2467 return INT16_MAX;
2468 } else if (res < INT16_MIN) {
2469 env->vxsat = 0x1;
2470 return INT16_MIN;
2471 } else {
2472 return res;
2473 }
2474}
2475
2476static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2477{
2478 uint8_t round;
2479 int64_t res;
2480
2481 res = (int64_t)a * (int64_t)b;
2482 round = get_round(vxrm, res, 31);
2483 res = (res >> 31) + round;
2484
2485 if (res > INT32_MAX) {
2486 env->vxsat = 0x1;
2487 return INT32_MAX;
2488 } else if (res < INT32_MIN) {
2489 env->vxsat = 0x1;
2490 return INT32_MIN;
2491 } else {
2492 return res;
2493 }
2494}
2495
2496static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2497{
2498 uint8_t round;
2499 uint64_t hi_64, lo_64;
2500 int64_t res;
2501
2502 if (a == INT64_MIN && b == INT64_MIN) {
2503 env->vxsat = 1;
2504 return INT64_MAX;
2505 }
2506
2507 muls64(&lo_64, &hi_64, a, b);
2508 round = get_round(vxrm, lo_64, 63);
2509 /*
2510 * Cannot overflow, as there are always
2511 * two sign bits after the multiply.
2512 */
2513 res = (hi_64 << 1) | (lo_64 >> 63);
2514 if (round) {
2515 if (res == INT64_MAX) {
2516 env->vxsat = 1;
2517 } else {
2518 res += 1;
2519 }
2520 }
2521 return res;
2522}
2523
2524RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2525RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2526RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2527RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
8a085fb2 2528GEN_VEXT_VV_RM(vsmul_vv_b)
2529GEN_VEXT_VV_RM(vsmul_vv_h)
2530GEN_VEXT_VV_RM(vsmul_vv_w)
2531GEN_VEXT_VV_RM(vsmul_vv_d)
9f0ff9e5
LZ
2532
2533RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2534RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2535RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2536RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
8a085fb2 2537GEN_VEXT_VX_RM(vsmul_vx_b)
2538GEN_VEXT_VX_RM(vsmul_vx_h)
2539GEN_VEXT_VX_RM(vsmul_vx_w)
2540GEN_VEXT_VX_RM(vsmul_vx_d)
0a1eaf00 2541
04a61406
LZ
2542/* Vector Single-Width Scaling Shift Instructions */
2543static inline uint8_t
2544vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2545{
2546 uint8_t round, shift = b & 0x7;
2547 uint8_t res;
2548
2549 round = get_round(vxrm, a, shift);
2550 res = (a >> shift) + round;
2551 return res;
2552}
2553static inline uint16_t
2554vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2555{
2556 uint8_t round, shift = b & 0xf;
2557 uint16_t res;
2558
2559 round = get_round(vxrm, a, shift);
2560 res = (a >> shift) + round;
2561 return res;
2562}
2563static inline uint32_t
2564vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2565{
2566 uint8_t round, shift = b & 0x1f;
2567 uint32_t res;
2568
2569 round = get_round(vxrm, a, shift);
2570 res = (a >> shift) + round;
2571 return res;
2572}
2573static inline uint64_t
2574vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2575{
2576 uint8_t round, shift = b & 0x3f;
2577 uint64_t res;
2578
2579 round = get_round(vxrm, a, shift);
2580 res = (a >> shift) + round;
2581 return res;
2582}
2583RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2584RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2585RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2586RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
8a085fb2 2587GEN_VEXT_VV_RM(vssrl_vv_b)
2588GEN_VEXT_VV_RM(vssrl_vv_h)
2589GEN_VEXT_VV_RM(vssrl_vv_w)
2590GEN_VEXT_VV_RM(vssrl_vv_d)
04a61406
LZ
2591
2592RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2593RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2594RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2595RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
8a085fb2 2596GEN_VEXT_VX_RM(vssrl_vx_b)
2597GEN_VEXT_VX_RM(vssrl_vx_h)
2598GEN_VEXT_VX_RM(vssrl_vx_w)
2599GEN_VEXT_VX_RM(vssrl_vx_d)
04a61406
LZ
2600
2601static inline int8_t
2602vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2603{
2604 uint8_t round, shift = b & 0x7;
2605 int8_t res;
2606
2607 round = get_round(vxrm, a, shift);
2608 res = (a >> shift) + round;
2609 return res;
2610}
2611static inline int16_t
2612vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2613{
2614 uint8_t round, shift = b & 0xf;
2615 int16_t res;
2616
2617 round = get_round(vxrm, a, shift);
2618 res = (a >> shift) + round;
2619 return res;
2620}
2621static inline int32_t
2622vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2623{
2624 uint8_t round, shift = b & 0x1f;
2625 int32_t res;
2626
2627 round = get_round(vxrm, a, shift);
2628 res = (a >> shift) + round;
2629 return res;
2630}
2631static inline int64_t
2632vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2633{
2634 uint8_t round, shift = b & 0x3f;
2635 int64_t res;
2636
2637 round = get_round(vxrm, a, shift);
2638 res = (a >> shift) + round;
2639 return res;
2640}
9ff3d287 2641
04a61406
LZ
2642RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2643RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2644RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2645RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
8a085fb2 2646GEN_VEXT_VV_RM(vssra_vv_b)
2647GEN_VEXT_VV_RM(vssra_vv_h)
2648GEN_VEXT_VV_RM(vssra_vv_w)
2649GEN_VEXT_VV_RM(vssra_vv_d)
04a61406
LZ
2650
2651RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2652RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2653RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2654RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
8a085fb2 2655GEN_VEXT_VX_RM(vssra_vx_b)
2656GEN_VEXT_VX_RM(vssra_vx_h)
2657GEN_VEXT_VX_RM(vssra_vx_w)
2658GEN_VEXT_VX_RM(vssra_vx_d)
9ff3d287
LZ
2659
2660/* Vector Narrowing Fixed-Point Clip Instructions */
2661static inline int8_t
2662vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2663{
2664 uint8_t round, shift = b & 0xf;
2665 int16_t res;
2666
2667 round = get_round(vxrm, a, shift);
2668 res = (a >> shift) + round;
2669 if (res > INT8_MAX) {
2670 env->vxsat = 0x1;
2671 return INT8_MAX;
2672 } else if (res < INT8_MIN) {
2673 env->vxsat = 0x1;
2674 return INT8_MIN;
2675 } else {
2676 return res;
2677 }
2678}
2679
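/*
 * Worked example, added for illustration: narrowing 0x1234 with a shift of
 * 8 gives 0x12, which fits in int8_t and is returned unchanged, while
 * 0x4000 shifted right by 4 gives 0x400, which exceeds INT8_MAX, so the
 * result saturates to 0x7f and vxsat is set.
 */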
2680static inline int16_t
2681vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2682{
2683 uint8_t round, shift = b & 0x1f;
2684 int32_t res;
2685
2686 round = get_round(vxrm, a, shift);
2687 res = (a >> shift) + round;
2688 if (res > INT16_MAX) {
2689 env->vxsat = 0x1;
2690 return INT16_MAX;
2691 } else if (res < INT16_MIN) {
2692 env->vxsat = 0x1;
2693 return INT16_MIN;
2694 } else {
2695 return res;
2696 }
2697}
2698
2699static inline int32_t
2700vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2701{
2702 uint8_t round, shift = b & 0x3f;
2703 int64_t res;
2704
2705 round = get_round(vxrm, a, shift);
2706 res = (a >> shift) + round;
2707 if (res > INT32_MAX) {
2708 env->vxsat = 0x1;
2709 return INT32_MAX;
2710 } else if (res < INT32_MIN) {
2711 env->vxsat = 0x1;
2712 return INT32_MIN;
2713 } else {
2714 return res;
2715 }
2716}
2717
a70b3a73
FC
2718RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2719RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2720RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
8a085fb2 2721GEN_VEXT_VV_RM(vnclip_wv_b)
2722GEN_VEXT_VV_RM(vnclip_wv_h)
2723GEN_VEXT_VV_RM(vnclip_wv_w)
a70b3a73
FC
2724
2725RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2726RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2727RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
8a085fb2 2728GEN_VEXT_VX_RM(vnclip_wx_b)
2729GEN_VEXT_VX_RM(vnclip_wx_h)
2730GEN_VEXT_VX_RM(vnclip_wx_w)
9ff3d287
LZ
2731
2732static inline uint8_t
2733vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2734{
2735 uint8_t round, shift = b & 0xf;
2736 uint16_t res;
2737
2738 round = get_round(vxrm, a, shift);
2739 res = (a >> shift) + round;
2740 if (res > UINT8_MAX) {
2741 env->vxsat = 0x1;
2742 return UINT8_MAX;
2743 } else {
2744 return res;
2745 }
2746}
2747
2748static inline uint16_t
2749vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2750{
2751 uint8_t round, shift = b & 0x1f;
2752 uint32_t res;
2753
2754 round = get_round(vxrm, a, shift);
2755 res = (a >> shift) + round;
2756 if (res > UINT16_MAX) {
2757 env->vxsat = 0x1;
2758 return UINT16_MAX;
2759 } else {
2760 return res;
2761 }
2762}
2763
2764static inline uint32_t
2765vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2766{
2767 uint8_t round, shift = b & 0x3f;
a70b3a73 2768 uint64_t res;
9ff3d287
LZ
2769
2770 round = get_round(vxrm, a, shift);
2771 res = (a >> shift) + round;
2772 if (res > UINT32_MAX) {
2773 env->vxsat = 0x1;
2774 return UINT32_MAX;
2775 } else {
2776 return res;
2777 }
2778}
2779
a70b3a73
FC
2780RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2781RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2782RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
8a085fb2 2783GEN_VEXT_VV_RM(vnclipu_wv_b)
2784GEN_VEXT_VV_RM(vnclipu_wv_h)
2785GEN_VEXT_VV_RM(vnclipu_wv_w)
9ff3d287 2786
a70b3a73
FC
2787RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2788RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2789RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
8a085fb2 2790GEN_VEXT_VX_RM(vnclipu_wx_b)
2791GEN_VEXT_VX_RM(vnclipu_wx_h)
2792GEN_VEXT_VX_RM(vnclipu_wx_w)
ce2a0343
LZ
2793
2794/*
2795 *** Vector Floating-Point Arithmetic Instructions
2796 */
2797/* Vector Single-Width Floating-Point Add/Subtract Instructions */
2798#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2799static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2800 CPURISCVState *env) \
2801{ \
2802 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2803 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2804 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
2805}
2806
8a085fb2 2807#define GEN_VEXT_VV_ENV(NAME) \
ce2a0343
LZ
2808void HELPER(NAME)(void *vd, void *v0, void *vs1, \
2809 void *vs2, CPURISCVState *env, \
2810 uint32_t desc) \
2811{ \
ce2a0343
LZ
2812 uint32_t vm = vext_vm(desc); \
2813 uint32_t vl = env->vl; \
2814 uint32_t i; \
2815 \
f714361e 2816 for (i = env->vstart; i < vl; i++) { \
f9298de5 2817 if (!vm && !vext_elem_mask(v0, i)) { \
ce2a0343
LZ
2818 continue; \
2819 } \
2820 do_##NAME(vd, vs1, vs2, i, env); \
2821 } \
f714361e 2822 env->vstart = 0; \
ce2a0343
LZ
2823}
2824
2825RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2826RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2827RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
8a085fb2 2828GEN_VEXT_VV_ENV(vfadd_vv_h)
2829GEN_VEXT_VV_ENV(vfadd_vv_w)
2830GEN_VEXT_VV_ENV(vfadd_vv_d)
ce2a0343
LZ
2831
2832#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2833static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2834 CPURISCVState *env) \
2835{ \
2836 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2837 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2838}
2839
8a085fb2 2840#define GEN_VEXT_VF(NAME) \
ce2a0343
LZ
2841void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
2842 void *vs2, CPURISCVState *env, \
2843 uint32_t desc) \
2844{ \
ce2a0343
LZ
2845 uint32_t vm = vext_vm(desc); \
2846 uint32_t vl = env->vl; \
2847 uint32_t i; \
2848 \
f714361e 2849 for (i = env->vstart; i < vl; i++) { \
f9298de5 2850 if (!vm && !vext_elem_mask(v0, i)) { \
ce2a0343
LZ
2851 continue; \
2852 } \
2853 do_##NAME(vd, s1, vs2, i, env); \
2854 } \
f714361e 2855 env->vstart = 0; \
ce2a0343
LZ
2856}
2857
2858RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2859RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2860RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
8a085fb2 2861GEN_VEXT_VF(vfadd_vf_h)
2862GEN_VEXT_VF(vfadd_vf_w)
2863GEN_VEXT_VF(vfadd_vf_d)
ce2a0343
LZ
2864
2865RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2866RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2867RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
8a085fb2 2868GEN_VEXT_VV_ENV(vfsub_vv_h)
2869GEN_VEXT_VV_ENV(vfsub_vv_w)
2870GEN_VEXT_VV_ENV(vfsub_vv_d)
ce2a0343
LZ
2871RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2872RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2873RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
8a085fb2 2874GEN_VEXT_VF(vfsub_vf_h)
2875GEN_VEXT_VF(vfsub_vf_w)
2876GEN_VEXT_VF(vfsub_vf_d)
ce2a0343
LZ
2877
2878static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2879{
2880 return float16_sub(b, a, s);
2881}
2882
2883static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2884{
2885 return float32_sub(b, a, s);
2886}
2887
2888static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2889{
2890 return float64_sub(b, a, s);
2891}
2892
2893RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2894RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2895RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
8a085fb2 2896GEN_VEXT_VF(vfrsub_vf_h)
2897GEN_VEXT_VF(vfrsub_vf_w)
2898GEN_VEXT_VF(vfrsub_vf_d)
eeffab2e
LZ
2899
2900/* Vector Widening Floating-Point Add/Subtract Instructions */
2901static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2902{
2903 return float32_add(float16_to_float32(a, true, s),
2904 float16_to_float32(b, true, s), s);
2905}
2906
2907static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2908{
2909 return float64_add(float32_to_float64(a, s),
2910 float32_to_float64(b, s), s);
2911
2912}
2913
2914RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2915RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
8a085fb2 2916GEN_VEXT_VV_ENV(vfwadd_vv_h)
2917GEN_VEXT_VV_ENV(vfwadd_vv_w)
eeffab2e
LZ
2918RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2919RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
8a085fb2 2920GEN_VEXT_VF(vfwadd_vf_h)
2921GEN_VEXT_VF(vfwadd_vf_w)
eeffab2e
LZ
2922
2923static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2924{
2925 return float32_sub(float16_to_float32(a, true, s),
2926 float16_to_float32(b, true, s), s);
2927}
2928
2929static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2930{
2931 return float64_sub(float32_to_float64(a, s),
2932 float32_to_float64(b, s), s);
2933
2934}
2935
2936RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2937RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
8a085fb2 2938GEN_VEXT_VV_ENV(vfwsub_vv_h)
2939GEN_VEXT_VV_ENV(vfwsub_vv_w)
eeffab2e
LZ
2940RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2941RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
8a085fb2 2942GEN_VEXT_VF(vfwsub_vf_h)
2943GEN_VEXT_VF(vfwsub_vf_w)
eeffab2e
LZ
2944
2945static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2946{
2947 return float32_add(a, float16_to_float32(b, true, s), s);
2948}
2949
2950static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2951{
2952 return float64_add(a, float32_to_float64(b, s), s);
2953}
2954
2955RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2956RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
8a085fb2 2957GEN_VEXT_VV_ENV(vfwadd_wv_h)
2958GEN_VEXT_VV_ENV(vfwadd_wv_w)
eeffab2e
LZ
2959RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2960RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
8a085fb2 2961GEN_VEXT_VF(vfwadd_wf_h)
2962GEN_VEXT_VF(vfwadd_wf_w)
eeffab2e
LZ
2963
2964static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2965{
2966 return float32_sub(a, float16_to_float32(b, true, s), s);
2967}
2968
2969static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2970{
2971 return float64_sub(a, float32_to_float64(b, s), s);
2972}
2973
2974RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
2975RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
8a085fb2 2976GEN_VEXT_VV_ENV(vfwsub_wv_h)
2977GEN_VEXT_VV_ENV(vfwsub_wv_w)
eeffab2e
LZ
2978RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
2979RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
8a085fb2 2980GEN_VEXT_VF(vfwsub_wf_h)
2981GEN_VEXT_VF(vfwsub_wf_w)
0e0057cb
LZ
2982
2983/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
2984RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
2985RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
2986RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
8a085fb2 2987GEN_VEXT_VV_ENV(vfmul_vv_h)
2988GEN_VEXT_VV_ENV(vfmul_vv_w)
2989GEN_VEXT_VV_ENV(vfmul_vv_d)
0e0057cb
LZ
2990RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
2991RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
2992RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
8a085fb2 2993GEN_VEXT_VF(vfmul_vf_h)
2994GEN_VEXT_VF(vfmul_vf_w)
2995GEN_VEXT_VF(vfmul_vf_d)
0e0057cb
LZ
2996
2997RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
2998RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
2999RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
8a085fb2 3000GEN_VEXT_VV_ENV(vfdiv_vv_h)
3001GEN_VEXT_VV_ENV(vfdiv_vv_w)
3002GEN_VEXT_VV_ENV(vfdiv_vv_d)
0e0057cb
LZ
3003RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3004RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3005RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
8a085fb2 3006GEN_VEXT_VF(vfdiv_vf_h)
3007GEN_VEXT_VF(vfdiv_vf_w)
3008GEN_VEXT_VF(vfdiv_vf_d)
0e0057cb
LZ
3009
3010static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3011{
3012 return float16_div(b, a, s);
3013}
3014
3015static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3016{
3017 return float32_div(b, a, s);
3018}
3019
3020static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3021{
3022 return float64_div(b, a, s);
3023}
3024
3025RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3026RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3027RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
8a085fb2 3028GEN_VEXT_VF(vfrdiv_vf_h)
3029GEN_VEXT_VF(vfrdiv_vf_w)
3030GEN_VEXT_VF(vfrdiv_vf_d)
f7c7b7cd
LZ
3031
3032/* Vector Widening Floating-Point Multiply */
3033static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3034{
3035 return float32_mul(float16_to_float32(a, true, s),
3036 float16_to_float32(b, true, s), s);
3037}
3038
3039static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3040{
3041 return float64_mul(float32_to_float64(a, s),
3042 float32_to_float64(b, s), s);
3043
3044}
3045RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3046RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
8a085fb2 3047GEN_VEXT_VV_ENV(vfwmul_vv_h)
3048GEN_VEXT_VV_ENV(vfwmul_vv_w)
f7c7b7cd
LZ
3049RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3050RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
8a085fb2 3051GEN_VEXT_VF(vfwmul_vf_h)
3052GEN_VEXT_VF(vfwmul_vf_w)
4aa5a8fe
LZ
3053
3054/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3055#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3056static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3057 CPURISCVState *env) \
3058{ \
3059 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3060 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3061 TD d = *((TD *)vd + HD(i)); \
3062 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3063}
3064
3065static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3066{
3067 return float16_muladd(a, b, d, 0, s);
3068}
3069
3070static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3071{
3072 return float32_muladd(a, b, d, 0, s);
3073}
3074
3075static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3076{
3077 return float64_muladd(a, b, d, 0, s);
3078}
3079
3080RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3081RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3082RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
8a085fb2 3083GEN_VEXT_VV_ENV(vfmacc_vv_h)
3084GEN_VEXT_VV_ENV(vfmacc_vv_w)
3085GEN_VEXT_VV_ENV(vfmacc_vv_d)
4aa5a8fe
LZ
3086
3087#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3088static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3089 CPURISCVState *env) \
3090{ \
3091 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3092 TD d = *((TD *)vd + HD(i)); \
3093 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3094}
3095
3096RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3097RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3098RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
8a085fb2 3099GEN_VEXT_VF(vfmacc_vf_h)
3100GEN_VEXT_VF(vfmacc_vf_w)
3101GEN_VEXT_VF(vfmacc_vf_d)
4aa5a8fe
LZ
3102
3103static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3104{
3105 return float16_muladd(a, b, d,
3106 float_muladd_negate_c | float_muladd_negate_product, s);
3107}
3108
3109static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3110{
3111 return float32_muladd(a, b, d,
3112 float_muladd_negate_c | float_muladd_negate_product, s);
3113}
3114
3115static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3116{
3117 return float64_muladd(a, b, d,
3118 float_muladd_negate_c | float_muladd_negate_product, s);
3119}
3120
3121RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3122RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3123RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
8a085fb2 3124GEN_VEXT_VV_ENV(vfnmacc_vv_h)
3125GEN_VEXT_VV_ENV(vfnmacc_vv_w)
3126GEN_VEXT_VV_ENV(vfnmacc_vv_d)
4aa5a8fe
LZ
3127RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3128RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3129RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
8a085fb2 3130GEN_VEXT_VF(vfnmacc_vf_h)
3131GEN_VEXT_VF(vfnmacc_vf_w)
3132GEN_VEXT_VF(vfnmacc_vf_d)
4aa5a8fe
LZ
3133
3134static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3135{
3136 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3137}
3138
3139static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3140{
3141 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3142}
3143
3144static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3145{
3146 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3147}
3148
3149RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3150RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3151RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
8a085fb2 3152GEN_VEXT_VV_ENV(vfmsac_vv_h)
3153GEN_VEXT_VV_ENV(vfmsac_vv_w)
3154GEN_VEXT_VV_ENV(vfmsac_vv_d)
4aa5a8fe
LZ
3155RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3156RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3157RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
8a085fb2 3158GEN_VEXT_VF(vfmsac_vf_h)
3159GEN_VEXT_VF(vfmsac_vf_w)
3160GEN_VEXT_VF(vfmsac_vf_d)
4aa5a8fe
LZ
3161
3162static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3163{
3164 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3165}
3166
3167static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3168{
3169 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3170}
3171
3172static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3173{
3174 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3175}
3176
3177RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3178RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3179RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
8a085fb2 3180GEN_VEXT_VV_ENV(vfnmsac_vv_h)
3181GEN_VEXT_VV_ENV(vfnmsac_vv_w)
3182GEN_VEXT_VV_ENV(vfnmsac_vv_d)
4aa5a8fe
LZ
3183RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3184RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3185RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
8a085fb2 3186GEN_VEXT_VF(vfnmsac_vf_h)
3187GEN_VEXT_VF(vfnmsac_vf_w)
3188GEN_VEXT_VF(vfnmsac_vf_d)
4aa5a8fe
LZ
3189
3190static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3191{
3192 return float16_muladd(d, b, a, 0, s);
3193}
3194
3195static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3196{
3197 return float32_muladd(d, b, a, 0, s);
3198}
3199
3200static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3201{
3202 return float64_muladd(d, b, a, 0, s);
3203}
3204
3205RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3206RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3207RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
8a085fb2 3208GEN_VEXT_VV_ENV(vfmadd_vv_h)
3209GEN_VEXT_VV_ENV(vfmadd_vv_w)
3210GEN_VEXT_VV_ENV(vfmadd_vv_d)
4aa5a8fe
LZ
3211RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3212RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3213RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
8a085fb2 3214GEN_VEXT_VF(vfmadd_vf_h)
3215GEN_VEXT_VF(vfmadd_vf_w)
3216GEN_VEXT_VF(vfmadd_vf_d)
4aa5a8fe
LZ
3217
3218static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3219{
3220 return float16_muladd(d, b, a,
3221 float_muladd_negate_c | float_muladd_negate_product, s);
3222}
3223
3224static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3225{
3226 return float32_muladd(d, b, a,
3227 float_muladd_negate_c | float_muladd_negate_product, s);
3228}
3229
3230static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3231{
3232 return float64_muladd(d, b, a,
3233 float_muladd_negate_c | float_muladd_negate_product, s);
3234}
3235
3236RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3237RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3238RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
8a085fb2 3239GEN_VEXT_VV_ENV(vfnmadd_vv_h)
3240GEN_VEXT_VV_ENV(vfnmadd_vv_w)
3241GEN_VEXT_VV_ENV(vfnmadd_vv_d)
4aa5a8fe
LZ
3242RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3243RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3244RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
8a085fb2 3245GEN_VEXT_VF(vfnmadd_vf_h)
3246GEN_VEXT_VF(vfnmadd_vf_w)
3247GEN_VEXT_VF(vfnmadd_vf_d)
4aa5a8fe
LZ
3248
3249static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3250{
3251 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3252}
3253
3254static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3255{
3256 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3257}
3258
3259static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3260{
3261 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3262}
3263
3264RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3265RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3266RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
8a085fb2 3267GEN_VEXT_VV_ENV(vfmsub_vv_h)
3268GEN_VEXT_VV_ENV(vfmsub_vv_w)
3269GEN_VEXT_VV_ENV(vfmsub_vv_d)
4aa5a8fe
LZ
3270RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3271RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3272RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
8a085fb2 3273GEN_VEXT_VF(vfmsub_vf_h)
3274GEN_VEXT_VF(vfmsub_vf_w)
3275GEN_VEXT_VF(vfmsub_vf_d)
4aa5a8fe
LZ
3276
3277static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3278{
3279 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3280}
3281
3282static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3283{
3284 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3285}
3286
3287static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3288{
3289 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3290}
3291
3292RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3293RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3294RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
8a085fb2 3295GEN_VEXT_VV_ENV(vfnmsub_vv_h)
3296GEN_VEXT_VV_ENV(vfnmsub_vv_w)
3297GEN_VEXT_VV_ENV(vfnmsub_vv_d)
4aa5a8fe
LZ
3298RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3299RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3300RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
8a085fb2 3301GEN_VEXT_VF(vfnmsub_vf_h)
3302GEN_VEXT_VF(vfnmsub_vf_w)
3303GEN_VEXT_VF(vfnmsub_vf_d)
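/*
 * Added summary (derived from the helpers above): with vs1/vs2 the vector
 * operands and vd the accumulator, the eight single-width FMA forms are
 *
 *   vfmacc:  vd =  (vs1 * vs2) + vd     vfmadd:  vd =  (vs1 * vd) + vs2
 *   vfnmacc: vd = -(vs1 * vs2) - vd     vfnmadd: vd = -(vs1 * vd) - vs2
 *   vfmsac:  vd =  (vs1 * vs2) - vd     vfmsub:  vd =  (vs1 * vd) - vs2
 *   vfnmsac: vd = -(vs1 * vs2) + vd     vfnmsub: vd = -(vs1 * vd) + vs2
 *
 * The *macc/*msac helpers pass (s2, s1, d) straight to float*_muladd,
 * the *madd/*msub helpers swap the accumulator into the product, and the
 * float_muladd_negate_product / float_muladd_negate_c flags provide the
 * sign variants.
 */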
0dd50959
LZ
3304
3305/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3306static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3307{
3308 return float32_muladd(float16_to_float32(a, true, s),
3309 float16_to_float32(b, true, s), d, 0, s);
3310}
3311
3312static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3313{
3314 return float64_muladd(float32_to_float64(a, s),
3315 float32_to_float64(b, s), d, 0, s);
3316}
3317
3318RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3319RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
8a085fb2 3320GEN_VEXT_VV_ENV(vfwmacc_vv_h)
3321GEN_VEXT_VV_ENV(vfwmacc_vv_w)
0dd50959
LZ
3322RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3323RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
8a085fb2 3324GEN_VEXT_VF(vfwmacc_vf_h)
3325GEN_VEXT_VF(vfwmacc_vf_w)
0dd50959
LZ
3326
3327static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3328{
3329 return float32_muladd(float16_to_float32(a, true, s),
3330 float16_to_float32(b, true, s), d,
3331 float_muladd_negate_c | float_muladd_negate_product, s);
3332}
3333
3334static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3335{
3336 return float64_muladd(float32_to_float64(a, s),
3337 float32_to_float64(b, s), d,
3338 float_muladd_negate_c | float_muladd_negate_product, s);
3339}
3340
3341RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3342RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
8a085fb2 3343GEN_VEXT_VV_ENV(vfwnmacc_vv_h)
3344GEN_VEXT_VV_ENV(vfwnmacc_vv_w)
0dd50959
LZ
3345RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3346RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
8a085fb2 3347GEN_VEXT_VF(vfwnmacc_vf_h)
3348GEN_VEXT_VF(vfwnmacc_vf_w)
0dd50959
LZ
3349
3350static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3351{
3352 return float32_muladd(float16_to_float32(a, true, s),
3353 float16_to_float32(b, true, s), d,
3354 float_muladd_negate_c, s);
3355}
3356
3357static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3358{
3359 return float64_muladd(float32_to_float64(a, s),
3360 float32_to_float64(b, s), d,
3361 float_muladd_negate_c, s);
3362}
3363
3364RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3365RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
8a085fb2 3366GEN_VEXT_VV_ENV(vfwmsac_vv_h)
3367GEN_VEXT_VV_ENV(vfwmsac_vv_w)
0dd50959
LZ
3368RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3369RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
8a085fb2 3370GEN_VEXT_VF(vfwmsac_vf_h)
3371GEN_VEXT_VF(vfwmsac_vf_w)
0dd50959
LZ
3372
3373static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3374{
3375 return float32_muladd(float16_to_float32(a, true, s),
3376 float16_to_float32(b, true, s), d,
3377 float_muladd_negate_product, s);
3378}
3379
3380static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3381{
3382 return float64_muladd(float32_to_float64(a, s),
3383 float32_to_float64(b, s), d,
3384 float_muladd_negate_product, s);
3385}
3386
3387RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3388RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
8a085fb2 3389GEN_VEXT_VV_ENV(vfwnmsac_vv_h)
3390GEN_VEXT_VV_ENV(vfwnmsac_vv_w)
0dd50959
LZ
3391RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3392RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
8a085fb2 3393GEN_VEXT_VF(vfwnmsac_vf_h)
3394GEN_VEXT_VF(vfwnmsac_vf_w)
d9e4ce72
LZ
3395
3396/* Vector Floating-Point Square-Root Instruction */
3397/* (TD, T2, TX2) */
3398#define OP_UU_H uint16_t, uint16_t, uint16_t
3399#define OP_UU_W uint32_t, uint32_t, uint32_t
3400#define OP_UU_D uint64_t, uint64_t, uint64_t
3401
3402#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3403static void do_##NAME(void *vd, void *vs2, int i, \
3404 CPURISCVState *env) \
3405{ \
3406 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3407 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3408}
3409
8a085fb2 3410#define GEN_VEXT_V_ENV(NAME) \
d9e4ce72
LZ
3411void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3412 CPURISCVState *env, uint32_t desc) \
3413{ \
d9e4ce72
LZ
3414 uint32_t vm = vext_vm(desc); \
3415 uint32_t vl = env->vl; \
3416 uint32_t i; \
3417 \
3418 if (vl == 0) { \
3419 return; \
3420 } \
f714361e 3421 for (i = env->vstart; i < vl; i++) { \
f9298de5 3422 if (!vm && !vext_elem_mask(v0, i)) { \
d9e4ce72
LZ
3423 continue; \
3424 } \
3425 do_##NAME(vd, vs2, i, env); \
3426 } \
f714361e 3427 env->vstart = 0; \
d9e4ce72
LZ
3428}
3429
3430RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3431RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3432RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
8a085fb2 3433GEN_VEXT_V_ENV(vfsqrt_v_h)
3434GEN_VEXT_V_ENV(vfsqrt_v_w)
3435GEN_VEXT_V_ENV(vfsqrt_v_d)
230b53dd 3436
e848a1e5
FC
3437/*
3438 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3439 *
3440 * Adapted from riscv-v-spec recip.c:
3441 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3442 */
3443static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3444{
3445 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3446 uint64_t exp = extract64(f, frac_size, exp_size);
3447 uint64_t frac = extract64(f, 0, frac_size);
3448
3449 const uint8_t lookup_table[] = {
3450 52, 51, 50, 48, 47, 46, 44, 43,
3451 42, 41, 40, 39, 38, 36, 35, 34,
3452 33, 32, 31, 30, 30, 29, 28, 27,
3453 26, 25, 24, 23, 23, 22, 21, 20,
3454 19, 19, 18, 17, 16, 16, 15, 14,
3455 14, 13, 12, 12, 11, 10, 10, 9,
3456 9, 8, 7, 7, 6, 6, 5, 4,
3457 4, 3, 3, 2, 2, 1, 1, 0,
3458 127, 125, 123, 121, 119, 118, 116, 114,
3459 113, 111, 109, 108, 106, 105, 103, 102,
3460 100, 99, 97, 96, 95, 93, 92, 91,
3461 90, 88, 87, 86, 85, 84, 83, 82,
3462 80, 79, 78, 77, 76, 75, 74, 73,
3463 72, 71, 70, 70, 69, 68, 67, 66,
3464 65, 64, 63, 63, 62, 61, 60, 59,
3465 59, 58, 57, 56, 56, 55, 54, 53
3466 };
3467 const int precision = 7;
3468
3469 if (exp == 0 && frac != 0) { /* subnormal */
3470 /* Normalize the subnormal. */
3471 while (extract64(frac, frac_size - 1, 1) == 0) {
3472 exp--;
3473 frac <<= 1;
3474 }
3475
3476 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3477 }
3478
3479 int idx = ((exp & 1) << (precision - 1)) |
3480 (frac >> (frac_size - precision + 1));
3481 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3482 (frac_size - precision);
3483 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3484
3485 uint64_t val = 0;
3486 val = deposit64(val, 0, frac_size, out_frac);
3487 val = deposit64(val, frac_size, exp_size, out_exp);
3488 val = deposit64(val, frac_size + exp_size, 1, sign);
3489 return val;
3490}
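/*
 * Added worked example (illustrative, not in the original source): for the
 * float32 input 4.0 (sign 0, exp 129, frac 0) with exp_size = 8 and
 * frac_size = 23:
 *
 *   idx      = ((129 & 1) << 6) | (0 >> 17)  = 64
 *   out_frac = lookup_table[64] << 16        = 127 << 16
 *   out_exp  = (3 * 127 + ~129) / 2, which wraps to (380 - 129) / 2 = 125
 *
 * giving 0x3eff0000 = 0.498046875, i.e. 1/sqrt(4.0) = 0.5 to within the
 * 7-bit precision of the estimate.
 */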
3491
3492static float16 frsqrt7_h(float16 f, float_status *s)
3493{
3494 int exp_size = 5, frac_size = 10;
3495 bool sign = float16_is_neg(f);
3496
3497 /*
3498 * frsqrt7(sNaN) = canonical NaN
3499 * frsqrt7(-inf) = canonical NaN
3500 * frsqrt7(-normal) = canonical NaN
3501 * frsqrt7(-subnormal) = canonical NaN
3502 */
3503 if (float16_is_signaling_nan(f, s) ||
3504 (float16_is_infinity(f) && sign) ||
3505 (float16_is_normal(f) && sign) ||
3506 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3507 s->float_exception_flags |= float_flag_invalid;
3508 return float16_default_nan(s);
3509 }
3510
3511 /* frsqrt7(qNaN) = canonical NaN */
3512 if (float16_is_quiet_nan(f, s)) {
3513 return float16_default_nan(s);
3514 }
3515
3516 /* frsqrt7(+-0) = +-inf */
3517 if (float16_is_zero(f)) {
3518 s->float_exception_flags |= float_flag_divbyzero;
3519 return float16_set_sign(float16_infinity, sign);
3520 }
3521
3522 /* frsqrt7(+inf) = +0 */
3523 if (float16_is_infinity(f) && !sign) {
3524 return float16_set_sign(float16_zero, sign);
3525 }
3526
3527 /* +normal, +subnormal */
3528 uint64_t val = frsqrt7(f, exp_size, frac_size);
3529 return make_float16(val);
3530}
3531
3532static float32 frsqrt7_s(float32 f, float_status *s)
3533{
3534 int exp_size = 8, frac_size = 23;
3535 bool sign = float32_is_neg(f);
3536
3537 /*
3538 * frsqrt7(sNaN) = canonical NaN
3539 * frsqrt7(-inf) = canonical NaN
3540 * frsqrt7(-normal) = canonical NaN
3541 * frsqrt7(-subnormal) = canonical NaN
3542 */
3543 if (float32_is_signaling_nan(f, s) ||
3544 (float32_is_infinity(f) && sign) ||
3545 (float32_is_normal(f) && sign) ||
3546 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3547 s->float_exception_flags |= float_flag_invalid;
3548 return float32_default_nan(s);
3549 }
3550
3551 /* frsqrt7(qNaN) = canonical NaN */
3552 if (float32_is_quiet_nan(f, s)) {
3553 return float32_default_nan(s);
3554 }
3555
3556 /* frsqrt7(+-0) = +-inf */
3557 if (float32_is_zero(f)) {
3558 s->float_exception_flags |= float_flag_divbyzero;
3559 return float32_set_sign(float32_infinity, sign);
3560 }
3561
3562 /* frsqrt7(+inf) = +0 */
3563 if (float32_is_infinity(f) && !sign) {
3564 return float32_set_sign(float32_zero, sign);
3565 }
3566
3567 /* +normal, +subnormal */
3568 uint64_t val = frsqrt7(f, exp_size, frac_size);
3569 return make_float32(val);
3570}
3571
3572static float64 frsqrt7_d(float64 f, float_status *s)
3573{
3574 int exp_size = 11, frac_size = 52;
3575 bool sign = float64_is_neg(f);
3576
3577 /*
3578 * frsqrt7(sNaN) = canonical NaN
3579 * frsqrt7(-inf) = canonical NaN
3580 * frsqrt7(-normal) = canonical NaN
3581 * frsqrt7(-subnormal) = canonical NaN
3582 */
3583 if (float64_is_signaling_nan(f, s) ||
3584 (float64_is_infinity(f) && sign) ||
3585 (float64_is_normal(f) && sign) ||
3586 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3587 s->float_exception_flags |= float_flag_invalid;
3588 return float64_default_nan(s);
3589 }
3590
3591 /* frsqrt7(qNaN) = canonical NaN */
3592 if (float64_is_quiet_nan(f, s)) {
3593 return float64_default_nan(s);
3594 }
3595
3596 /* frsqrt7(+-0) = +-inf */
3597 if (float64_is_zero(f)) {
3598 s->float_exception_flags |= float_flag_divbyzero;
3599 return float64_set_sign(float64_infinity, sign);
3600 }
3601
3602 /* frsqrt7(+inf) = +0 */
3603 if (float64_is_infinity(f) && !sign) {
3604 return float64_set_sign(float64_zero, sign);
3605 }
3606
3607 /* +normal, +subnormal */
3608 uint64_t val = frsqrt7(f, exp_size, frac_size);
3609 return make_float64(val);
3610}
3611
3612RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3613RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3614RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
8a085fb2 3615GEN_VEXT_V_ENV(vfrsqrt7_v_h)
3616GEN_VEXT_V_ENV(vfrsqrt7_v_w)
3617GEN_VEXT_V_ENV(vfrsqrt7_v_d)
e848a1e5 3618
55c35407
FC
3619/*
3620 * Vector Floating-Point Reciprocal Estimate Instruction
3621 *
3622 * Adapted from riscv-v-spec recip.c:
3623 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3624 */
3625static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3626 float_status *s)
3627{
3628 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3629 uint64_t exp = extract64(f, frac_size, exp_size);
3630 uint64_t frac = extract64(f, 0, frac_size);
3631
3632 const uint8_t lookup_table[] = {
3633 127, 125, 123, 121, 119, 117, 116, 114,
3634 112, 110, 109, 107, 105, 104, 102, 100,
3635 99, 97, 96, 94, 93, 91, 90, 88,
3636 87, 85, 84, 83, 81, 80, 79, 77,
3637 76, 75, 74, 72, 71, 70, 69, 68,
3638 66, 65, 64, 63, 62, 61, 60, 59,
3639 58, 57, 56, 55, 54, 53, 52, 51,
3640 50, 49, 48, 47, 46, 45, 44, 43,
3641 42, 41, 40, 40, 39, 38, 37, 36,
3642 35, 35, 34, 33, 32, 31, 31, 30,
3643 29, 28, 28, 27, 26, 25, 25, 24,
3644 23, 23, 22, 21, 21, 20, 19, 19,
3645 18, 17, 17, 16, 15, 15, 14, 14,
3646 13, 12, 12, 11, 11, 10, 9, 9,
3647 8, 8, 7, 7, 6, 5, 5, 4,
3648 4, 3, 3, 2, 2, 1, 1, 0
3649 };
3650 const int precision = 7;
3651
3652 if (exp == 0 && frac != 0) { /* subnormal */
3653 /* Normalize the subnormal. */
3654 while (extract64(frac, frac_size - 1, 1) == 0) {
3655 exp--;
3656 frac <<= 1;
3657 }
3658
3659 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3660
3661 if (exp != 0 && exp != UINT64_MAX) {
3662 /*
3663 * Overflow to inf or max value of same sign,
3664 * depending on sign and rounding mode.
3665 */
3666 s->float_exception_flags |= (float_flag_inexact |
3667 float_flag_overflow);
3668
3669 if ((s->float_rounding_mode == float_round_to_zero) ||
3670 ((s->float_rounding_mode == float_round_down) && !sign) ||
3671 ((s->float_rounding_mode == float_round_up) && sign)) {
3672 /* Return greatest/negative finite value. */
3673 return (sign << (exp_size + frac_size)) |
3674 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3675 } else {
3676 /* Return +-inf. */
3677 return (sign << (exp_size + frac_size)) |
3678 MAKE_64BIT_MASK(frac_size, exp_size);
3679 }
3680 }
3681 }
3682
3683 int idx = frac >> (frac_size - precision);
3684 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3685 (frac_size - precision);
3686 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3687
3688 if (out_exp == 0 || out_exp == UINT64_MAX) {
3689 /*
3690 * The result is subnormal, but don't raise the underflow exception,
3691 * because there's no additional loss of precision.
3692 */
3693 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3694 if (out_exp == UINT64_MAX) {
3695 out_frac >>= 1;
3696 out_exp = 0;
3697 }
3698 }
3699
3700 uint64_t val = 0;
3701 val = deposit64(val, 0, frac_size, out_frac);
3702 val = deposit64(val, frac_size, exp_size, out_exp);
3703 val = deposit64(val, frac_size + exp_size, 1, sign);
3704 return val;
3705}
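/*
 * Added worked example (illustrative, not in the original source): for the
 * float32 input 3.0 (sign 0, exp 128, frac 0x400000) with exp_size = 8 and
 * frac_size = 23:
 *
 *   idx      = 0x400000 >> 16          = 64
 *   out_frac = lookup_table[64] << 16  = 42 << 16
 *   out_exp  = 2 * 127 + ~128, which wraps to 254 - 129 = 125
 *
 * out_exp is neither 0 nor UINT64_MAX, so no subnormal fixup is needed and
 * the result is 0x3e2a0000 = 0.33203125, i.e. 1/3 to within the 7-bit
 * precision of the estimate.
 */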
3706
3707static float16 frec7_h(float16 f, float_status *s)
3708{
3709 int exp_size = 5, frac_size = 10;
3710 bool sign = float16_is_neg(f);
3711
3712 /* frec7(+-inf) = +-0 */
3713 if (float16_is_infinity(f)) {
3714 return float16_set_sign(float16_zero, sign);
3715 }
3716
3717 /* frec7(+-0) = +-inf */
3718 if (float16_is_zero(f)) {
3719 s->float_exception_flags |= float_flag_divbyzero;
3720 return float16_set_sign(float16_infinity, sign);
3721 }
3722
3723 /* frec7(sNaN) = canonical NaN */
3724 if (float16_is_signaling_nan(f, s)) {
3725 s->float_exception_flags |= float_flag_invalid;
3726 return float16_default_nan(s);
3727 }
3728
3729 /* frec7(qNaN) = canonical NaN */
3730 if (float16_is_quiet_nan(f, s)) {
3731 return float16_default_nan(s);
3732 }
3733
3734 /* +-normal, +-subnormal */
3735 uint64_t val = frec7(f, exp_size, frac_size, s);
3736 return make_float16(val);
3737}
3738
3739static float32 frec7_s(float32 f, float_status *s)
3740{
3741 int exp_size = 8, frac_size = 23;
3742 bool sign = float32_is_neg(f);
3743
3744 /* frec7(+-inf) = +-0 */
3745 if (float32_is_infinity(f)) {
3746 return float32_set_sign(float32_zero, sign);
3747 }
3748
3749 /* frec7(+-0) = +-inf */
3750 if (float32_is_zero(f)) {
3751 s->float_exception_flags |= float_flag_divbyzero;
3752 return float32_set_sign(float32_infinity, sign);
3753 }
3754
3755 /* frec7(sNaN) = canonical NaN */
3756 if (float32_is_signaling_nan(f, s)) {
3757 s->float_exception_flags |= float_flag_invalid;
3758 return float32_default_nan(s);
3759 }
3760
3761 /* frec7(qNaN) = canonical NaN */
3762 if (float32_is_quiet_nan(f, s)) {
3763 return float32_default_nan(s);
3764 }
3765
3766 /* +-normal, +-subnormal */
3767 uint64_t val = frec7(f, exp_size, frac_size, s);
3768 return make_float32(val);
3769}
3770
3771static float64 frec7_d(float64 f, float_status *s)
3772{
3773 int exp_size = 11, frac_size = 52;
3774 bool sign = float64_is_neg(f);
3775
3776 /* frec7(+-inf) = +-0 */
3777 if (float64_is_infinity(f)) {
3778 return float64_set_sign(float64_zero, sign);
3779 }
3780
3781 /* frec7(+-0) = +-inf */
3782 if (float64_is_zero(f)) {
3783 s->float_exception_flags |= float_flag_divbyzero;
3784 return float64_set_sign(float64_infinity, sign);
3785 }
3786
3787 /* frec7(sNaN) = canonical NaN */
3788 if (float64_is_signaling_nan(f, s)) {
3789 s->float_exception_flags |= float_flag_invalid;
3790 return float64_default_nan(s);
3791 }
3792
3793 /* frec7(qNaN) = canonical NaN */
3794 if (float64_is_quiet_nan(f, s)) {
3795 return float64_default_nan(s);
3796 }
3797
3798 /* +-normal, +-subnormal */
3799 uint64_t val = frec7(f, exp_size, frac_size, s);
3800 return make_float64(val);
3801}
3802
3803RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3804RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3805RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
8a085fb2 3806GEN_VEXT_V_ENV(vfrec7_v_h)
3807GEN_VEXT_V_ENV(vfrec7_v_w)
3808GEN_VEXT_V_ENV(vfrec7_v_d)
55c35407 3809
230b53dd 3810/* Vector Floating-Point MIN/MAX Instructions */
49c5611a
FC
3811RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3812RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3813RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
8a085fb2 3814GEN_VEXT_VV_ENV(vfmin_vv_h)
3815GEN_VEXT_VV_ENV(vfmin_vv_w)
3816GEN_VEXT_VV_ENV(vfmin_vv_d)
49c5611a
FC
3817RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3818RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3819RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
8a085fb2 3820GEN_VEXT_VF(vfmin_vf_h)
3821GEN_VEXT_VF(vfmin_vf_w)
3822GEN_VEXT_VF(vfmin_vf_d)
230b53dd 3823
49c5611a
FC
3824RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3825RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3826RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
8a085fb2 3827GEN_VEXT_VV_ENV(vfmax_vv_h)
3828GEN_VEXT_VV_ENV(vfmax_vv_w)
3829GEN_VEXT_VV_ENV(vfmax_vv_d)
49c5611a
FC
3830RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3831RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3832RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
8a085fb2 3833GEN_VEXT_VF(vfmax_vf_h)
3834GEN_VEXT_VF(vfmax_vf_w)
3835GEN_VEXT_VF(vfmax_vf_d)
1d426b81
LZ
3836
3837/* Vector Floating-Point Sign-Injection Instructions */
3838static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3839{
3840 return deposit64(b, 0, 15, a);
3841}
3842
3843static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3844{
3845 return deposit64(b, 0, 31, a);
3846}
3847
3848static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3849{
3850 return deposit64(b, 0, 63, a);
3851}
3852
3853RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3854RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3855RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
8a085fb2 3856GEN_VEXT_VV_ENV(vfsgnj_vv_h)
3857GEN_VEXT_VV_ENV(vfsgnj_vv_w)
3858GEN_VEXT_VV_ENV(vfsgnj_vv_d)
1d426b81
LZ
3859RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3860RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3861RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
8a085fb2 3862GEN_VEXT_VF(vfsgnj_vf_h)
3863GEN_VEXT_VF(vfsgnj_vf_w)
3864GEN_VEXT_VF(vfsgnj_vf_d)
1d426b81
LZ
3865
3866static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3867{
3868 return deposit64(~b, 0, 15, a);
3869}
3870
3871static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3872{
3873 return deposit64(~b, 0, 31, a);
3874}
3875
3876static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3877{
3878 return deposit64(~b, 0, 63, a);
3879}
3880
3881RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3882RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3883RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
8a085fb2 3884GEN_VEXT_VV_ENV(vfsgnjn_vv_h)
3885GEN_VEXT_VV_ENV(vfsgnjn_vv_w)
3886GEN_VEXT_VV_ENV(vfsgnjn_vv_d)
1d426b81
LZ
3887RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3888RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3889RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
8a085fb2 3890GEN_VEXT_VF(vfsgnjn_vf_h)
3891GEN_VEXT_VF(vfsgnjn_vf_w)
3892GEN_VEXT_VF(vfsgnjn_vf_d)
1d426b81
LZ
3893
3894static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3895{
3896 return deposit64(b ^ a, 0, 15, a);
3897}
3898
3899static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3900{
3901 return deposit64(b ^ a, 0, 31, a);
3902}
3903
3904static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3905{
3906 return deposit64(b ^ a, 0, 63, a);
3907}
3908
3909RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3910RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3911RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
8a085fb2 3912GEN_VEXT_VV_ENV(vfsgnjx_vv_h)
3913GEN_VEXT_VV_ENV(vfsgnjx_vv_w)
3914GEN_VEXT_VV_ENV(vfsgnjx_vv_d)
1d426b81
LZ
3915RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3916RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3917RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
8a085fb2 3918GEN_VEXT_VF(vfsgnjx_vf_h)
3919GEN_VEXT_VF(vfsgnjx_vf_w)
3920GEN_VEXT_VF(vfsgnjx_vf_d)
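/*
 * Added example (illustrative, not in the original source): the helpers
 * above keep the magnitude of their first argument and derive only the
 * sign from the second.  With a = 0x3f800000 (+1.0f) and
 * b = 0xc0000000 (-2.0f):
 *
 *   fsgnj32(a, b)  = 0xbf800000   (-1.0, sign copied from b)
 *   fsgnjn32(a, b) = 0x3f800000   (+1.0, inverted sign of b)
 *   fsgnjx32(a, b) = 0xbf800000   (-1.0, sign of a XOR sign of b)
 *
 * In the vector forms the magnitude comes from vs2 and the sign from
 * vs1/rs1, matching the OP(s2, s1, ...) argument order used by the other
 * generators in this file.
 */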
2a68e9e5
LZ
3921
3922/* Vector Floating-Point Compare Instructions */
3923#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
3924void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
3925 CPURISCVState *env, uint32_t desc) \
3926{ \
2a68e9e5
LZ
3927 uint32_t vm = vext_vm(desc); \
3928 uint32_t vl = env->vl; \
2a68e9e5
LZ
3929 uint32_t i; \
3930 \
f714361e 3931 for (i = env->vstart; i < vl; i++) { \
2a68e9e5
LZ
3932 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
3933 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 3934 if (!vm && !vext_elem_mask(v0, i)) { \
2a68e9e5
LZ
3935 continue; \
3936 } \
f9298de5 3937 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
3938 DO_OP(s2, s1, &env->fp_status)); \
3939 } \
f714361e 3940 env->vstart = 0; \
2a68e9e5
LZ
3941}
3942
2a68e9e5
LZ
3943GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3944GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3945GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3946
3947#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
3948void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
3949 CPURISCVState *env, uint32_t desc) \
3950{ \
2a68e9e5
LZ
3951 uint32_t vm = vext_vm(desc); \
3952 uint32_t vl = env->vl; \
2a68e9e5
LZ
3953 uint32_t i; \
3954 \
f714361e 3955 for (i = env->vstart; i < vl; i++) { \
2a68e9e5 3956 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 3957 if (!vm && !vext_elem_mask(v0, i)) { \
2a68e9e5
LZ
3958 continue; \
3959 } \
f9298de5 3960 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
3961 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
3962 } \
f714361e 3963 env->vstart = 0; \
2a68e9e5
LZ
3964}
3965
3966GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3967GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3968GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3969
3970static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3971{
3972 FloatRelation compare = float16_compare_quiet(a, b, s);
3973 return compare != float_relation_equal;
3974}
3975
3976static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3977{
3978 FloatRelation compare = float32_compare_quiet(a, b, s);
3979 return compare != float_relation_equal;
3980}
3981
3982static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3983{
3984 FloatRelation compare = float64_compare_quiet(a, b, s);
3985 return compare != float_relation_equal;
3986}
3987
3988GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3989GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3990GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3991GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3992GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3993GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3994
2a68e9e5
LZ
3995GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3996GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3997GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
3998GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
3999GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4000GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4001
2a68e9e5
LZ
4002GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4003GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4004GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4005GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4006GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4007GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4008
4009static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4010{
4011 FloatRelation compare = float16_compare(a, b, s);
4012 return compare == float_relation_greater;
4013}
4014
4015static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4016{
4017 FloatRelation compare = float32_compare(a, b, s);
4018 return compare == float_relation_greater;
4019}
4020
4021static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4022{
4023 FloatRelation compare = float64_compare(a, b, s);
4024 return compare == float_relation_greater;
4025}
4026
4027GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4028GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4029GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4030
4031static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4032{
4033 FloatRelation compare = float16_compare(a, b, s);
4034 return compare == float_relation_greater ||
4035 compare == float_relation_equal;
4036}
4037
4038static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4039{
4040 FloatRelation compare = float32_compare(a, b, s);
4041 return compare == float_relation_greater ||
4042 compare == float_relation_equal;
4043}
4044
4045static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4046{
4047 FloatRelation compare = float64_compare(a, b, s);
4048 return compare == float_relation_greater ||
4049 compare == float_relation_equal;
4050}
4051
4052GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4053GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4054GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
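/*
 * Added note (illustrative, not in the original source): vmfeq/vmfne use
 * the quiet softfloat comparisons, while vmflt/vmfle/vmfgt/vmfge use the
 * signalling ones.  With a quiet NaN in either operand, vmfeq therefore
 * clears the mask bit without raising any flag, whereas vmflt clears the
 * mask bit and sets float_flag_invalid, as required for the ordered
 * comparisons.
 */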
4055
121ddbb3
LZ
4056/* Vector Floating-Point Classify Instruction */
4057#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
4058static void do_##NAME(void *vd, void *vs2, int i) \
4059{ \
4060 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
4061 *((TD *)vd + HD(i)) = OP(s2); \
4062}
4063
8a085fb2 4064#define GEN_VEXT_V(NAME) \
121ddbb3
LZ
4065void HELPER(NAME)(void *vd, void *v0, void *vs2, \
4066 CPURISCVState *env, uint32_t desc) \
4067{ \
121ddbb3
LZ
4068 uint32_t vm = vext_vm(desc); \
4069 uint32_t vl = env->vl; \
4070 uint32_t i; \
4071 \
f714361e 4072 for (i = env->vstart; i < vl; i++) { \
f9298de5 4073 if (!vm && !vext_elem_mask(v0, i)) { \
121ddbb3
LZ
4074 continue; \
4075 } \
4076 do_##NAME(vd, vs2, i); \
4077 } \
f714361e 4078 env->vstart = 0; \
121ddbb3
LZ
4079}
4080
4081target_ulong fclass_h(uint64_t frs1)
4082{
4083 float16 f = frs1;
4084 bool sign = float16_is_neg(f);
4085
4086 if (float16_is_infinity(f)) {
4087 return sign ? 1 << 0 : 1 << 7;
4088 } else if (float16_is_zero(f)) {
4089 return sign ? 1 << 3 : 1 << 4;
4090 } else if (float16_is_zero_or_denormal(f)) {
4091 return sign ? 1 << 2 : 1 << 5;
4092 } else if (float16_is_any_nan(f)) {
4093 float_status s = { }; /* for snan_bit_is_one */
4094 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4095 } else {
4096 return sign ? 1 << 1 : 1 << 6;
4097 }
4098}
4099
4100target_ulong fclass_s(uint64_t frs1)
4101{
4102 float32 f = frs1;
4103 bool sign = float32_is_neg(f);
4104
4105 if (float32_is_infinity(f)) {
4106 return sign ? 1 << 0 : 1 << 7;
4107 } else if (float32_is_zero(f)) {
4108 return sign ? 1 << 3 : 1 << 4;
4109 } else if (float32_is_zero_or_denormal(f)) {
4110 return sign ? 1 << 2 : 1 << 5;
4111 } else if (float32_is_any_nan(f)) {
4112 float_status s = { }; /* for snan_bit_is_one */
4113 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4114 } else {
4115 return sign ? 1 << 1 : 1 << 6;
4116 }
4117}
4118
4119target_ulong fclass_d(uint64_t frs1)
4120{
4121 float64 f = frs1;
4122 bool sign = float64_is_neg(f);
4123
4124 if (float64_is_infinity(f)) {
4125 return sign ? 1 << 0 : 1 << 7;
4126 } else if (float64_is_zero(f)) {
4127 return sign ? 1 << 3 : 1 << 4;
4128 } else if (float64_is_zero_or_denormal(f)) {
4129 return sign ? 1 << 2 : 1 << 5;
4130 } else if (float64_is_any_nan(f)) {
4131 float_status s = { }; /* for snan_bit_is_one */
4132 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4133 } else {
4134 return sign ? 1 << 1 : 1 << 6;
4135 }
4136}
4137
4138RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4139RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4140RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
8a085fb2 4141GEN_VEXT_V(vfclass_v_h)
4142GEN_VEXT_V(vfclass_v_w)
4143GEN_VEXT_V(vfclass_v_d)
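/*
 * Added annotation (not part of the original file): fclass_h/s/d return a
 * one-hot mask in the standard RISC-V classify encoding, as spelled out by
 * the helpers above.  A hypothetical decoder, purely to illustrate that
 * encoding (the function name is an assumption, not an existing API):
 */
static inline const char *fclass_name_example(target_ulong bits)
{
    switch (bits) {
    case 1 << 0: return "-inf";
    case 1 << 1: return "-normal";
    case 1 << 2: return "-subnormal";
    case 1 << 3: return "-0";
    case 1 << 4: return "+0";
    case 1 << 5: return "+subnormal";
    case 1 << 6: return "+normal";
    case 1 << 7: return "+inf";
    case 1 << 8: return "sNaN";
    case 1 << 9: return "qNaN";
    default:     return "invalid classify mask";
    }
}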
64ab5846
LZ
4144
4145/* Vector Floating-Point Merge Instruction */
3479a814 4146#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
64ab5846
LZ
4147void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4148 CPURISCVState *env, uint32_t desc) \
4149{ \
64ab5846
LZ
4150 uint32_t vm = vext_vm(desc); \
4151 uint32_t vl = env->vl; \
64ab5846
LZ
4152 uint32_t i; \
4153 \
f714361e 4154 for (i = env->vstart; i < vl; i++) { \
64ab5846
LZ
4155 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4156 *((ETYPE *)vd + H(i)) \
f9298de5 4157 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
64ab5846 4158 } \
f714361e 4159 env->vstart = 0; \
64ab5846
LZ
4160}
4161
3479a814
FC
4162GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4163GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4164GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
92100973
LZ
4165
4166/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4167/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4168RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4169RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4170RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
8a085fb2 4171GEN_VEXT_V_ENV(vfcvt_xu_f_v_h)
4172GEN_VEXT_V_ENV(vfcvt_xu_f_v_w)
4173GEN_VEXT_V_ENV(vfcvt_xu_f_v_d)
92100973
LZ
4174
4175/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4176RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4177RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4178RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
8a085fb2 4179GEN_VEXT_V_ENV(vfcvt_x_f_v_h)
4180GEN_VEXT_V_ENV(vfcvt_x_f_v_w)
4181GEN_VEXT_V_ENV(vfcvt_x_f_v_d)
92100973
LZ
4182
4183/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4184RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4185RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4186RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
8a085fb2 4187GEN_VEXT_V_ENV(vfcvt_f_xu_v_h)
4188GEN_VEXT_V_ENV(vfcvt_f_xu_v_w)
4189GEN_VEXT_V_ENV(vfcvt_f_xu_v_d)
92100973
LZ
4190
4191/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4192RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4193RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4194RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
8a085fb2 4195GEN_VEXT_V_ENV(vfcvt_f_x_v_h)
4196GEN_VEXT_V_ENV(vfcvt_f_x_v_w)
4197GEN_VEXT_V_ENV(vfcvt_f_x_v_d)
4514b7b1
LZ
4198
4199/* Widening Floating-Point/Integer Type-Convert Instructions */
4200/* (TD, T2, TX2) */
3ce4c09d 4201#define WOP_UU_B uint16_t, uint8_t, uint8_t
4514b7b1
LZ
4202#define WOP_UU_H uint32_t, uint16_t, uint16_t
4203#define WOP_UU_W uint64_t, uint32_t, uint32_t
4204/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
4205RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4206RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
8a085fb2 4207GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h)
4208GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w)
4514b7b1
LZ
4209
4210/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4211RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4212RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
8a085fb2 4213GEN_VEXT_V_ENV(vfwcvt_x_f_v_h)
4214GEN_VEXT_V_ENV(vfwcvt_x_f_v_w)
4514b7b1
LZ
4215
4216/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
3ce4c09d 4217RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4514b7b1
LZ
4218RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4219RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
8a085fb2 4220GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b)
4221GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h)
4222GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w)
4514b7b1
LZ
4223
4224/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3ce4c09d 4225RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4514b7b1
LZ
4226RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4227RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
8a085fb2 4228GEN_VEXT_V_ENV(vfwcvt_f_x_v_b)
4229GEN_VEXT_V_ENV(vfwcvt_f_x_v_h)
4230GEN_VEXT_V_ENV(vfwcvt_f_x_v_w)
4514b7b1
LZ
4231
4232/*
3ce4c09d 4233 * vfwcvt.f.f.v vd, vs2, vm
4514b7b1
LZ
4234 * Convert single-width float to double-width float.
4235 */
4236static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4237{
4238 return float16_to_float32(a, true, s);
4239}
4240
4241RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4242RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
8a085fb2 4243GEN_VEXT_V_ENV(vfwcvt_f_f_v_h)
4244GEN_VEXT_V_ENV(vfwcvt_f_f_v_w)
878d406e
LZ
4245
4246/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4247/* (TD, T2, TX2) */
ff679b58 4248#define NOP_UU_B uint8_t, uint16_t, uint32_t
878d406e
LZ
4249#define NOP_UU_H uint16_t, uint32_t, uint32_t
4250#define NOP_UU_W uint32_t, uint64_t, uint64_t
4251/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
ff679b58
FC
4252RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4253RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4254RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
8a085fb2 4255GEN_VEXT_V_ENV(vfncvt_xu_f_w_b)
4256GEN_VEXT_V_ENV(vfncvt_xu_f_w_h)
4257GEN_VEXT_V_ENV(vfncvt_xu_f_w_w)
878d406e
LZ
4258
4259/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
ff679b58
FC
4260RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4261RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4262RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
8a085fb2 4263GEN_VEXT_V_ENV(vfncvt_x_f_w_b)
4264GEN_VEXT_V_ENV(vfncvt_x_f_w_h)
4265GEN_VEXT_V_ENV(vfncvt_x_f_w_w)
878d406e
LZ
4266
4267/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
ff679b58
FC
4268RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4269RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
8a085fb2 4270GEN_VEXT_V_ENV(vfncvt_f_xu_w_h)
4271GEN_VEXT_V_ENV(vfncvt_f_xu_w_w)
878d406e
LZ
4272
4273/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
ff679b58
FC
4274RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4275RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
8a085fb2 4276GEN_VEXT_V_ENV(vfncvt_f_x_w_h)
4277GEN_VEXT_V_ENV(vfncvt_f_x_w_w)
878d406e
LZ
4278
4279/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4280static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4281{
4282 return float32_to_float16(a, true, s);
4283}
4284
ff679b58
FC
4285RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4286RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
8a085fb2 4287GEN_VEXT_V_ENV(vfncvt_f_f_w_h)
4288GEN_VEXT_V_ENV(vfncvt_f_f_w_w)
fe5c9ab1
LZ
4289
4290/*
4291 *** Vector Reduction Operations
4292 */
4293/* Vector Single-Width Integer Reduction Instructions */
3479a814 4294#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
fe5c9ab1
LZ
4295void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4296 void *vs2, CPURISCVState *env, uint32_t desc) \
4297{ \
fe5c9ab1
LZ
4298 uint32_t vm = vext_vm(desc); \
4299 uint32_t vl = env->vl; \
4300 uint32_t i; \
fe5c9ab1
LZ
4301 TD s1 = *((TD *)vs1 + HD(0)); \
4302 \
f714361e 4303 for (i = env->vstart; i < vl; i++) { \
fe5c9ab1 4304 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4305 if (!vm && !vext_elem_mask(v0, i)) { \
fe5c9ab1
LZ
4306 continue; \
4307 } \
4308 s1 = OP(s1, (TD)s2); \
4309 } \
4310 *((TD *)vd + HD(0)) = s1; \
f714361e 4311 env->vstart = 0; \
fe5c9ab1
LZ
4312}
4313
4314/* vd[0] = sum(vs1[0], vs2[*]) */
3479a814
FC
4315GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4316GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4317GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4318GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
fe5c9ab1
LZ
4319
4320/* vd[0] = maxu(vs1[0], vs2[*]) */
3479a814
FC
4321GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4322GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4323GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4324GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4325
4326/* vd[0] = max(vs1[0], vs2[*]) */
3479a814
FC
4327GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4328GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4329GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4330GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4331
4332/* vd[0] = minu(vs1[0], vs2[*]) */
3479a814
FC
4333GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4334GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4335GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4336GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4337
4338/* vd[0] = min(vs1[0], vs2[*]) */
3479a814
FC
4339GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4340GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4341GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4342GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4343
4344/* vd[0] = and(vs1[0], vs2[*]) */
3479a814
FC
4345GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4346GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4347GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4348GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
fe5c9ab1
LZ
4349
4350/* vd[0] = or(vs1[0], vs2[*]) */
3479a814
FC
4351GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4352GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4353GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4354GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
fe5c9ab1
LZ
4355
4356/* vd[0] = xor(vs1[0], vs2[*]) */
3479a814
FC
4357GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4358GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4359GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4360GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
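/*
 * Added worked example (illustrative, not in the original source): a
 * reduction consumes the scalar in element 0 of vs1, folds in every
 * active element of vs2, and writes only element 0 of vd.  For
 * vredsum.vs with vs1[0] = 10, vs2 = {1, 2, 3, 4}, vl = 4 and all
 * elements active, the generated helper computes
 *
 *   vd[0] = 10 + 1 + 2 + 3 + 4 = 20
 *
 * and does not touch the remaining (tail) elements of vd.
 */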
bba71820
LZ
4361
4362/* Vector Widening Integer Reduction Instructions */
4363/* Signed sum reduction into double-width accumulator */
3479a814
FC
4364GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4365GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4366GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
bba71820
LZ
4367
4368/* Unsigned sum reduction into double-width accumulator */
3479a814
FC
4369GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4370GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4371GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
523547f1
LZ
4372
4373/* Vector Single-Width Floating-Point Reduction Instructions */
3479a814 4374#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
523547f1
LZ
4375void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4376 void *vs2, CPURISCVState *env, \
4377 uint32_t desc) \
4378{ \
523547f1
LZ
4379 uint32_t vm = vext_vm(desc); \
4380 uint32_t vl = env->vl; \
4381 uint32_t i; \
523547f1
LZ
4382 TD s1 = *((TD *)vs1 + HD(0)); \
4383 \
f714361e 4384 for (i = env->vstart; i < vl; i++) { \
523547f1 4385 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4386 if (!vm && !vext_elem_mask(v0, i)) { \
523547f1
LZ
4387 continue; \
4388 } \
4389 s1 = OP(s1, (TD)s2, &env->fp_status); \
4390 } \
4391 *((TD *)vd + HD(0)) = s1; \
f714361e 4392 env->vstart = 0; \
523547f1
LZ
4393}
4394
4395/* Unordered sum */
3479a814
FC
4396GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4397GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4398GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
523547f1
LZ
4399
4400/* Maximum value */
08b60eeb
FC
4401GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4402GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4403GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
523547f1
LZ
4404
4405/* Minimum value */
08b60eeb
FC
4406GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4407GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4408GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
696b0c26
LZ
4409
4410/* Vector Widening Floating-Point Reduction Instructions */
4411/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4412void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4413 void *vs2, CPURISCVState *env, uint32_t desc)
4414{
696b0c26
LZ
4415 uint32_t vm = vext_vm(desc);
4416 uint32_t vl = env->vl;
4417 uint32_t i;
696b0c26
LZ
4418 uint32_t s1 = *((uint32_t *)vs1 + H4(0));
4419
f714361e 4420 for (i = env->vstart; i < vl; i++) {
696b0c26 4421 uint16_t s2 = *((uint16_t *)vs2 + H2(i));
f9298de5 4422 if (!vm && !vext_elem_mask(v0, i)) {
696b0c26
LZ
4423 continue;
4424 }
4425 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4426 &env->fp_status);
4427 }
4428 *((uint32_t *)vd + H4(0)) = s1;
f714361e 4429 env->vstart = 0;
696b0c26
LZ
4430}
4431
4432void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4433 void *vs2, CPURISCVState *env, uint32_t desc)
4434{
696b0c26
LZ
4435 uint32_t vm = vext_vm(desc);
4436 uint32_t vl = env->vl;
4437 uint32_t i;
696b0c26
LZ
4438 uint64_t s1 = *((uint64_t *)vs1);
4439
f714361e 4440 for (i = env->vstart; i < vl; i++) {
696b0c26 4441 uint32_t s2 = *((uint32_t *)vs2 + H4(i));
f9298de5 4442 if (!vm && !vext_elem_mask(v0, i)) {
696b0c26
LZ
4443 continue;
4444 }
4445 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4446 &env->fp_status);
4447 }
4448 *((uint64_t *)vd) = s1;
f714361e 4449 env->vstart = 0;
696b0c26 4450}
c21f34ae
LZ
4451
4452/*
4453 *** Vector Mask Operations
4454 */
4455/* Vector Mask-Register Logical Instructions */
4456#define GEN_VEXT_MASK_VV(NAME, OP) \
4457void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4458 void *vs2, CPURISCVState *env, \
4459 uint32_t desc) \
4460{ \
c21f34ae
LZ
4461 uint32_t vl = env->vl; \
4462 uint32_t i; \
4463 int a, b; \
4464 \
f714361e 4465 for (i = env->vstart; i < vl; i++) { \
f9298de5
FC
4466 a = vext_elem_mask(vs1, i); \
4467 b = vext_elem_mask(vs2, i); \
4468 vext_set_elem_mask(vd, i, OP(b, a)); \
c21f34ae 4469 } \
f714361e 4470 env->vstart = 0; \
c21f34ae
LZ
4471}
4472
4473#define DO_NAND(N, M) (!(N & M))
4474#define DO_ANDNOT(N, M) (N & !M)
4475#define DO_NOR(N, M) (!(N | M))
4476#define DO_ORNOT(N, M) (N | !M)
4477#define DO_XNOR(N, M) (!(N ^ M))
4478
4479GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4480GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
9c0d2559 4481GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
c21f34ae
LZ
4482GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4483GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4484GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
9c0d2559 4485GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
c21f34ae 4486GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
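/*
 * Added note (illustrative, not in the original source): a and b are
 * single mask bits returned as 0 or 1 by vext_elem_mask(), so the logical
 * negation in DO_NAND/DO_ANDNOT/DO_NOR/DO_ORNOT/DO_XNOR acts as a one-bit
 * complement.  E.g. for vmandn.mm the generator evaluates
 * DO_ANDNOT(b, a) = b & !a, which is 1 only when the vs2 bit is set and
 * the vs1 bit is clear.
 */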
2e88f551 4487
0014aa74
FC
4488/* Vector count population in mask vcpop */
4489target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4490 uint32_t desc)
2e88f551
LZ
4491{
4492 target_ulong cnt = 0;
2e88f551
LZ
4493 uint32_t vm = vext_vm(desc);
4494 uint32_t vl = env->vl;
4495 int i;
4496
f714361e 4497 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4498 if (vm || vext_elem_mask(v0, i)) {
4499 if (vext_elem_mask(vs2, i)) {
2e88f551
LZ
4500 cnt++;
4501 }
4502 }
4503 }
f714361e 4504 env->vstart = 0;
2e88f551
LZ
4505 return cnt;
4506}
0db67e1c 4507
d71a24fc
FC
4508/* vfirst find-first-set mask bit */
4509target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4510 uint32_t desc)
0db67e1c 4511{
0db67e1c
LZ
4512 uint32_t vm = vext_vm(desc);
4513 uint32_t vl = env->vl;
4514 int i;
4515
f714361e 4516 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4517 if (vm || vext_elem_mask(v0, i)) {
4518 if (vext_elem_mask(vs2, i)) {
0db67e1c
LZ
4519 return i;
4520 }
4521 }
4522 }
f714361e 4523 env->vstart = 0;
0db67e1c
LZ
4524 return -1LL;
4525}
81fbf7da
LZ
4526
4527enum set_mask_type {
4528 ONLY_FIRST = 1,
4529 INCLUDE_FIRST,
4530 BEFORE_FIRST,
4531};
4532
4533static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4534 uint32_t desc, enum set_mask_type type)
4535{
81fbf7da
LZ
4536 uint32_t vm = vext_vm(desc);
4537 uint32_t vl = env->vl;
4538 int i;
4539 bool first_mask_bit = false;
4540
f714361e 4541 for (i = env->vstart; i < vl; i++) {
f9298de5 4542 if (!vm && !vext_elem_mask(v0, i)) {
81fbf7da
LZ
4543 continue;
4544 }
4545 /* write a zero to all following active elements */
4546 if (first_mask_bit) {
f9298de5 4547 vext_set_elem_mask(vd, i, 0);
81fbf7da
LZ
4548 continue;
4549 }
f9298de5 4550 if (vext_elem_mask(vs2, i)) {
81fbf7da
LZ
4551 first_mask_bit = true;
4552 if (type == BEFORE_FIRST) {
f9298de5 4553 vext_set_elem_mask(vd, i, 0);
81fbf7da 4554 } else {
f9298de5 4555 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4556 }
4557 } else {
4558 if (type == ONLY_FIRST) {
f9298de5 4559 vext_set_elem_mask(vd, i, 0);
81fbf7da 4560 } else {
f9298de5 4561 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4562 }
4563 }
4564 }
f714361e 4565 env->vstart = 0;
81fbf7da
LZ
4566}
4567
4568void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4569 uint32_t desc)
4570{
4571 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4572}
4573
4574void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4575 uint32_t desc)
4576{
4577 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4578}
4579
4580void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4581 uint32_t desc)
4582{
4583 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4584}
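/*
 * Added worked example (illustrative, not in the original source): with
 * all elements active and the source mask vs2 = 0 0 1 0 1 (elements 0..4,
 * first set bit at element 2), vmsetm() produces
 *
 *   vmsbf.m: 1 1 0 0 0   (set before first)
 *   vmsif.m: 1 1 1 0 0   (set including first)
 *   vmsof.m: 0 0 1 0 0   (set only first)
 */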
78d90cfe
LZ
4585
4586/* Vector Iota Instruction */
3479a814 4587#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
78d90cfe
LZ
4588void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4589 uint32_t desc) \
4590{ \
78d90cfe
LZ
4591 uint32_t vm = vext_vm(desc); \
4592 uint32_t vl = env->vl; \
4593 uint32_t sum = 0; \
4594 int i; \
4595 \
f714361e 4596 for (i = env->vstart; i < vl; i++) { \
f9298de5 4597 if (!vm && !vext_elem_mask(v0, i)) { \
78d90cfe
LZ
4598 continue; \
4599 } \
4600 *((ETYPE *)vd + H(i)) = sum; \
f9298de5 4601 if (vext_elem_mask(vs2, i)) { \
78d90cfe
LZ
4602 sum++; \
4603 } \
4604 } \
f714361e 4605 env->vstart = 0; \
78d90cfe
LZ
4606}
4607
3479a814
FC
4608GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4609GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4610GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4611GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
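/*
 * Added worked example (illustrative, not in the original source): viota
 * writes into each active element the number of set mask bits in vs2
 * strictly below it (an exclusive prefix sum).  With all elements active
 * and vs2 = 1 0 0 1 0 1, the result is vd = 0 1 1 1 2 2.
 */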
126bec3f
LZ
4612
4613/* Vector Element Index Instruction */
3479a814 4614#define GEN_VEXT_VID_V(NAME, ETYPE, H) \
126bec3f
LZ
4615void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4616{ \
126bec3f
LZ
4617 uint32_t vm = vext_vm(desc); \
4618 uint32_t vl = env->vl; \
4619 int i; \
4620 \
f714361e 4621 for (i = env->vstart; i < vl; i++) { \
f9298de5 4622 if (!vm && !vext_elem_mask(v0, i)) { \
126bec3f
LZ
4623 continue; \
4624 } \
4625 *((ETYPE *)vd + H(i)) = i; \
4626 } \
f714361e 4627 env->vstart = 0; \
126bec3f
LZ
4628}
4629
3479a814
FC
4630GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4631GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4632GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4633GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
ec17e036
LZ
4634
4635/*
4636 *** Vector Permutation Instructions
4637 */
4638
4639/* Vector Slide Instructions */
3479a814 4640#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
ec17e036
LZ
4641void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4642 CPURISCVState *env, uint32_t desc) \
4643{ \
ec17e036
LZ
4644 uint32_t vm = vext_vm(desc); \
4645 uint32_t vl = env->vl; \
f714361e 4646 target_ulong offset = s1, i_min, i; \
ec17e036 4647 \
f714361e
FC
4648 i_min = MAX(env->vstart, offset); \
4649 for (i = i_min; i < vl; i++) { \
f9298de5 4650 if (!vm && !vext_elem_mask(v0, i)) { \
ec17e036
LZ
4651 continue; \
4652 } \
4653 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
4654 } \
ec17e036
LZ
4655}
4656
4657/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
3479a814
FC
4658GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
4659GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4660GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4661GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
ec17e036 4662
#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    target_ulong i_max, i; \
    \
    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
    for (i = env->vstart; i < i_max; ++i) { \
        if (vm || vext_elem_mask(v0, i)) { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
        } \
    } \
    \
    for (i = i_max; i < vl; ++i) { \
        if (vm || vext_elem_mask(v0, i)) { \
            *((ETYPE *)vd + H(i)) = 0; \
        } \
    } \
    \
    env->vstart = 0; \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
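
/*
 * Worked example (illustrative): vslidedown.vx with rs1 = 2 and
 * vlmax = vl = 5, vs2 = {a, b, c, d, e} yields vd = {c, d, e, 0, 0}.
 * The first loop copies vs2[i + rs1] while the source index stays below
 * vlmax; the second loop zeroes the elements whose source index would be
 * out of range.
 */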
#define GEN_VEXT_VSLIE1UP(ESZ, H) \
static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
                            CPURISCVState *env, uint32_t desc) \
{ \
    typedef uint##ESZ##_t ETYPE; \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        if (i == 0) { \
            *((ETYPE *)vd + H(i)) = s1; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
        } \
    } \
    env->vstart = 0; \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
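
/*
 * Illustrative note: vslide1up.vx is a slide-up by one with the scalar
 * filling the hole, e.g. vs2 = {a, b, c, d}, x[rs1] = s and vl = 4 give
 * vd = {s, a, b, c}.  The ESZ-parameterised inner function exists so the
 * vfslide1up.vf helpers further below can reuse the same body.
 */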
#define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \
static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
                              CPURISCVState *env, uint32_t desc) \
{ \
    typedef uint##ESZ##_t ETYPE; \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        if (i == vl - 1) { \
            *((ETYPE *)vd + H(i)) = s1; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
        } \
    } \
    env->vstart = 0; \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
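
/*
 * Illustrative note: vslide1down.vx mirrors vslide1up.vx at the other
 * end, e.g. vs2 = {a, b, c, d}, x[rs1] = s and vl = 4 give
 * vd = {b, c, d, s}.
 */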
/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
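
/*
 * Note (illustrative): the vfslide1up.vf/vfslide1down.vf helpers are thin
 * wrappers around the integer slide1up/slide1down bodies above; the FP
 * scalar is forwarded as its raw bit pattern and, since the slide is pure
 * data movement, no softfloat calls are involved here.
 */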
/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint64_t index; \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        index = *((TS1 *)vs1 + HS1(i)); \
        if (index >= vlmax) { \
            *((TS2 *)vd + HS2(i)) = 0; \
        } else { \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
        } \
    } \
    env->vstart = 0; \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
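
/*
 * Worked example (illustrative): with vs1 = {3, 0, 2, 5}, vlmax = 4 and
 * vs2 = {a, b, c, d}, vrgather.vv produces vd = {d, a, c, 0}; index 5 is
 * out of range and selects 0.  The vrgatherei16 variants instantiate the
 * same macro with a fixed uint16_t index type (TS1) independent of SEW.
 */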
#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint64_t index = s1; \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        if (index >= vlmax) { \
            *((ETYPE *)vd + H(i)) = 0; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
        } \
    } \
    env->vstart = 0; \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
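
/*
 * Illustrative note: vrgather.vx splats a single source element, e.g.
 * x[rs1] = 2 and vs2 = {a, b, c, d} give vd[i] = c for every active
 * element, or 0 for all of them when x[rs1] >= vlmax.
 */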
/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t num = 0, i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vext_elem_mask(vs1, i)) { \
            continue; \
        } \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
        num++; \
    } \
    env->vstart = 0; \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
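
/*
 * Worked example (illustrative): with mask vs1 = {1, 0, 0, 1} and
 * vs2 = {a, b, c, d}, vcompress.vm packs the selected elements to the
 * front: vd[0] = a, vd[1] = d; the remaining destination elements are
 * left unchanged by this helper.
 */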
/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}
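
/*
 * Note (illustrative): vmv<nr>r.v copies whole registers, so the copy
 * length is the full maxsz of the register group rather than vl elements.
 * Scaling vstart by the element size (sewb) only matters when the
 * instruction was interrupted and restarts part-way through.
 */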
/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
    } \
    env->vstart = 0; \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
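
/*
 * Illustrative note: vzext.vf2/vf4/vf8 zero-extend and vsext.vf2/vf4/vf8
 * sign-extend each source element to SEW, the source width being SEW/2,
 * SEW/4 or SEW/8.  For example, vsext.vf4 with SEW = 32 reads int8_t
 * elements and writes int32_t results, so -1 stays -1 rather than
 * becoming 255.
 */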