]> git.proxmox.com Git - mirror_qemu.git/blame - target/riscv/vector_helper.c
target/riscv: gdb: support vector registers for rv64 & rv32
[mirror_qemu.git] / target / riscv / vector_helper.c
CommitLineData
2b7168fc
LZ
1/*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include "qemu/osdep.h"
5a9f8e15 20#include "qemu/host-utils.h"
2b7168fc 21#include "cpu.h"
751538d5 22#include "exec/memop.h"
2b7168fc
LZ
23#include "exec/exec-all.h"
24#include "exec/helper-proto.h"
ce2a0343 25#include "fpu/softfloat.h"
751538d5
LZ
26#include "tcg/tcg-gvec-desc.h"
27#include "internals.h"
2b7168fc
LZ
28#include <math.h>
29
30target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
31 target_ulong s2)
32{
33 int vlmax, vl;
34 RISCVCPU *cpu = env_archcpu(env);
d9b7609a 35 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
2b7168fc
LZ
36 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
37 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
38 bool vill = FIELD_EX64(s2, VTYPE, VILL);
39 target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
40
d9b7609a
FC
41 if (lmul & 4) {
42 /* Fractional LMUL. */
43 if (lmul == 4 ||
44 cpu->cfg.elen >> (8 - lmul) < sew) {
45 vill = true;
46 }
47 }
48
49 if ((sew > cpu->cfg.elen)
50 || vill
51 || (ediv != 0)
52 || (reserved != 0)) {
2b7168fc
LZ
53 /* only set vill bit. */
54 env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
55 env->vl = 0;
56 env->vstart = 0;
57 return 0;
58 }
59
60 vlmax = vext_get_vlmax(cpu, s2);
61 if (s1 <= vlmax) {
62 vl = s1;
63 } else {
64 vl = vlmax;
65 }
66 env->vl = vl;
67 env->vtype = s2;
68 env->vstart = 0;
69 return vl;
70}
751538d5
LZ
71
72/*
73 * Note that vector data is stored in host-endian 64-bit chunks,
74 * so addressing units smaller than that needs a host-endian fixup.
75 */
76#ifdef HOST_WORDS_BIGENDIAN
77#define H1(x) ((x) ^ 7)
78#define H1_2(x) ((x) ^ 6)
79#define H1_4(x) ((x) ^ 4)
80#define H2(x) ((x) ^ 3)
81#define H4(x) ((x) ^ 1)
82#define H8(x) ((x))
83#else
84#define H1(x) (x)
85#define H1_2(x) (x)
86#define H1_4(x) (x)
87#define H2(x) (x)
88#define H4(x) (x)
89#define H8(x) (x)
90#endif
91
92static inline uint32_t vext_nf(uint32_t desc)
93{
94 return FIELD_EX32(simd_data(desc), VDATA, NF);
95}
96
751538d5
LZ
97static inline uint32_t vext_vm(uint32_t desc)
98{
99 return FIELD_EX32(simd_data(desc), VDATA, VM);
100}
101
33f1beaf
FC
102/*
103 * Encode LMUL to lmul as following:
104 * LMUL vlmul lmul
105 * 1 000 0
106 * 2 001 1
107 * 4 010 2
108 * 8 011 3
109 * - 100 -
110 * 1/8 101 -3
111 * 1/4 110 -2
112 * 1/2 111 -1
113 */
114static inline int32_t vext_lmul(uint32_t desc)
751538d5 115{
33f1beaf 116 return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
751538d5
LZ
117}
118
119/*
5a9f8e15 120 * Get the maximum number of elements can be operated.
751538d5 121 *
5a9f8e15 122 * esz: log2 of element size in bytes.
751538d5 123 */
5a9f8e15 124static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
751538d5 125{
5a9f8e15 126 /*
8a4b5257 127 * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits.
5a9f8e15
FC
128 * so vlen in bytes (vlenb) is encoded as maxsz.
129 */
130 uint32_t vlenb = simd_maxsz(desc);
131
132 /* Return VLMAX */
133 int scale = vext_lmul(desc) - esz;
134 return scale < 0 ? vlenb >> -scale : vlenb << scale;
751538d5
LZ
135}
136
137/*
138 * This function checks watchpoint before real load operation.
139 *
140 * In softmmu mode, the TLB API probe_access is enough for watchpoint check.
141 * In user mode, there is no watchpoint support now.
142 *
143 * It will trigger an exception if there is no mapping in TLB
144 * and page table walk can't fill the TLB entry. Then the guest
145 * software can return here after process the exception or never return.
146 */
147static void probe_pages(CPURISCVState *env, target_ulong addr,
148 target_ulong len, uintptr_t ra,
149 MMUAccessType access_type)
150{
151 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
152 target_ulong curlen = MIN(pagelen, len);
153
154 probe_access(env, addr, curlen, access_type,
155 cpu_mmu_index(env, false), ra);
156 if (len > curlen) {
157 addr += curlen;
158 curlen = len - curlen;
159 probe_access(env, addr, curlen, access_type,
160 cpu_mmu_index(env, false), ra);
161 }
162}
163
f9298de5
FC
164static inline void vext_set_elem_mask(void *v0, int index,
165 uint8_t value)
3a6f8f68 166{
f9298de5
FC
167 int idx = index / 64;
168 int pos = index % 64;
3a6f8f68 169 uint64_t old = ((uint64_t *)v0)[idx];
f9298de5 170 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
3a6f8f68 171}
751538d5 172
f9298de5
FC
173/*
174 * Earlier designs (pre-0.9) had a varying number of bits
175 * per mask value (MLEN). In the 0.9 design, MLEN=1.
176 * (Section 4.5)
177 */
178static inline int vext_elem_mask(void *v0, int index)
751538d5 179{
f9298de5
FC
180 int idx = index / 64;
181 int pos = index % 64;
751538d5
LZ
182 return (((uint64_t *)v0)[idx] >> pos) & 1;
183}
184
185/* elements operations for load and store */
186typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
187 uint32_t idx, void *vd, uintptr_t retaddr);
751538d5 188
79556fb6 189#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
751538d5
LZ
190static void NAME(CPURISCVState *env, abi_ptr addr, \
191 uint32_t idx, void *vd, uintptr_t retaddr)\
192{ \
751538d5 193 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
79556fb6 194 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
751538d5
LZ
195} \
196
79556fb6
FC
197GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
198GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
199GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
200GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
751538d5
LZ
201
202#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
203static void NAME(CPURISCVState *env, abi_ptr addr, \
204 uint32_t idx, void *vd, uintptr_t retaddr)\
205{ \
206 ETYPE data = *((ETYPE *)vd + H(idx)); \
207 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
208}
209
751538d5
LZ
210GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
211GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
212GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
213GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
214
215/*
216 *** stride: access vector element from strided memory
217 */
218static void
219vext_ldst_stride(void *vd, void *v0, target_ulong base,
220 target_ulong stride, CPURISCVState *env,
221 uint32_t desc, uint32_t vm,
3479a814 222 vext_ldst_elem_fn *ldst_elem,
79556fb6 223 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
751538d5
LZ
224{
225 uint32_t i, k;
226 uint32_t nf = vext_nf(desc);
5a9f8e15 227 uint32_t max_elems = vext_max_elems(desc, esz);
751538d5 228
f714361e 229 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
f9298de5 230 if (!vm && !vext_elem_mask(v0, i)) {
751538d5
LZ
231 continue;
232 }
f714361e 233
751538d5 234 k = 0;
751538d5 235 while (k < nf) {
5a9f8e15
FC
236 target_ulong addr = base + stride * i + (k << esz);
237 ldst_elem(env, addr, i + k * max_elems, vd, ra);
751538d5
LZ
238 k++;
239 }
240 }
f714361e 241 env->vstart = 0;
751538d5
LZ
242}
243
79556fb6 244#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
751538d5
LZ
245void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
246 target_ulong stride, CPURISCVState *env, \
247 uint32_t desc) \
248{ \
249 uint32_t vm = vext_vm(desc); \
250 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
5a9f8e15 251 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
751538d5
LZ
252}
253
79556fb6
FC
254GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
255GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
256GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
257GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
258
259#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
751538d5
LZ
260void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
261 target_ulong stride, CPURISCVState *env, \
262 uint32_t desc) \
263{ \
264 uint32_t vm = vext_vm(desc); \
265 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
5a9f8e15 266 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
751538d5
LZ
267}
268
79556fb6
FC
269GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
270GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
271GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
272GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
751538d5
LZ
273
274/*
275 *** unit-stride: access elements stored contiguously in memory
276 */
277
278/* unmasked unit-stride load and store operation*/
279static void
280vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
79556fb6
FC
281 vext_ldst_elem_fn *ldst_elem,
282 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
751538d5
LZ
283{
284 uint32_t i, k;
285 uint32_t nf = vext_nf(desc);
5a9f8e15 286 uint32_t max_elems = vext_max_elems(desc, esz);
751538d5 287
751538d5 288 /* load bytes from guest memory */
f714361e 289 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
751538d5
LZ
290 k = 0;
291 while (k < nf) {
5a9f8e15
FC
292 target_ulong addr = base + ((i * nf + k) << esz);
293 ldst_elem(env, addr, i + k * max_elems, vd, ra);
751538d5
LZ
294 k++;
295 }
296 }
f714361e 297 env->vstart = 0;
751538d5
LZ
298}
299
300/*
301 * masked unit-stride load and store operation will be a special case of stride,
302 * stride = NF * sizeof (MTYPE)
303 */
304
79556fb6 305#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
751538d5
LZ
306void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
307 CPURISCVState *env, uint32_t desc) \
308{ \
5a9f8e15 309 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
751538d5 310 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
5a9f8e15 311 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
751538d5
LZ
312} \
313 \
314void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
315 CPURISCVState *env, uint32_t desc) \
316{ \
3479a814 317 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
5a9f8e15 318 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
751538d5
LZ
319}
320
79556fb6
FC
321GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
322GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
323GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
324GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
325
326#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
751538d5
LZ
327void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
328 CPURISCVState *env, uint32_t desc) \
329{ \
5a9f8e15 330 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
751538d5 331 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
5a9f8e15 332 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
751538d5
LZ
333} \
334 \
335void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
336 CPURISCVState *env, uint32_t desc) \
337{ \
3479a814 338 vext_ldst_us(vd, base, env, desc, STORE_FN, \
5a9f8e15 339 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
751538d5
LZ
340}
341
79556fb6
FC
342GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
343GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
344GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
345GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
f732560e
LZ
346
347/*
348 *** index: access vector element from indexed memory
349 */
350typedef target_ulong vext_get_index_addr(target_ulong base,
351 uint32_t idx, void *vs2);
352
353#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
354static target_ulong NAME(target_ulong base, \
355 uint32_t idx, void *vs2) \
356{ \
357 return (base + *((ETYPE *)vs2 + H(idx))); \
358}
359
83fcd573
FC
360GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
361GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
362GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
363GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
f732560e
LZ
364
365static inline void
366vext_ldst_index(void *vd, void *v0, target_ulong base,
367 void *vs2, CPURISCVState *env, uint32_t desc,
368 vext_get_index_addr get_index_addr,
369 vext_ldst_elem_fn *ldst_elem,
08b9d0ed 370 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
f732560e
LZ
371{
372 uint32_t i, k;
373 uint32_t nf = vext_nf(desc);
374 uint32_t vm = vext_vm(desc);
5a9f8e15 375 uint32_t max_elems = vext_max_elems(desc, esz);
f732560e 376
f732560e 377 /* load bytes from guest memory */
f714361e 378 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
f9298de5 379 if (!vm && !vext_elem_mask(v0, i)) {
f732560e
LZ
380 continue;
381 }
f714361e
FC
382
383 k = 0;
f732560e 384 while (k < nf) {
5a9f8e15
FC
385 abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
386 ldst_elem(env, addr, i + k * max_elems, vd, ra);
f732560e
LZ
387 k++;
388 }
389 }
f714361e 390 env->vstart = 0;
f732560e
LZ
391}
392
08b9d0ed 393#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
f732560e
LZ
394void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
395 void *vs2, CPURISCVState *env, uint32_t desc) \
396{ \
397 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
5a9f8e15 398 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
f732560e
LZ
399}
400
08b9d0ed
FC
401GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
402GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
403GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
404GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
405GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
406GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
407GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
408GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
409GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
410GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
411GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
412GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
413GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
414GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
415GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
416GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
417
418#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
f732560e
LZ
419void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
420 void *vs2, CPURISCVState *env, uint32_t desc) \
421{ \
422 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
5a9f8e15 423 STORE_FN, ctzl(sizeof(ETYPE)), \
f732560e
LZ
424 GETPC(), MMU_DATA_STORE); \
425}
426
08b9d0ed
FC
427GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
428GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
429GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
430GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
431GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
432GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
433GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
434GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
435GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
436GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
437GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
438GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
439GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
440GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
441GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
442GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
022b4ecf
LZ
443
444/*
445 *** unit-stride fault-only-fisrt load instructions
446 */
447static inline void
448vext_ldff(void *vd, void *v0, target_ulong base,
449 CPURISCVState *env, uint32_t desc,
450 vext_ldst_elem_fn *ldst_elem,
d3e5e2ff 451 uint32_t esz, uintptr_t ra)
022b4ecf
LZ
452{
453 void *host;
454 uint32_t i, k, vl = 0;
022b4ecf
LZ
455 uint32_t nf = vext_nf(desc);
456 uint32_t vm = vext_vm(desc);
5a9f8e15 457 uint32_t max_elems = vext_max_elems(desc, esz);
022b4ecf
LZ
458 target_ulong addr, offset, remain;
459
460 /* probe every access*/
f714361e 461 for (i = env->vstart; i < env->vl; i++) {
f9298de5 462 if (!vm && !vext_elem_mask(v0, i)) {
022b4ecf
LZ
463 continue;
464 }
5a9f8e15 465 addr = base + i * (nf << esz);
022b4ecf 466 if (i == 0) {
5a9f8e15 467 probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
022b4ecf
LZ
468 } else {
469 /* if it triggers an exception, no need to check watchpoint */
5a9f8e15 470 remain = nf << esz;
022b4ecf
LZ
471 while (remain > 0) {
472 offset = -(addr | TARGET_PAGE_MASK);
473 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
474 cpu_mmu_index(env, false));
475 if (host) {
476#ifdef CONFIG_USER_ONLY
5a9f8e15 477 if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
022b4ecf
LZ
478 vl = i;
479 goto ProbeSuccess;
480 }
481#else
5a9f8e15 482 probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
022b4ecf
LZ
483#endif
484 } else {
485 vl = i;
486 goto ProbeSuccess;
487 }
488 if (remain <= offset) {
489 break;
490 }
491 remain -= offset;
492 addr += offset;
493 }
494 }
495 }
496ProbeSuccess:
497 /* load bytes from guest memory */
498 if (vl != 0) {
499 env->vl = vl;
500 }
f714361e 501 for (i = env->vstart; i < env->vl; i++) {
022b4ecf 502 k = 0;
f9298de5 503 if (!vm && !vext_elem_mask(v0, i)) {
022b4ecf
LZ
504 continue;
505 }
506 while (k < nf) {
5a9f8e15
FC
507 target_ulong addr = base + ((i * nf + k) << esz);
508 ldst_elem(env, addr, i + k * max_elems, vd, ra);
022b4ecf
LZ
509 k++;
510 }
511 }
f714361e 512 env->vstart = 0;
022b4ecf
LZ
513}
514
d3e5e2ff
FC
515#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
516void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
517 CPURISCVState *env, uint32_t desc) \
518{ \
519 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
5a9f8e15 520 ctzl(sizeof(ETYPE)), GETPC()); \
022b4ecf
LZ
521}
522
d3e5e2ff
FC
523GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
524GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
525GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
526GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
268fcca6 527
268fcca6
LZ
528#define DO_SWAP(N, M) (M)
529#define DO_AND(N, M) (N & M)
530#define DO_XOR(N, M) (N ^ M)
531#define DO_OR(N, M) (N | M)
532#define DO_ADD(N, M) (N + M)
533
268fcca6
LZ
534/* Signed min/max */
535#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
536#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
537
538/* Unsigned min/max */
539#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
540#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
541
30206bd8
FC
542/*
543 *** load and store whole register instructions
544 */
545static void
546vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
547 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
548 MMUAccessType access_type)
549{
f714361e 550 uint32_t i, k, off, pos;
30206bd8
FC
551 uint32_t nf = vext_nf(desc);
552 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
553 uint32_t max_elems = vlenb >> esz;
554
f714361e
FC
555 k = env->vstart / max_elems;
556 off = env->vstart % max_elems;
30206bd8 557
f714361e
FC
558 if (off) {
559 /* load/store rest of elements of current segment pointed by vstart */
560 for (pos = off; pos < max_elems; pos++, env->vstart++) {
561 target_ulong addr = base + ((pos + k * max_elems) << esz);
562 ldst_elem(env, addr, pos + k * max_elems, vd, ra);
563 }
564 k++;
565 }
566
567 /* load/store elements for rest of segments */
568 for (; k < nf; k++) {
569 for (i = 0; i < max_elems; i++, env->vstart++) {
30206bd8
FC
570 target_ulong addr = base + ((i + k * max_elems) << esz);
571 ldst_elem(env, addr, i + k * max_elems, vd, ra);
572 }
573 }
f714361e
FC
574
575 env->vstart = 0;
30206bd8
FC
576}
577
578#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
579void HELPER(NAME)(void *vd, target_ulong base, \
580 CPURISCVState *env, uint32_t desc) \
581{ \
582 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
583 ctzl(sizeof(ETYPE)), GETPC(), \
584 MMU_DATA_LOAD); \
585}
586
587GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
588GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
589GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
590GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
591GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
592GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
593GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
594GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
595GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
596GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
597GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
598GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
599GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
600GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
601GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
602GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
603
604#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
605void HELPER(NAME)(void *vd, target_ulong base, \
606 CPURISCVState *env, uint32_t desc) \
607{ \
608 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
609 ctzl(sizeof(ETYPE)), GETPC(), \
610 MMU_DATA_STORE); \
611}
612
613GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
614GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
615GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
616GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
617
43740e3a
LZ
618/*
619 *** Vector Integer Arithmetic Instructions
620 */
621
622/* expand macro args before macro */
623#define RVVCALL(macro, ...) macro(__VA_ARGS__)
624
625/* (TD, T1, T2, TX1, TX2) */
626#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
627#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
628#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
629#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
558fa779
LZ
630#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
631#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
632#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
633#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
958b85f3
LZ
634#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
635#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
636#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
637#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
97b1cba3
LZ
638#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
639#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
640#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
641#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
642#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
643#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
644#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
645#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
646#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
647#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
648#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
649#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
9ff3d287
LZ
650#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
651#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
652#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
653#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
654#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
655#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
43740e3a
LZ
656
657/* operation of two vector elements */
658typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
659
660#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
661static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
662{ \
663 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
664 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
665 *((TD *)vd + HD(i)) = OP(s2, s1); \
666}
667#define DO_SUB(N, M) (N - M)
668#define DO_RSUB(N, M) (M - N)
669
670RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
671RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
672RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
673RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
674RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
675RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
676RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
677RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
678
679static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
680 CPURISCVState *env, uint32_t desc,
681 uint32_t esz, uint32_t dsz,
3479a814 682 opivv2_fn *fn)
43740e3a 683{
43740e3a
LZ
684 uint32_t vm = vext_vm(desc);
685 uint32_t vl = env->vl;
686 uint32_t i;
687
f714361e 688 for (i = env->vstart; i < vl; i++) {
f9298de5 689 if (!vm && !vext_elem_mask(v0, i)) {
43740e3a
LZ
690 continue;
691 }
692 fn(vd, vs1, vs2, i);
693 }
f714361e 694 env->vstart = 0;
43740e3a
LZ
695}
696
697/* generate the helpers for OPIVV */
3479a814 698#define GEN_VEXT_VV(NAME, ESZ, DSZ) \
43740e3a
LZ
699void HELPER(NAME)(void *vd, void *v0, void *vs1, \
700 void *vs2, CPURISCVState *env, \
701 uint32_t desc) \
702{ \
703 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
3479a814 704 do_##NAME); \
43740e3a
LZ
705}
706
3479a814
FC
707GEN_VEXT_VV(vadd_vv_b, 1, 1)
708GEN_VEXT_VV(vadd_vv_h, 2, 2)
709GEN_VEXT_VV(vadd_vv_w, 4, 4)
710GEN_VEXT_VV(vadd_vv_d, 8, 8)
711GEN_VEXT_VV(vsub_vv_b, 1, 1)
712GEN_VEXT_VV(vsub_vv_h, 2, 2)
713GEN_VEXT_VV(vsub_vv_w, 4, 4)
714GEN_VEXT_VV(vsub_vv_d, 8, 8)
43740e3a
LZ
715
716typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
717
718/*
719 * (T1)s1 gives the real operator type.
720 * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
721 */
722#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
723static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
724{ \
725 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
726 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
727}
728
729RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
730RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
731RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
732RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
733RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
734RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
735RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
736RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
737RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
738RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
739RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
740RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
741
742static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
743 CPURISCVState *env, uint32_t desc,
744 uint32_t esz, uint32_t dsz,
3479a814 745 opivx2_fn fn)
43740e3a 746{
43740e3a
LZ
747 uint32_t vm = vext_vm(desc);
748 uint32_t vl = env->vl;
749 uint32_t i;
750
f714361e 751 for (i = env->vstart; i < vl; i++) {
f9298de5 752 if (!vm && !vext_elem_mask(v0, i)) {
43740e3a
LZ
753 continue;
754 }
755 fn(vd, s1, vs2, i);
756 }
f714361e 757 env->vstart = 0;
43740e3a
LZ
758}
759
760/* generate the helpers for OPIVX */
3479a814 761#define GEN_VEXT_VX(NAME, ESZ, DSZ) \
43740e3a
LZ
762void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
763 void *vs2, CPURISCVState *env, \
764 uint32_t desc) \
765{ \
766 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
3479a814
FC
767 do_##NAME); \
768}
769
770GEN_VEXT_VX(vadd_vx_b, 1, 1)
771GEN_VEXT_VX(vadd_vx_h, 2, 2)
772GEN_VEXT_VX(vadd_vx_w, 4, 4)
773GEN_VEXT_VX(vadd_vx_d, 8, 8)
774GEN_VEXT_VX(vsub_vx_b, 1, 1)
775GEN_VEXT_VX(vsub_vx_h, 2, 2)
776GEN_VEXT_VX(vsub_vx_w, 4, 4)
777GEN_VEXT_VX(vsub_vx_d, 8, 8)
778GEN_VEXT_VX(vrsub_vx_b, 1, 1)
779GEN_VEXT_VX(vrsub_vx_h, 2, 2)
780GEN_VEXT_VX(vrsub_vx_w, 4, 4)
781GEN_VEXT_VX(vrsub_vx_d, 8, 8)
43740e3a
LZ
782
783void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
784{
785 intptr_t oprsz = simd_oprsz(desc);
786 intptr_t i;
787
788 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
789 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
790 }
791}
792
793void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
794{
795 intptr_t oprsz = simd_oprsz(desc);
796 intptr_t i;
797
798 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
799 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
800 }
801}
802
803void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
804{
805 intptr_t oprsz = simd_oprsz(desc);
806 intptr_t i;
807
808 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
809 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
810 }
811}
812
813void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
814{
815 intptr_t oprsz = simd_oprsz(desc);
816 intptr_t i;
817
818 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
819 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
820 }
821}
8fcdf776
LZ
822
823/* Vector Widening Integer Add/Subtract */
824#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
825#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
826#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
827#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
828#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
829#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
830#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
831#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
832#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
833#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
834#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
835#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
836RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
837RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
838RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
839RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
840RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
841RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
842RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
843RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
844RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
845RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
846RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
847RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
848RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
849RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
850RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
851RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
852RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
853RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
854RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
855RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
856RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
857RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
858RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
859RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
3479a814
FC
860GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
861GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
862GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
863GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
864GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
865GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
866GEN_VEXT_VV(vwadd_vv_b, 1, 2)
867GEN_VEXT_VV(vwadd_vv_h, 2, 4)
868GEN_VEXT_VV(vwadd_vv_w, 4, 8)
869GEN_VEXT_VV(vwsub_vv_b, 1, 2)
870GEN_VEXT_VV(vwsub_vv_h, 2, 4)
871GEN_VEXT_VV(vwsub_vv_w, 4, 8)
872GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
873GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
874GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
875GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
876GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
877GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
878GEN_VEXT_VV(vwadd_wv_b, 1, 2)
879GEN_VEXT_VV(vwadd_wv_h, 2, 4)
880GEN_VEXT_VV(vwadd_wv_w, 4, 8)
881GEN_VEXT_VV(vwsub_wv_b, 1, 2)
882GEN_VEXT_VV(vwsub_wv_h, 2, 4)
883GEN_VEXT_VV(vwsub_wv_w, 4, 8)
8fcdf776
LZ
884
885RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
886RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
887RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
888RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
889RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
890RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
891RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
892RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
893RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
894RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
895RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
896RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
897RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
898RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
899RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
900RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
901RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
902RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
903RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
904RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
905RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
906RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
907RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
908RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
3479a814
FC
909GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
910GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
911GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
912GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
913GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
914GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
915GEN_VEXT_VX(vwadd_vx_b, 1, 2)
916GEN_VEXT_VX(vwadd_vx_h, 2, 4)
917GEN_VEXT_VX(vwadd_vx_w, 4, 8)
918GEN_VEXT_VX(vwsub_vx_b, 1, 2)
919GEN_VEXT_VX(vwsub_vx_h, 2, 4)
920GEN_VEXT_VX(vwsub_vx_w, 4, 8)
921GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
922GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
923GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
924GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
925GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
926GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
927GEN_VEXT_VX(vwadd_wx_b, 1, 2)
928GEN_VEXT_VX(vwadd_wx_h, 2, 4)
929GEN_VEXT_VX(vwadd_wx_w, 4, 8)
930GEN_VEXT_VX(vwsub_wx_b, 1, 2)
931GEN_VEXT_VX(vwsub_wx_h, 2, 4)
932GEN_VEXT_VX(vwsub_wx_w, 4, 8)
3a6f8f68
LZ
933
934/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
935#define DO_VADC(N, M, C) (N + M + C)
936#define DO_VSBC(N, M, C) (N - M - C)
937
3479a814 938#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
3a6f8f68
LZ
939void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
940 CPURISCVState *env, uint32_t desc) \
941{ \
3a6f8f68 942 uint32_t vl = env->vl; \
3a6f8f68
LZ
943 uint32_t i; \
944 \
f714361e 945 for (i = env->vstart; i < vl; i++) { \
3a6f8f68
LZ
946 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
947 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 948 ETYPE carry = vext_elem_mask(v0, i); \
3a6f8f68
LZ
949 \
950 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
951 } \
f714361e 952 env->vstart = 0; \
3a6f8f68
LZ
953}
954
3479a814
FC
955GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
956GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
957GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
958GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
3a6f8f68 959
3479a814
FC
960GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
961GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
962GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
963GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
3a6f8f68 964
3479a814 965#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
3a6f8f68
LZ
966void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
967 CPURISCVState *env, uint32_t desc) \
968{ \
3a6f8f68 969 uint32_t vl = env->vl; \
3a6f8f68
LZ
970 uint32_t i; \
971 \
f714361e 972 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 973 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 974 ETYPE carry = vext_elem_mask(v0, i); \
3a6f8f68
LZ
975 \
976 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
977 } \
f714361e 978 env->vstart = 0; \
3a6f8f68
LZ
979}
980
3479a814
FC
981GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
982GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
983GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
984GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
3a6f8f68 985
3479a814
FC
986GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
987GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
988GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
989GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
3a6f8f68
LZ
990
991#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
992 (__typeof(N))(N + M) < N)
993#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
994
995#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
996void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
997 CPURISCVState *env, uint32_t desc) \
998{ \
3a6f8f68 999 uint32_t vl = env->vl; \
bb45485a 1000 uint32_t vm = vext_vm(desc); \
3a6f8f68
LZ
1001 uint32_t i; \
1002 \
f714361e 1003 for (i = env->vstart; i < vl; i++) { \
3a6f8f68
LZ
1004 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1005 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1006 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 1007 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
3a6f8f68 1008 } \
f714361e 1009 env->vstart = 0; \
3a6f8f68
LZ
1010}
1011
1012GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1013GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1014GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1015GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1016
1017GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1018GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1019GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1020GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1021
1022#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1023void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1024 void *vs2, CPURISCVState *env, uint32_t desc) \
1025{ \
3a6f8f68 1026 uint32_t vl = env->vl; \
bb45485a 1027 uint32_t vm = vext_vm(desc); \
3a6f8f68
LZ
1028 uint32_t i; \
1029 \
f714361e 1030 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 1031 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1032 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 1033 vext_set_elem_mask(vd, i, \
3a6f8f68
LZ
1034 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1035 } \
f714361e 1036 env->vstart = 0; \
3a6f8f68
LZ
1037}
1038
1039GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1040GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1041GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1042GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1043
1044GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1045GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1046GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1047GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
d3842924
LZ
1048
1049/* Vector Bitwise Logical Instructions */
1050RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1051RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1052RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1053RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1054RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1055RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1056RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1057RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1058RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1059RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1060RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1061RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
3479a814
FC
1062GEN_VEXT_VV(vand_vv_b, 1, 1)
1063GEN_VEXT_VV(vand_vv_h, 2, 2)
1064GEN_VEXT_VV(vand_vv_w, 4, 4)
1065GEN_VEXT_VV(vand_vv_d, 8, 8)
1066GEN_VEXT_VV(vor_vv_b, 1, 1)
1067GEN_VEXT_VV(vor_vv_h, 2, 2)
1068GEN_VEXT_VV(vor_vv_w, 4, 4)
1069GEN_VEXT_VV(vor_vv_d, 8, 8)
1070GEN_VEXT_VV(vxor_vv_b, 1, 1)
1071GEN_VEXT_VV(vxor_vv_h, 2, 2)
1072GEN_VEXT_VV(vxor_vv_w, 4, 4)
1073GEN_VEXT_VV(vxor_vv_d, 8, 8)
d3842924
LZ
1074
1075RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1076RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1077RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1078RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1079RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1080RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1081RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1082RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1083RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1084RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1085RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1086RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
3479a814
FC
1087GEN_VEXT_VX(vand_vx_b, 1, 1)
1088GEN_VEXT_VX(vand_vx_h, 2, 2)
1089GEN_VEXT_VX(vand_vx_w, 4, 4)
1090GEN_VEXT_VX(vand_vx_d, 8, 8)
1091GEN_VEXT_VX(vor_vx_b, 1, 1)
1092GEN_VEXT_VX(vor_vx_h, 2, 2)
1093GEN_VEXT_VX(vor_vx_w, 4, 4)
1094GEN_VEXT_VX(vor_vx_d, 8, 8)
1095GEN_VEXT_VX(vxor_vx_b, 1, 1)
1096GEN_VEXT_VX(vxor_vx_h, 2, 2)
1097GEN_VEXT_VX(vxor_vx_w, 4, 4)
1098GEN_VEXT_VX(vxor_vx_d, 8, 8)
3277d955
LZ
1099
1100/* Vector Single-Width Bit Shift Instructions */
1101#define DO_SLL(N, M) (N << (M))
1102#define DO_SRL(N, M) (N >> (M))
1103
1104/* generate the helpers for shift instructions with two vector operators */
3479a814 1105#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
3277d955
LZ
1106void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1107 void *vs2, CPURISCVState *env, uint32_t desc) \
1108{ \
3277d955
LZ
1109 uint32_t vm = vext_vm(desc); \
1110 uint32_t vl = env->vl; \
3277d955
LZ
1111 uint32_t i; \
1112 \
f714361e 1113 for (i = env->vstart; i < vl; i++) { \
f9298de5 1114 if (!vm && !vext_elem_mask(v0, i)) { \
3277d955
LZ
1115 continue; \
1116 } \
1117 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1118 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1119 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1120 } \
f714361e 1121 env->vstart = 0; \
3277d955
LZ
1122}
1123
3479a814
FC
1124GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1125GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1126GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1127GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
3277d955 1128
3479a814
FC
1129GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1130GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1131GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1132GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
3277d955 1133
3479a814
FC
1134GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1135GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1136GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1137GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
3277d955
LZ
1138
1139/* generate the helpers for shift instructions with one vector and one scalar */
3479a814
FC
1140#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1141void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1142 void *vs2, CPURISCVState *env, uint32_t desc) \
1143{ \
1144 uint32_t vm = vext_vm(desc); \
1145 uint32_t vl = env->vl; \
1146 uint32_t i; \
1147 \
f714361e 1148 for (i = env->vstart; i < vl; i++) { \
3479a814
FC
1149 if (!vm && !vext_elem_mask(v0, i)) { \
1150 continue; \
1151 } \
1152 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1153 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1154 } \
f714361e 1155 env->vstart = 0; \
3479a814
FC
1156}
1157
1158GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1159GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1160GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1161GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1162
1163GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1164GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1165GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1166GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1167
1168GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1169GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1170GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1171GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
7689b028
LZ
1172
1173/* Vector Narrowing Integer Right Shift Instructions */
7daa5852
FC
1174GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1175GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1176GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1177GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1178GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1179GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1180GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1181GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1182GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1183GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1184GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1185GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1366fc79
LZ
1186
1187/* Vector Integer Comparison Instructions */
1188#define DO_MSEQ(N, M) (N == M)
1189#define DO_MSNE(N, M) (N != M)
1190#define DO_MSLT(N, M) (N < M)
1191#define DO_MSLE(N, M) (N <= M)
1192#define DO_MSGT(N, M) (N > M)
1193
1194#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1195void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1196 CPURISCVState *env, uint32_t desc) \
1197{ \
1366fc79
LZ
1198 uint32_t vm = vext_vm(desc); \
1199 uint32_t vl = env->vl; \
1366fc79
LZ
1200 uint32_t i; \
1201 \
f714361e 1202 for (i = env->vstart; i < vl; i++) { \
1366fc79
LZ
1203 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1204 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1205 if (!vm && !vext_elem_mask(v0, i)) { \
1366fc79
LZ
1206 continue; \
1207 } \
f9298de5 1208 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1366fc79 1209 } \
f714361e 1210 env->vstart = 0; \
1366fc79
LZ
1211}
1212
1213GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1214GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1215GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1216GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1217
1218GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1219GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1220GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1221GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1222
1223GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1224GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1225GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1226GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1227
1228GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1229GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1230GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1231GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1232
1233GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1234GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1235GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1236GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1237
1238GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1239GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1240GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1241GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1242
1243#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1244void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1245 CPURISCVState *env, uint32_t desc) \
1246{ \
1366fc79
LZ
1247 uint32_t vm = vext_vm(desc); \
1248 uint32_t vl = env->vl; \
1366fc79
LZ
1249 uint32_t i; \
1250 \
f714361e 1251 for (i = env->vstart; i < vl; i++) { \
1366fc79 1252 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1253 if (!vm && !vext_elem_mask(v0, i)) { \
1366fc79
LZ
1254 continue; \
1255 } \
f9298de5 1256 vext_set_elem_mask(vd, i, \
1366fc79
LZ
1257 DO_OP(s2, (ETYPE)(target_long)s1)); \
1258 } \
f714361e 1259 env->vstart = 0; \
1366fc79
LZ
1260}
1261
1262GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1263GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1264GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1265GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1266
1267GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1268GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1269GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1270GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1271
1272GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1273GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1274GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1275GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1276
1277GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1278GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1279GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1280GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1281
1282GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1283GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1284GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1285GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1286
1287GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1288GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1289GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1290GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1291
1292GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1293GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1294GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1295GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1296
1297GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1298GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1299GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1300GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
558fa779
LZ
1301
1302/* Vector Integer Min/Max Instructions */
1303RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1304RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1305RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1306RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1307RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1308RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1309RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1310RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1311RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1312RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1313RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1314RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1315RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1316RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1317RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1318RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
3479a814
FC
1319GEN_VEXT_VV(vminu_vv_b, 1, 1)
1320GEN_VEXT_VV(vminu_vv_h, 2, 2)
1321GEN_VEXT_VV(vminu_vv_w, 4, 4)
1322GEN_VEXT_VV(vminu_vv_d, 8, 8)
1323GEN_VEXT_VV(vmin_vv_b, 1, 1)
1324GEN_VEXT_VV(vmin_vv_h, 2, 2)
1325GEN_VEXT_VV(vmin_vv_w, 4, 4)
1326GEN_VEXT_VV(vmin_vv_d, 8, 8)
1327GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
1328GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
1329GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
1330GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
1331GEN_VEXT_VV(vmax_vv_b, 1, 1)
1332GEN_VEXT_VV(vmax_vv_h, 2, 2)
1333GEN_VEXT_VV(vmax_vv_w, 4, 4)
1334GEN_VEXT_VV(vmax_vv_d, 8, 8)
558fa779
LZ
1335
1336RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1337RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1338RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1339RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1340RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1341RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1342RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1343RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1344RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1345RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1346RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1347RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1348RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1349RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1350RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1351RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
3479a814
FC
1352GEN_VEXT_VX(vminu_vx_b, 1, 1)
1353GEN_VEXT_VX(vminu_vx_h, 2, 2)
1354GEN_VEXT_VX(vminu_vx_w, 4, 4)
1355GEN_VEXT_VX(vminu_vx_d, 8, 8)
1356GEN_VEXT_VX(vmin_vx_b, 1, 1)
1357GEN_VEXT_VX(vmin_vx_h, 2, 2)
1358GEN_VEXT_VX(vmin_vx_w, 4, 4)
1359GEN_VEXT_VX(vmin_vx_d, 8, 8)
1360GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
1361GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
1362GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
1363GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
1364GEN_VEXT_VX(vmax_vx_b, 1, 1)
1365GEN_VEXT_VX(vmax_vx_h, 2, 2)
1366GEN_VEXT_VX(vmax_vx_w, 4, 4)
1367GEN_VEXT_VX(vmax_vx_d, 8, 8)
958b85f3
LZ
1368
1369/* Vector Single-Width Integer Multiply Instructions */
1370#define DO_MUL(N, M) (N * M)
1371RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1372RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1373RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1374RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
3479a814
FC
1375GEN_VEXT_VV(vmul_vv_b, 1, 1)
1376GEN_VEXT_VV(vmul_vv_h, 2, 2)
1377GEN_VEXT_VV(vmul_vv_w, 4, 4)
1378GEN_VEXT_VV(vmul_vv_d, 8, 8)
958b85f3
LZ
1379
1380static int8_t do_mulh_b(int8_t s2, int8_t s1)
1381{
1382 return (int16_t)s2 * (int16_t)s1 >> 8;
1383}
1384
1385static int16_t do_mulh_h(int16_t s2, int16_t s1)
1386{
1387 return (int32_t)s2 * (int32_t)s1 >> 16;
1388}
1389
1390static int32_t do_mulh_w(int32_t s2, int32_t s1)
1391{
1392 return (int64_t)s2 * (int64_t)s1 >> 32;
1393}
1394
1395static int64_t do_mulh_d(int64_t s2, int64_t s1)
1396{
1397 uint64_t hi_64, lo_64;
1398
1399 muls64(&lo_64, &hi_64, s1, s2);
1400 return hi_64;
1401}
1402
1403static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1404{
1405 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1406}
1407
1408static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1409{
1410 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1411}
1412
1413static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1414{
1415 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1416}
1417
1418static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1419{
1420 uint64_t hi_64, lo_64;
1421
1422 mulu64(&lo_64, &hi_64, s2, s1);
1423 return hi_64;
1424}
1425
1426static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1427{
1428 return (int16_t)s2 * (uint16_t)s1 >> 8;
1429}
1430
1431static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1432{
1433 return (int32_t)s2 * (uint32_t)s1 >> 16;
1434}
1435
1436static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1437{
1438 return (int64_t)s2 * (uint64_t)s1 >> 32;
1439}
1440
1441/*
1442 * Let A = signed operand,
1443 * B = unsigned operand
1444 * P = mulu64(A, B), unsigned product
1445 *
1446 * LET X = 2 ** 64 - A, 2's complement of A
1447 * SP = signed product
1448 * THEN
1449 * IF A < 0
1450 * SP = -X * B
1451 * = -(2 ** 64 - A) * B
1452 * = A * B - 2 ** 64 * B
1453 * = P - 2 ** 64 * B
1454 * ELSE
1455 * SP = P
1456 * THEN
1457 * HI_P -= (A < 0 ? B : 0)
1458 */
1459
1460static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1461{
1462 uint64_t hi_64, lo_64;
1463
1464 mulu64(&lo_64, &hi_64, s2, s1);
1465
1466 hi_64 -= s2 < 0 ? s1 : 0;
1467 return hi_64;
1468}
1469
1470RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1471RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1472RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1473RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1474RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1475RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1476RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1477RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1478RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1479RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1480RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1481RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
3479a814
FC
1482GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1483GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1484GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1485GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1486GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1487GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1488GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1489GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1490GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1491GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1492GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1493GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1494
1495RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1496RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1497RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1498RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1499RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1500RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1501RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1502RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1503RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1504RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1505RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1506RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1507RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1508RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1509RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1510RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1511GEN_VEXT_VX(vmul_vx_b, 1, 1)
1512GEN_VEXT_VX(vmul_vx_h, 2, 2)
1513GEN_VEXT_VX(vmul_vx_w, 4, 4)
1514GEN_VEXT_VX(vmul_vx_d, 8, 8)
1515GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1516GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1517GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1518GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1519GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1520GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1521GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1522GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1523GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1524GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1525GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1526GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1527
1528/* Vector Integer Divide Instructions */
1529#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1530#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1531#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\
1532 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1533#define DO_REM(N, M) (unlikely(M == 0) ? N :\
1534 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
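/*
 * Note: (N == -N) holds only for 0 and for the minimum signed value, so
 * the second test above singles out the INT_MIN / -1 overflow case; as
 * the RISC-V division semantics require, the quotient then stays at N
 * (INT_MIN) and the remainder is 0.
 */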
1535
1536RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1537RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1538RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1539RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1540RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1541RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1542RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1543RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1544RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1545RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1546RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1547RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1548RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1549RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1550RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1551RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1552GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1553GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1554GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1555GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1556GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1557GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1558GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1559GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1560GEN_VEXT_VV(vremu_vv_b, 1, 1)
1561GEN_VEXT_VV(vremu_vv_h, 2, 2)
1562GEN_VEXT_VV(vremu_vv_w, 4, 4)
1563GEN_VEXT_VV(vremu_vv_d, 8, 8)
1564GEN_VEXT_VV(vrem_vv_b, 1, 1)
1565GEN_VEXT_VV(vrem_vv_h, 2, 2)
1566GEN_VEXT_VV(vrem_vv_w, 4, 4)
1567GEN_VEXT_VV(vrem_vv_d, 8, 8)
1568
1569RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1570RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1571RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1572RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1573RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1574RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1575RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1576RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1577RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1578RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1579RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1580RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1581RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1582RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1583RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1584RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1585GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1586GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1587GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1588GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1589GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1590GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1591GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1592GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1593GEN_VEXT_VX(vremu_vx_b, 1, 1)
1594GEN_VEXT_VX(vremu_vx_h, 2, 2)
1595GEN_VEXT_VX(vremu_vx_w, 4, 4)
1596GEN_VEXT_VX(vremu_vx_d, 8, 8)
1597GEN_VEXT_VX(vrem_vx_b, 1, 1)
1598GEN_VEXT_VX(vrem_vx_h, 2, 2)
1599GEN_VEXT_VX(vrem_vx_w, 4, 4)
1600GEN_VEXT_VX(vrem_vx_d, 8, 8)
1601
1602/* Vector Widening Integer Multiply Instructions */
1603RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1604RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1605RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1606RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1607RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1608RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1609RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1610RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1611RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1612GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1613GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1614GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1615GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1616GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1617GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1618GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1619GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1620GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1621
1622RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1623RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1624RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1625RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1626RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1627RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1628RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1629RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1630RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1631GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1632GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1633GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1634GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1635GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1636GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1637GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1638GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1639GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1640
1641/* Vector Single-Width Integer Multiply-Add Instructions */
1642#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1643static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1644{ \
1645 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1646 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1647 TD d = *((TD *)vd + HD(i)); \
1648 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1649}
1650
1651#define DO_MACC(N, M, D) (M * N + D)
1652#define DO_NMSAC(N, M, D) (-(M * N) + D)
1653#define DO_MADD(N, M, D) (M * D + N)
1654#define DO_NMSUB(N, M, D) (-(M * D) + N)
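/*
 * In the macros above D is the current destination element (vd):
 * MACC/NMSAC accumulate the product of the two sources into vd, while
 * MADD/NMSUB use vd as a multiplicand and add or subtract the remaining
 * source operand.
 */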
1655RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1656RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1657RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1658RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1659RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1660RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1661RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1662RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1663RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1664RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1665RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1666RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1667RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1668RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1669RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1670RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1671GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1672GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1673GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1674GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1675GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1676GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1677GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1678GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1679GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1680GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1681GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1682GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1683GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1684GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1685GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1686GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1687
1688#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1689static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1690{ \
1691 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1692 TD d = *((TD *)vd + HD(i)); \
1693 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1694}
1695
1696RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1697RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1698RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1699RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1700RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1701RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1702RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1703RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1704RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1705RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1706RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1707RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1708RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1709RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1710RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1711RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1712GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1713GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1714GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1715GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1716GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1717GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1718GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1719GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1720GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1721GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1722GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1723GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1724GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1725GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1726GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1727GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1728
1729/* Vector Widening Integer Multiply-Add Instructions */
1730RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1731RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1732RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1733RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1734RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1735RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1736RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1737RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1738RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1739GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1740GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1741GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1742GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1743GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1744GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1745GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1746GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1747GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1748
1749RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1750RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1751RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1752RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1753RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1754RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1755RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1756RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1757RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1758RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1759RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1760RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1761GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1762GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1763GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1764GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1765GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1766GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1767GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1768GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1769GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1770GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1771GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1772GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1773
1774/* Vector Integer Merge and Move Instructions */
1775#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
1776void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1777 uint32_t desc) \
1778{ \
1779 uint32_t vl = env->vl; \
1780 uint32_t i; \
1781 \
1782 for (i = env->vstart; i < vl; i++) { \
1783 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1784 *((ETYPE *)vd + H(i)) = s1; \
1785 } \
1786 env->vstart = 0; \
1787}
1788
1789GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1790GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1791GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1792GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1793
1794#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
1795void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1796 uint32_t desc) \
1797{ \
1798 uint32_t vl = env->vl; \
1799 uint32_t i; \
1800 \
1801 for (i = env->vstart; i < vl; i++) { \
1802 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
1803 } \
1804 env->vstart = 0; \
1805}
1806
1807GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
1808GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1809GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1810GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1811
1812#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
1813void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1814 CPURISCVState *env, uint32_t desc) \
1815{ \
1816 uint32_t vl = env->vl; \
1817 uint32_t i; \
1818 \
1819 for (i = env->vstart; i < vl; i++) { \
1820 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
1821 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
1822 } \
1823 env->vstart = 0; \
1824}
1825
1826GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
1827GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1828GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1829GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1830
1831#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
1832void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1833 void *vs2, CPURISCVState *env, uint32_t desc) \
1834{ \
1835 uint32_t vl = env->vl; \
1836 uint32_t i; \
1837 \
1838 for (i = env->vstart; i < vl; i++) { \
1839 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1840 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
1841 (ETYPE)(target_long)s1); \
1842 *((ETYPE *)vd + H(i)) = d; \
1843 } \
1844 env->vstart = 0; \
1845}
1846
1847GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
1848GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1849GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1850GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1851
1852/*
1853 *** Vector Fixed-Point Arithmetic Instructions
1854 */
1855
1856/* Vector Single-Width Saturating Add and Subtract */
1857
1858/*
1859 * As fixed-point instructions generally need a rounding mode and saturation,
1860 * define common macros for fixed point here.
1861 */
1862typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1863 CPURISCVState *env, int vxrm);
1864
1865#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1866static inline void \
1867do_##NAME(void *vd, void *vs1, void *vs2, int i, \
1868 CPURISCVState *env, int vxrm) \
1869{ \
1870 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1871 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1872 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
1873}
1874
1875static inline void
1876vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1877 CPURISCVState *env,
1878 uint32_t vl, uint32_t vm, int vxrm,
1879 opivv2_rm_fn *fn)
1880{
1881 for (uint32_t i = env->vstart; i < vl; i++) {
1882 if (!vm && !vext_elem_mask(v0, i)) {
1883 continue;
1884 }
1885 fn(vd, vs1, vs2, i, env, vxrm);
1886 }
1887 env->vstart = 0;
1888}
1889
1890static inline void
1891vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1892 CPURISCVState *env,
1893 uint32_t desc, uint32_t esz, uint32_t dsz,
1894 opivv2_rm_fn *fn)
1895{
1896 uint32_t vm = vext_vm(desc);
1897 uint32_t vl = env->vl;
1898
1899 switch (env->vxrm) {
1900 case 0: /* rnu */
1901 vext_vv_rm_1(vd, v0, vs1, vs2,
1902 env, vl, vm, 0, fn);
1903 break;
1904 case 1: /* rne */
1905 vext_vv_rm_1(vd, v0, vs1, vs2,
1906 env, vl, vm, 1, fn);
1907 break;
1908 case 2: /* rdn */
1909 vext_vv_rm_1(vd, v0, vs1, vs2,
1910 env, vl, vm, 2, fn);
1911 break;
1912 default: /* rod */
1913 vext_vv_rm_1(vd, v0, vs1, vs2,
1914 env, vl, vm, 3, fn);
1915 break;
1916 }
1917}
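/*
 * Each case above passes the rounding mode as a literal constant,
 * presumably so it can be folded into the inlined vext_vv_rm_1() and
 * get_round() calls instead of being re-read from env for every element.
 */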
1918
1919/* generate helpers for fixed point instructions with OPIVV format */
1920#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \
1921void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1922 CPURISCVState *env, uint32_t desc) \
1923{ \
1924 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
1925 do_##NAME); \
1926}
1927
1928static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1929{
1930 uint8_t res = a + b;
1931 if (res < a) {
1932 res = UINT8_MAX;
1933 env->vxsat = 0x1;
1934 }
1935 return res;
1936}
1937
1938static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1939 uint16_t b)
1940{
1941 uint16_t res = a + b;
1942 if (res < a) {
1943 res = UINT16_MAX;
1944 env->vxsat = 0x1;
1945 }
1946 return res;
1947}
1948
1949static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1950 uint32_t b)
1951{
1952 uint32_t res = a + b;
1953 if (res < a) {
1954 res = UINT32_MAX;
1955 env->vxsat = 0x1;
1956 }
1957 return res;
1958}
1959
1960static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1961 uint64_t b)
1962{
1963 uint64_t res = a + b;
1964 if (res < a) {
1965 res = UINT64_MAX;
1966 env->vxsat = 0x1;
1967 }
1968 return res;
1969}
1970
1971RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
1972RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
1973RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
1974RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
1975GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
1976GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
1977GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
1978GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
1979
1980typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
1981 CPURISCVState *env, int vxrm);
1982
1983#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1984static inline void \
1985do_##NAME(void *vd, target_long s1, void *vs2, int i, \
1986 CPURISCVState *env, int vxrm) \
1987{ \
1988 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1989 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
1990}
1991
1992static inline void
1993vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
1994 CPURISCVState *env,
1995 uint32_t vl, uint32_t vm, int vxrm,
1996 opivx2_rm_fn *fn)
1997{
1998 for (uint32_t i = env->vstart; i < vl; i++) {
1999 if (!vm && !vext_elem_mask(v0, i)) {
2000 continue;
2001 }
2002 fn(vd, s1, vs2, i, env, vxrm);
2003 }
2004 env->vstart = 0;
2005}
2006
2007static inline void
2008vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2009 CPURISCVState *env,
2010 uint32_t desc, uint32_t esz, uint32_t dsz,
2011 opivx2_rm_fn *fn)
2012{
2013 uint32_t vm = vext_vm(desc);
2014 uint32_t vl = env->vl;
2015
2016 switch (env->vxrm) {
2017 case 0: /* rnu */
2018 vext_vx_rm_1(vd, v0, s1, vs2,
2019 env, vl, vm, 0, fn);
2020 break;
2021 case 1: /* rne */
2022 vext_vx_rm_1(vd, v0, s1, vs2,
2023 env, vl, vm, 1, fn);
2024 break;
2025 case 2: /* rdn */
2026 vext_vx_rm_1(vd, v0, s1, vs2,
2027 env, vl, vm, 2, fn);
2028 break;
2029 default: /* rod */
2030 vext_vx_rm_1(vd, v0, s1, vs2,
2031 env, vl, vm, 3, fn);
2032 break;
2033 }
2034}
2035
2036/* generate helpers for fixed point instructions with OPIVX format */
2037#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \
2038void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2039 void *vs2, CPURISCVState *env, uint32_t desc) \
2040{ \
2041 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
2042 do_##NAME); \
2043}
2044
2045RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2046RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2047RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2048RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2049GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
2050GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
2051GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
2052GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
2053
2054static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2055{
2056 int8_t res = a + b;
2057 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2058 res = a > 0 ? INT8_MAX : INT8_MIN;
2059 env->vxsat = 0x1;
2060 }
2061 return res;
2062}
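/*
 * The overflow test (res ^ a) & (res ^ b) keeps the sign bit only when
 * both operands have the same sign and the wrapped sum does not, e.g.
 * 0x70 + 0x70 = 0xE0 at 8 bits, which saturates to INT8_MAX. The wider
 * widths below use the same pattern.
 */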
2063
2064static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2065{
2066 int16_t res = a + b;
2067 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2068 res = a > 0 ? INT16_MAX : INT16_MIN;
2069 env->vxsat = 0x1;
2070 }
2071 return res;
2072}
2073
2074static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2075{
2076 int32_t res = a + b;
2077 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2078 res = a > 0 ? INT32_MAX : INT32_MIN;
2079 env->vxsat = 0x1;
2080 }
2081 return res;
2082}
2083
2084static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2085{
2086 int64_t res = a + b;
2087 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2088 res = a > 0 ? INT64_MAX : INT64_MIN;
2089 env->vxsat = 0x1;
2090 }
2091 return res;
2092}
2093
2094RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2095RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2096RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2097RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2098GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2099GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2100GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2101GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2102
2103RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2104RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2105RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2106RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2107GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2108GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2109GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2110GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2111
2112static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2113{
2114 uint8_t res = a - b;
2115 if (res > a) {
2116 res = 0;
2117 env->vxsat = 0x1;
2118 }
2119 return res;
2120}
2121
2122static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2123 uint16_t b)
2124{
2125 uint16_t res = a - b;
2126 if (res > a) {
2127 res = 0;
2128 env->vxsat = 0x1;
2129 }
2130 return res;
2131}
2132
2133static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2134 uint32_t b)
2135{
2136 uint32_t res = a - b;
2137 if (res > a) {
2138 res = 0;
2139 env->vxsat = 0x1;
2140 }
2141 return res;
2142}
2143
2144static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2145 uint64_t b)
2146{
2147 uint64_t res = a - b;
2148 if (res > a) {
2149 res = 0;
2150 env->vxsat = 0x1;
2151 }
2152 return res;
2153}
2154
2155RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2156RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2157RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2158RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2159GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2160GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2161GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2162GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2163
2164RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2165RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2166RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2167RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2168GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2169GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2170GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2171GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2172
2173static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2174{
2175 int8_t res = a - b;
2176 if ((res ^ a) & (a ^ b) & INT8_MIN) {
2177 res = a >= 0 ? INT8_MAX : INT8_MIN;
2178 env->vxsat = 0x1;
2179 }
2180 return res;
2181}
2182
2183static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2184{
2185 int16_t res = a - b;
2186 if ((res ^ a) & (a ^ b) & INT16_MIN) {
2187 res = a >= 0 ? INT16_MAX : INT16_MIN;
2188 env->vxsat = 0x1;
2189 }
2190 return res;
2191}
2192
2193static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2194{
2195 int32_t res = a - b;
2196 if ((res ^ a) & (a ^ b) & INT32_MIN) {
2197 res = a >= 0 ? INT32_MAX : INT32_MIN;
2198 env->vxsat = 0x1;
2199 }
2200 return res;
2201}
2202
2203static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2204{
2205 int64_t res = a - b;
2206 if ((res ^ a) & (a ^ b) & INT64_MIN) {
2207 res = a >= 0 ? INT64_MAX : INT64_MIN;
2208 env->vxsat = 0x1;
2209 }
2210 return res;
2211}
2212
2213RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2214RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2215RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2216RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2217GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
2218GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
2219GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
2220GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
2221
2222RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2223RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2224RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2225RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2226GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
2227GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
2228GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
2229GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2230
2231/* Vector Single-Width Averaging Add and Subtract */
2232static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2233{
2234 uint8_t d = extract64(v, shift, 1);
2235 uint8_t d1;
2236 uint64_t D1, D2;
2237
2238 if (shift == 0 || shift > 64) {
2239 return 0;
2240 }
2241
2242 d1 = extract64(v, shift - 1, 1);
2243 D1 = extract64(v, 0, shift);
2244 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2245 return d1;
2246 } else if (vxrm == 1) { /* round-to-nearest-even */
2247 if (shift > 1) {
2248 D2 = extract64(v, 0, shift - 1);
2249 return d1 & ((D2 != 0) | d);
2250 } else {
2251 return d1 & d;
2252 }
2253 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2254 return !d & (D1 != 0);
2255 }
2256 return 0; /* round-down (truncate) */
2257}
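/*
 * Worked example (illustrative): v = 0b1011 with shift = 2 truncates to
 * 2 and discards 0b11 (three quarters of an LSB):
 *   rnu: d1 = 1                   -> round up to 3
 *   rne: d1 & ((D2 != 0) | d) = 1 -> 3 (3 is nearest)
 *   rdn: 0                        -> 2
 *   rod: !d & (D1 != 0) = 1       -> 3 (force the result odd)
 * For the half-way case v = 0b1010, rne returns 0 so the result stays at
 * the even value 2.
 */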
2258
2259static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2260{
2261 int64_t res = (int64_t)a + b;
2262 uint8_t round = get_round(vxrm, res, 1);
2263
2264 return (res >> 1) + round;
2265}
2266
2267static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2268{
2269 int64_t res = a + b;
2270 uint8_t round = get_round(vxrm, res, 1);
2271 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2272
2273 /* With signed overflow, bit 64 is inverse of bit 63. */
2274 return ((res >> 1) ^ over) + round;
2275}
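/*
 * e.g. a = b = INT64_MAX: res wraps to -2 and 'over' is set, so
 * (res >> 1) ^ over recovers INT64_MAX, the correct average; the
 * discarded low bit is 0 here, so 'round' adds nothing in any mode.
 */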
2276
2277RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2278RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2279RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2280RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2281GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
2282GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
2283GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
2284GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
2285
2286RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2287RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2288RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2289RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2290GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
2291GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
2292GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
2293GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2294
2295static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2296 uint32_t a, uint32_t b)
2297{
2298 uint64_t res = (uint64_t)a + b;
2299 uint8_t round = get_round(vxrm, res, 1);
2300
2301 return (res >> 1) + round;
2302}
2303
2304static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2305 uint64_t a, uint64_t b)
2306{
2307 uint64_t res = a + b;
2308 uint8_t round = get_round(vxrm, res, 1);
2309 uint64_t over = (uint64_t)(res < a) << 63;
2310
2311 return ((res >> 1) | over) + round;
2312}
2313
2314RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2315RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2316RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2317RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2318GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
2319GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
2320GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
2321GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
2322
2323RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2324RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2325RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2326RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2327GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
2328GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
2329GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
2330GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
2331
2332static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2333{
2334 int64_t res = (int64_t)a - b;
2335 uint8_t round = get_round(vxrm, res, 1);
2336
2337 return (res >> 1) + round;
2338}
2339
2340static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2341{
2342 int64_t res = (int64_t)a - b;
2343 uint8_t round = get_round(vxrm, res, 1);
2344 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2345
2346 /* With signed overflow, bit 64 is inverse of bit 63. */
2347 return ((res >> 1) ^ over) + round;
2348}
2349
2350RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2351RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2352RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2353RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2354GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
2355GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
2356GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
2357GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
2358
2359RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2360RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2361RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2362RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2363GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
2364GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
2365GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
2366GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2367
2368static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2369 uint32_t a, uint32_t b)
2370{
2371 int64_t res = (int64_t)a - b;
2372 uint8_t round = get_round(vxrm, res, 1);
2373
2374 return (res >> 1) + round;
2375}
2376
2377static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2378 uint64_t a, uint64_t b)
2379{
2380 uint64_t res = (uint64_t)a - b;
2381 uint8_t round = get_round(vxrm, res, 1);
2382 uint64_t over = (uint64_t)(res > a) << 63;
2383
2384 return ((res >> 1) | over) + round;
2385}
2386
2387RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2388RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2389RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2390RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2391GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
2392GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
2393GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
2394GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
2395
2396RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2397RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2398RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2399RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2400GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
2401GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
2402GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
2403GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
2404
2405/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2406static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2407{
2408 uint8_t round;
2409 int16_t res;
2410
2411 res = (int16_t)a * (int16_t)b;
2412 round = get_round(vxrm, res, 7);
2413 res = (res >> 7) + round;
2414
2415 if (res > INT8_MAX) {
2416 env->vxsat = 0x1;
2417 return INT8_MAX;
2418 } else if (res < INT8_MIN) {
2419 env->vxsat = 0x1;
2420 return INT8_MIN;
2421 } else {
2422 return res;
2423 }
2424}
2425
2426static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2427{
2428 uint8_t round;
2429 int32_t res;
2430
2431 res = (int32_t)a * (int32_t)b;
2432 round = get_round(vxrm, res, 15);
2433 res = (res >> 15) + round;
2434
2435 if (res > INT16_MAX) {
2436 env->vxsat = 0x1;
2437 return INT16_MAX;
2438 } else if (res < INT16_MIN) {
2439 env->vxsat = 0x1;
2440 return INT16_MIN;
2441 } else {
2442 return res;
2443 }
2444}
2445
2446static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2447{
2448 uint8_t round;
2449 int64_t res;
2450
2451 res = (int64_t)a * (int64_t)b;
2452 round = get_round(vxrm, res, 31);
2453 res = (res >> 31) + round;
2454
2455 if (res > INT32_MAX) {
2456 env->vxsat = 0x1;
2457 return INT32_MAX;
2458 } else if (res < INT32_MIN) {
2459 env->vxsat = 0x1;
2460 return INT32_MIN;
2461 } else {
2462 return res;
2463 }
2464}
2465
2466static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2467{
2468 uint8_t round;
2469 uint64_t hi_64, lo_64;
2470 int64_t res;
2471
2472 if (a == INT64_MIN && b == INT64_MIN) {
2473 env->vxsat = 1;
2474 return INT64_MAX;
2475 }
2476
2477 muls64(&lo_64, &hi_64, a, b);
2478 round = get_round(vxrm, lo_64, 63);
2479 /*
2480 * Cannot overflow, as there are always
2481 * 2 sign bits after multiply.
2482 */
2483 res = (hi_64 << 1) | (lo_64 >> 63);
2484 if (round) {
2485 if (res == INT64_MAX) {
2486 env->vxsat = 1;
2487 } else {
2488 res += 1;
2489 }
2490 }
2491 return res;
2492}
2493
2494RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2495RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2496RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2497RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2498GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
2499GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
2500GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
2501GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
2502
2503RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2504RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2505RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2506RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2507GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
2508GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
2509GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
2510GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2511
2512/* Vector Single-Width Scaling Shift Instructions */
2513static inline uint8_t
2514vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2515{
2516 uint8_t round, shift = b & 0x7;
2517 uint8_t res;
2518
2519 round = get_round(vxrm, a, shift);
2520 res = (a >> shift) + round;
2521 return res;
2522}
2523static inline uint16_t
2524vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2525{
2526 uint8_t round, shift = b & 0xf;
2527 uint16_t res;
2528
2529 round = get_round(vxrm, a, shift);
2530 res = (a >> shift) + round;
2531 return res;
2532}
2533static inline uint32_t
2534vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2535{
2536 uint8_t round, shift = b & 0x1f;
2537 uint32_t res;
2538
2539 round = get_round(vxrm, a, shift);
2540 res = (a >> shift) + round;
2541 return res;
2542}
2543static inline uint64_t
2544vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2545{
2546 uint8_t round, shift = b & 0x3f;
2547 uint64_t res;
2548
2549 round = get_round(vxrm, a, shift);
2550 res = (a >> shift) + round;
2551 return res;
2552}
2553RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2554RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2555RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2556RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2557GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
2558GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
2559GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
2560GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
2561
2562RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2563RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2564RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2565RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2566GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
2567GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
2568GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
2569GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2570
2571static inline int8_t
2572vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2573{
2574 uint8_t round, shift = b & 0x7;
2575 int8_t res;
2576
2577 round = get_round(vxrm, a, shift);
2578 res = (a >> shift) + round;
2579 return res;
2580}
2581static inline int16_t
2582vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2583{
2584 uint8_t round, shift = b & 0xf;
2585 int16_t res;
2586
2587 round = get_round(vxrm, a, shift);
2588 res = (a >> shift) + round;
2589 return res;
2590}
2591static inline int32_t
2592vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2593{
2594 uint8_t round, shift = b & 0x1f;
2595 int32_t res;
2596
2597 round = get_round(vxrm, a, shift);
2598 res = (a >> shift) + round;
2599 return res;
2600}
2601static inline int64_t
2602vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2603{
2604 uint8_t round, shift = b & 0x3f;
2605 int64_t res;
2606
2607 round = get_round(vxrm, a, shift);
2608 res = (a >> shift) + round;
2609 return res;
2610}
2611
2612RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2613RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2614RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2615RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2616GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
2617GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
2618GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
2619GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
2620
2621RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2622RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2623RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2624RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2625GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
2626GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
2627GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
2628GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2629
2630/* Vector Narrowing Fixed-Point Clip Instructions */
2631static inline int8_t
2632vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2633{
2634 uint8_t round, shift = b & 0xf;
2635 int16_t res;
2636
2637 round = get_round(vxrm, a, shift);
2638 res = (a >> shift) + round;
2639 if (res > INT8_MAX) {
2640 env->vxsat = 0x1;
2641 return INT8_MAX;
2642 } else if (res < INT8_MIN) {
2643 env->vxsat = 0x1;
2644 return INT8_MIN;
2645 } else {
2646 return res;
2647 }
2648}
2649
2650static inline int16_t
2651vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2652{
2653 uint8_t round, shift = b & 0x1f;
2654 int32_t res;
2655
2656 round = get_round(vxrm, a, shift);
2657 res = (a >> shift) + round;
2658 if (res > INT16_MAX) {
2659 env->vxsat = 0x1;
2660 return INT16_MAX;
2661 } else if (res < INT16_MIN) {
2662 env->vxsat = 0x1;
2663 return INT16_MIN;
2664 } else {
2665 return res;
2666 }
2667}
2668
2669static inline int32_t
2670vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2671{
2672 uint8_t round, shift = b & 0x3f;
2673 int64_t res;
2674
2675 round = get_round(vxrm, a, shift);
2676 res = (a >> shift) + round;
2677 if (res > INT32_MAX) {
2678 env->vxsat = 0x1;
2679 return INT32_MAX;
2680 } else if (res < INT32_MIN) {
2681 env->vxsat = 0x1;
2682 return INT32_MIN;
2683 } else {
2684 return res;
2685 }
2686}
2687
2688RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2689RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2690RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2691GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
2692GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
2693GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
2694
2695RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2696RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2697RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2698GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
2699GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
2700GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
2701
2702static inline uint8_t
2703vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2704{
2705 uint8_t round, shift = b & 0xf;
2706 uint16_t res;
2707
2708 round = get_round(vxrm, a, shift);
2709 res = (a >> shift) + round;
2710 if (res > UINT8_MAX) {
2711 env->vxsat = 0x1;
2712 return UINT8_MAX;
2713 } else {
2714 return res;
2715 }
2716}
2717
2718static inline uint16_t
2719vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2720{
2721 uint8_t round, shift = b & 0x1f;
2722 uint32_t res;
2723
2724 round = get_round(vxrm, a, shift);
2725 res = (a >> shift) + round;
2726 if (res > UINT16_MAX) {
2727 env->vxsat = 0x1;
2728 return UINT16_MAX;
2729 } else {
2730 return res;
2731 }
2732}
2733
2734static inline uint32_t
2735vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2736{
2737 uint8_t round, shift = b & 0x3f;
2738 uint64_t res;
2739
2740 round = get_round(vxrm, a, shift);
2741 res = (a >> shift) + round;
2742 if (res > UINT32_MAX) {
2743 env->vxsat = 0x1;
2744 return UINT32_MAX;
2745 } else {
2746 return res;
2747 }
2748}
2749
2750RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2751RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2752RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2753GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
2754GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
2755GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
2756
2757RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2758RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2759RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2760GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
2761GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
2762GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
2763
2764/*
2765 *** Vector Floating-Point Arithmetic Instructions
2766 */
2767/* Vector Single-Width Floating-Point Add/Subtract Instructions */
2768#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2769static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2770 CPURISCVState *env) \
2771{ \
2772 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2773 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2774 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
2775}
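/*
 * The softfloat calls below round according to env->fp_status, which
 * carries the dynamic rounding mode and accumulates the exception flags
 * that are later reported through fflags.
 */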
2776
2777#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \
2778void HELPER(NAME)(void *vd, void *v0, void *vs1, \
2779 void *vs2, CPURISCVState *env, \
2780 uint32_t desc) \
2781{ \
2782 uint32_t vm = vext_vm(desc); \
2783 uint32_t vl = env->vl; \
2784 uint32_t i; \
2785 \
2786 for (i = env->vstart; i < vl; i++) { \
2787 if (!vm && !vext_elem_mask(v0, i)) { \
2788 continue; \
2789 } \
2790 do_##NAME(vd, vs1, vs2, i, env); \
2791 } \
2792 env->vstart = 0; \
2793}
2794
2795RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2796RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2797RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2798GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
2799GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
2800GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
2801
2802#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2803static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2804 CPURISCVState *env) \
2805{ \
2806 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2807 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2808}
2809
2810#define GEN_VEXT_VF(NAME, ESZ, DSZ) \
2811void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
2812 void *vs2, CPURISCVState *env, \
2813 uint32_t desc) \
2814{ \
2815 uint32_t vm = vext_vm(desc); \
2816 uint32_t vl = env->vl; \
2817 uint32_t i; \
2818 \
2819 for (i = env->vstart; i < vl; i++) { \
2820 if (!vm && !vext_elem_mask(v0, i)) { \
2821 continue; \
2822 } \
2823 do_##NAME(vd, s1, vs2, i, env); \
2824 } \
2825 env->vstart = 0; \
2826}
2827
2828RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2829RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2830RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2831GEN_VEXT_VF(vfadd_vf_h, 2, 2)
2832GEN_VEXT_VF(vfadd_vf_w, 4, 4)
2833GEN_VEXT_VF(vfadd_vf_d, 8, 8)
2834
2835RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2836RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2837RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2838GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
2839GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
2840GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
2841RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2842RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2843RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2844GEN_VEXT_VF(vfsub_vf_h, 2, 2)
2845GEN_VEXT_VF(vfsub_vf_w, 4, 4)
2846GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2847
2848static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2849{
2850 return float16_sub(b, a, s);
2851}
2852
2853static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2854{
2855 return float32_sub(b, a, s);
2856}
2857
2858static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2859{
2860 return float64_sub(b, a, s);
2861}
2862
2863RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2864RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2865RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2866GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
2867GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
2868GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
2869
2870/* Vector Widening Floating-Point Add/Subtract Instructions */
2871static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2872{
2873 return float32_add(float16_to_float32(a, true, s),
2874 float16_to_float32(b, true, s), s);
2875}
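/*
 * The 'true' argument to float16_to_float32() selects the IEEE
 * half-precision format (rather than the ARM alternative format) for the
 * conversion.
 */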
2876
2877static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2878{
2879 return float64_add(float32_to_float64(a, s),
2880 float32_to_float64(b, s), s);
2881
2882}
2883
2884RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2885RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
2886GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
2887GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
2888RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2889RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
2890GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
2891GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
2892
2893static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2894{
2895 return float32_sub(float16_to_float32(a, true, s),
2896 float16_to_float32(b, true, s), s);
2897}
2898
2899static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2900{
2901 return float64_sub(float32_to_float64(a, s),
2902 float32_to_float64(b, s), s);
2903
2904}
2905
2906RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2907RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
2908GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
2909GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
2910RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2911RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
2912GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
2913GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
2914
2915static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2916{
2917 return float32_add(a, float16_to_float32(b, true, s), s);
2918}
2919
2920static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2921{
2922 return float64_add(a, float32_to_float64(b, s), s);
2923}
2924
2925RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2926RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
2927GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
2928GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
2929RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2930RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
2931GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
2932GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
2933
2934static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2935{
2936 return float32_sub(a, float16_to_float32(b, true, s), s);
2937}
2938
2939static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2940{
2941 return float64_sub(a, float32_to_float64(b, s), s);
2942}
2943
2944RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
2945RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
2946GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
2947GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
eeffab2e
LZ
2948RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
2949RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
2950GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
2951GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
2952
2953/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
2954RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
2955RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
2956RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
2957GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
2958GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
2959GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
2960RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
2961RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
2962RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
2963GEN_VEXT_VF(vfmul_vf_h, 2, 2)
2964GEN_VEXT_VF(vfmul_vf_w, 4, 4)
2965GEN_VEXT_VF(vfmul_vf_d, 8, 8)
2966
2967RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
2968RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
2969RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
2970GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
2971GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
2972GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
2973RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
2974RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
2975RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
2976GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
2977GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
2978GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
2979
2980static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
2981{
2982 return float16_div(b, a, s);
2983}
2984
2985static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
2986{
2987 return float32_div(b, a, s);
2988}
2989
2990static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
2991{
2992 return float64_div(b, a, s);
2993}
2994
2995RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
2996RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
2997RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
2998GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
2999GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
3000GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
3001
3002/* Vector Widening Floating-Point Multiply */
3003static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3004{
3005 return float32_mul(float16_to_float32(a, true, s),
3006 float16_to_float32(b, true, s), s);
3007}
3008
3009static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3010{
3011 return float64_mul(float32_to_float64(a, s),
3012 float32_to_float64(b, s), s);
3013
3014}
3015RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3016RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3017GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
3018GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
3019RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3020RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3021GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
3022GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3023
3024/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3025#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3026static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3027 CPURISCVState *env) \
3028{ \
3029 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3030 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3031 TD d = *((TD *)vd + HD(i)); \
3032 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3033}
3034
3035static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3036{
3037 return float16_muladd(a, b, d, 0, s);
3038}
3039
3040static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3041{
3042 return float32_muladd(a, b, d, 0, s);
3043}
3044
3045static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3046{
3047 return float64_muladd(a, b, d, 0, s);
3048}
3049
3050RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3051RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3052RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3053GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
3054GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
3055GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3056
3057#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3058static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3059 CPURISCVState *env) \
3060{ \
3061 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3062 TD d = *((TD *)vd + HD(i)); \
3063 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3064}
3065
3066RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3067RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3068RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3069GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
3070GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
3071GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3072
3073static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3074{
3075 return float16_muladd(a, b, d,
3076 float_muladd_negate_c | float_muladd_negate_product, s);
3077}
3078
3079static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3080{
3081 return float32_muladd(a, b, d,
3082 float_muladd_negate_c | float_muladd_negate_product, s);
3083}
3084
3085static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3086{
3087 return float64_muladd(a, b, d,
3088 float_muladd_negate_c | float_muladd_negate_product, s);
3089}
3090
3091RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3092RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3093RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3094GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
3095GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
3096GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
3097RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3098RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3099RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3100GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
3101GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
3102GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3103
3104static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3105{
3106 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3107}
3108
3109static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3110{
3111 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3112}
3113
3114static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3115{
3116 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3117}
3118
3119RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3120RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3121RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3122GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
3123GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
3124GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
3125RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3126RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3127RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3128GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
3129GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
3130GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3131
3132static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3133{
3134 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3135}
3136
3137static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3138{
3139 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3140}
3141
3142static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3143{
3144 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3145}
3146
3147RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3148RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3149RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3150GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
3151GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
3152GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
3153RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3154RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3155RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3156GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
3157GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
3158GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3159
3160static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3161{
3162 return float16_muladd(d, b, a, 0, s);
3163}
3164
3165static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3166{
3167 return float32_muladd(d, b, a, 0, s);
3168}
3169
3170static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3171{
3172 return float64_muladd(d, b, a, 0, s);
3173}
3174
3175RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3176RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3177RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3178GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
3179GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
3180GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
3181RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3182RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3183RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3184GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
3185GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
3186GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3187
3188static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3189{
3190 return float16_muladd(d, b, a,
3191 float_muladd_negate_c | float_muladd_negate_product, s);
3192}
3193
3194static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3195{
3196 return float32_muladd(d, b, a,
3197 float_muladd_negate_c | float_muladd_negate_product, s);
3198}
3199
3200static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3201{
3202 return float64_muladd(d, b, a,
3203 float_muladd_negate_c | float_muladd_negate_product, s);
3204}
3205
3206RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3207RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3208RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3209GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
3210GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
3211GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
3212RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3213RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3214RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3215GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
3216GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
3217GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3218
3219static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3220{
3221 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3222}
3223
3224static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3225{
3226 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3227}
3228
3229static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3230{
3231 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3232}
3233
3234RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3235RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3236RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3237GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
3238GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
3239GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
3240RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3241RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3242RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3243GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
3244GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
3245GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3246
3247static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3248{
3249 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3250}
3251
3252static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3253{
3254 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3255}
3256
3257static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3258{
3259 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3260}
3261
3262RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3263RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3264RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3265GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
3266GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
3267GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
3268RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3269RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3270RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3271GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
3272GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
3273GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
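/*
 * Summary of the single-width FMA forms above (op1 is vs1[i] for the .vv
 * forms and f[rs1] for the .vf forms):
 *   vfmacc:  vd[i] =  (vs2[i] * op1) + vd[i]
 *   vfnmacc: vd[i] = -(vs2[i] * op1) - vd[i]
 *   vfmsac:  vd[i] =  (vs2[i] * op1) - vd[i]
 *   vfnmsac: vd[i] = -(vs2[i] * op1) + vd[i]
 *   vfmadd:  vd[i] =  (vd[i] * op1) + vs2[i]
 *   vfnmadd: vd[i] = -(vd[i] * op1) - vs2[i]
 *   vfmsub:  vd[i] =  (vd[i] * op1) - vs2[i]
 *   vfnmsub: vd[i] = -(vd[i] * op1) + vs2[i]
 * The sign changes are expressed with float_muladd_negate_product and
 * float_muladd_negate_c so each operation remains a single fused
 * multiply-add.
 */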
3274
3275/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3276static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3277{
3278 return float32_muladd(float16_to_float32(a, true, s),
3279 float16_to_float32(b, true, s), d, 0, s);
3280}
3281
3282static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3283{
3284 return float64_muladd(float32_to_float64(a, s),
3285 float32_to_float64(b, s), d, 0, s);
3286}
3287
3288RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3289RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3290GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
3291GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
3292RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3293RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3294GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
3295GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
3296
3297static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3298{
3299 return float32_muladd(float16_to_float32(a, true, s),
3300 float16_to_float32(b, true, s), d,
3301 float_muladd_negate_c | float_muladd_negate_product, s);
3302}
3303
3304static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3305{
3306 return float64_muladd(float32_to_float64(a, s),
3307 float32_to_float64(b, s), d,
3308 float_muladd_negate_c | float_muladd_negate_product, s);
3309}
3310
3311RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3312RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3313GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
3314GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
3315RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3316RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3317GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
3318GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3319
3320static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3321{
3322 return float32_muladd(float16_to_float32(a, true, s),
3323 float16_to_float32(b, true, s), d,
3324 float_muladd_negate_c, s);
3325}
3326
3327static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3328{
3329 return float64_muladd(float32_to_float64(a, s),
3330 float32_to_float64(b, s), d,
3331 float_muladd_negate_c, s);
3332}
3333
3334RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3335RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3336GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
3337GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
3338RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3339RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3340GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
3341GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3342
3343static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3344{
3345 return float32_muladd(float16_to_float32(a, true, s),
3346 float16_to_float32(b, true, s), d,
3347 float_muladd_negate_product, s);
3348}
3349
3350static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3351{
3352 return float64_muladd(float32_to_float64(a, s),
3353 float32_to_float64(b, s), d,
3354 float_muladd_negate_product, s);
3355}
3356
3357RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3358RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3359GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
3360GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
3361RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3362RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3363GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
3364GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
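/*
 * The widening FMA forms promote both multiplicands from SEW to 2*SEW and
 * accumulate into a 2*SEW destination, e.g. vfwmacc computes
 * vd[i] = widen(vs2[i]) * widen(vs1[i] or f[rs1]) + vd[i] entirely in the
 * wider format; the nmacc/msac/nmsac variants apply the same negate flags
 * as their single-width counterparts above.
 */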
3365
3366/* Vector Floating-Point Square-Root Instruction */
3367/* (TD, T2, TX2) */
3368#define OP_UU_H uint16_t, uint16_t, uint16_t
3369#define OP_UU_W uint32_t, uint32_t, uint32_t
3370#define OP_UU_D uint64_t, uint64_t, uint64_t
3371
3372#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3373static void do_##NAME(void *vd, void *vs2, int i, \
3374 CPURISCVState *env) \
3375{ \
3376 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3377 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3378}
3379
3380#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \
3381void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3382 CPURISCVState *env, uint32_t desc) \
3383{ \
3384 uint32_t vm = vext_vm(desc); \
3385 uint32_t vl = env->vl; \
3386 uint32_t i; \
3387 \
3388 if (vl == 0) { \
3389 return; \
3390 } \
3391 for (i = env->vstart; i < vl; i++) { \
3392 if (!vm && !vext_elem_mask(v0, i)) { \
3393 continue; \
3394 } \
3395 do_##NAME(vd, vs2, i, env); \
3396 } \
3397 env->vstart = 0; \
3398}
3399
3400RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3401RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3402RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3403GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
3404GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
3405GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3406
3407/* Vector Floating-Point MIN/MAX Instructions */
3408RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3409RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3410RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
3411GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
3412GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
3413GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
3414RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3415RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3416RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
3417GEN_VEXT_VF(vfmin_vf_h, 2, 2)
3418GEN_VEXT_VF(vfmin_vf_w, 4, 4)
3419GEN_VEXT_VF(vfmin_vf_d, 8, 8)
3420
3421RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3422RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3423RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
3424GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
3425GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
3426GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
3427RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3428RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3429RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
3430GEN_VEXT_VF(vfmax_vf_h, 2, 2)
3431GEN_VEXT_VF(vfmax_vf_w, 4, 4)
3432GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3433
3434/* Vector Floating-Point Sign-Injection Instructions */
3435static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3436{
3437 return deposit64(b, 0, 15, a);
3438}
3439
3440static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3441{
3442 return deposit64(b, 0, 31, a);
3443}
3444
3445static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3446{
3447 return deposit64(b, 0, 63, a);
3448}
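/*
 * The sign-injection helpers rely on deposit64(): deposit64(b, 0, 15, a)
 * keeps bit 15 (the sign) from b and bits 14:0 (the magnitude) from a, so
 * with the (s2, s1) call order the result takes its magnitude from vs2[i]
 * and its sign from vs1[i] / f[rs1]. fsgnjn and fsgnjx below differ only
 * in feeding ~b or a ^ b as the sign source.
 */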
3449
3450RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3451RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3452RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3453GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
3454GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
3455GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
3456RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3457RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3458RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3459GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
3460GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
3461GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3462
3463static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3464{
3465 return deposit64(~b, 0, 15, a);
3466}
3467
3468static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3469{
3470 return deposit64(~b, 0, 31, a);
3471}
3472
3473static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3474{
3475 return deposit64(~b, 0, 63, a);
3476}
3477
3478RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3479RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3480RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3481GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
3482GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
3483GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
3484RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3485RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3486RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3487GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
3488GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
3489GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3490
3491static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3492{
3493 return deposit64(b ^ a, 0, 15, a);
3494}
3495
3496static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3497{
3498 return deposit64(b ^ a, 0, 31, a);
3499}
3500
3501static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3502{
3503 return deposit64(b ^ a, 0, 63, a);
3504}
3505
3506RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3507RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3508RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3509GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
3510GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
3511GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
3512RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3513RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3514RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3515GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
3516GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
3517GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
3518
3519/* Vector Floating-Point Compare Instructions */
3520#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
3521void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
3522 CPURISCVState *env, uint32_t desc) \
3523{ \
3524 uint32_t vm = vext_vm(desc); \
3525 uint32_t vl = env->vl; \
3526 uint32_t i; \
3527 \
3528 for (i = env->vstart; i < vl; i++) { \
3529 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
3530 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
3531 if (!vm && !vext_elem_mask(v0, i)) { \
3532 continue; \
3533 } \
3534 vext_set_elem_mask(vd, i, \
3535 DO_OP(s2, s1, &env->fp_status)); \
3536 } \
3537 env->vstart = 0; \
3538}
3539
3540GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3541GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3542GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3543
3544#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
3545void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
3546 CPURISCVState *env, uint32_t desc) \
3547{ \
3548 uint32_t vm = vext_vm(desc); \
3549 uint32_t vl = env->vl; \
3550 uint32_t i; \
3551 \
3552 for (i = env->vstart; i < vl; i++) { \
3553 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
3554 if (!vm && !vext_elem_mask(v0, i)) { \
3555 continue; \
3556 } \
3557 vext_set_elem_mask(vd, i, \
3558 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
3559 } \
3560 env->vstart = 0; \
3561}
3562
3563GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3564GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3565GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3566
3567static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3568{
3569 FloatRelation compare = float16_compare_quiet(a, b, s);
3570 return compare != float_relation_equal;
3571}
3572
3573static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3574{
3575 FloatRelation compare = float32_compare_quiet(a, b, s);
3576 return compare != float_relation_equal;
3577}
3578
3579static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3580{
3581 FloatRelation compare = float64_compare_quiet(a, b, s);
3582 return compare != float_relation_equal;
3583}
3584
3585GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3586GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3587GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3588GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3589GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3590GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3591
3592GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3593GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3594GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
3595GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
3596GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
3597GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
3598
3599GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
3600GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
3601GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
3602GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
3603GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
3604GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
3605
3606static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
3607{
3608 FloatRelation compare = float16_compare(a, b, s);
3609 return compare == float_relation_greater;
3610}
3611
3612static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
3613{
3614 FloatRelation compare = float32_compare(a, b, s);
3615 return compare == float_relation_greater;
3616}
3617
3618static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
3619{
3620 FloatRelation compare = float64_compare(a, b, s);
3621 return compare == float_relation_greater;
3622}
3623
3624GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
3625GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
3626GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
3627
3628static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
3629{
3630 FloatRelation compare = float16_compare(a, b, s);
3631 return compare == float_relation_greater ||
3632 compare == float_relation_equal;
3633}
3634
3635static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
3636{
3637 FloatRelation compare = float32_compare(a, b, s);
3638 return compare == float_relation_greater ||
3639 compare == float_relation_equal;
3640}
3641
3642static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
3643{
3644 FloatRelation compare = float64_compare(a, b, s);
3645 return compare == float_relation_greater ||
3646 compare == float_relation_equal;
3647}
3648
3649GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
3650GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
3651GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
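/*
 * Note the NaN behaviour split in the compares above: vmfeq/vmfne use the
 * quiet compare predicates, while vmflt/vmfle/vmfgt/vmfge are built on the
 * signalling variants (float*_lt, float*_le, float*_compare), matching the
 * IEEE 754 treatment the vector spec requires for ordered comparisons.
 */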
3652
3653/* Vector Floating-Point Classify Instruction */
3654#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3655static void do_##NAME(void *vd, void *vs2, int i) \
3656{ \
3657 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3658 *((TD *)vd + HD(i)) = OP(s2); \
3659}
3660
3661#define GEN_VEXT_V(NAME, ESZ, DSZ) \
3662void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3663 CPURISCVState *env, uint32_t desc) \
3664{ \
3665 uint32_t vm = vext_vm(desc); \
3666 uint32_t vl = env->vl; \
3667 uint32_t i; \
3668 \
3669 for (i = env->vstart; i < vl; i++) { \
3670 if (!vm && !vext_elem_mask(v0, i)) { \
3671 continue; \
3672 } \
3673 do_##NAME(vd, vs2, i); \
3674 } \
3675 env->vstart = 0; \
3676}
3677
3678target_ulong fclass_h(uint64_t frs1)
3679{
3680 float16 f = frs1;
3681 bool sign = float16_is_neg(f);
3682
3683 if (float16_is_infinity(f)) {
3684 return sign ? 1 << 0 : 1 << 7;
3685 } else if (float16_is_zero(f)) {
3686 return sign ? 1 << 3 : 1 << 4;
3687 } else if (float16_is_zero_or_denormal(f)) {
3688 return sign ? 1 << 2 : 1 << 5;
3689 } else if (float16_is_any_nan(f)) {
3690 float_status s = { }; /* for snan_bit_is_one */
3691 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3692 } else {
3693 return sign ? 1 << 1 : 1 << 6;
3694 }
3695}
3696
3697target_ulong fclass_s(uint64_t frs1)
3698{
3699 float32 f = frs1;
3700 bool sign = float32_is_neg(f);
3701
3702 if (float32_is_infinity(f)) {
3703 return sign ? 1 << 0 : 1 << 7;
3704 } else if (float32_is_zero(f)) {
3705 return sign ? 1 << 3 : 1 << 4;
3706 } else if (float32_is_zero_or_denormal(f)) {
3707 return sign ? 1 << 2 : 1 << 5;
3708 } else if (float32_is_any_nan(f)) {
3709 float_status s = { }; /* for snan_bit_is_one */
3710 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3711 } else {
3712 return sign ? 1 << 1 : 1 << 6;
3713 }
3714}
3715
3716target_ulong fclass_d(uint64_t frs1)
3717{
3718 float64 f = frs1;
3719 bool sign = float64_is_neg(f);
3720
3721 if (float64_is_infinity(f)) {
3722 return sign ? 1 << 0 : 1 << 7;
3723 } else if (float64_is_zero(f)) {
3724 return sign ? 1 << 3 : 1 << 4;
3725 } else if (float64_is_zero_or_denormal(f)) {
3726 return sign ? 1 << 2 : 1 << 5;
3727 } else if (float64_is_any_nan(f)) {
3728 float_status s = { }; /* for snan_bit_is_one */
3729 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
3730 } else {
3731 return sign ? 1 << 1 : 1 << 6;
3732 }
3733}
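/*
 * The fclass result is a 10-bit one-hot mask, identical to the scalar
 * FCLASS encoding:
 *   bit 0: -inf       bit 1: negative normal   bit 2: negative subnormal
 *   bit 3: -0         bit 4: +0                bit 5: positive subnormal
 *   bit 6: +normal    bit 7: +inf              bit 8: signalling NaN
 *   bit 9: quiet NaN
 */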
3734
3735RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
3736RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
3737RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
3738GEN_VEXT_V(vfclass_v_h, 2, 2)
3739GEN_VEXT_V(vfclass_v_w, 4, 4)
3740GEN_VEXT_V(vfclass_v_d, 8, 8)
3741
3742/* Vector Floating-Point Merge Instruction */
3743#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
3744void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
3745 CPURISCVState *env, uint32_t desc) \
3746{ \
3747 uint32_t vm = vext_vm(desc); \
3748 uint32_t vl = env->vl; \
3749 uint32_t i; \
3750 \
3751 for (i = env->vstart; i < vl; i++) { \
3752 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
3753 *((ETYPE *)vd + H(i)) \
3754 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
3755 } \
3756 env->vstart = 0; \
3757}
3758
3759GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
3760GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
3761GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
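/*
 * vfmerge.vfm selects per element between the scalar and vs2: elements
 * whose v0 mask bit is set receive f[rs1] (s1), the others keep vs2[i],
 * which is what the (!vm && !vext_elem_mask(v0, i)) ? s2 : s1 expression
 * above implements.
 */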
3762
3763/* Single-Width Floating-Point/Integer Type-Convert Instructions */
3764/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
3765RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
3766RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
3767RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
3768GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
3769GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
3770GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
3771
3772/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
3773RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
3774RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
3775RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
3776GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
3777GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
3778GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
3779
3780/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
3781RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
3782RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
3783RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
3784GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
3785GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
3786GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
3787
3788/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
3789RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
3790RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
3791RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
3792GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
3793GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
3794GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
3795
3796/* Widening Floating-Point/Integer Type-Convert Instructions */
3797/* (TD, T2, TX2) */
3798#define WOP_UU_B uint16_t, uint8_t, uint8_t
3799#define WOP_UU_H uint32_t, uint16_t, uint16_t
3800#define WOP_UU_W uint64_t, uint32_t, uint32_t
3801/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
3802RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
3803RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
3804GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
3805GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
3806
3807/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
3808RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
3809RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
3810GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
3811GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
3812
3813/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
3814RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
3815RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
3816RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
3817GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
3818GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
3819GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
3820
3821/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3822RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
3823RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
3824RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
3825GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
3826GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
3827GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
3828
3829/*
3830 * vfwcvt.f.f.v vd, vs2, vm
3831 * Convert single-width float to double-width float.
3832 */
3833static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
3834{
3835 return float16_to_float32(a, true, s);
3836}
3837
3838RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
3839RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
3840GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
3841GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
3842
3843/* Narrowing Floating-Point/Integer Type-Convert Instructions */
3844/* (TD, T2, TX2) */
3845#define NOP_UU_B uint8_t, uint16_t, uint32_t
3846#define NOP_UU_H uint16_t, uint32_t, uint32_t
3847#define NOP_UU_W uint32_t, uint64_t, uint64_t
3848/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
3849RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
3850RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
3851RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
3852GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
3853GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
3854GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)
3855
3856/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
3857RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
3858RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
3859RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
3860GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
3861GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
3862GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)
3863
3864/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
3865RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
3866RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
3867GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
3868GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)
3869
3870/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
3871RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
3872RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
3873GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
3874GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
3875
3876/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
3877static uint16_t vfncvtffv16(uint32_t a, float_status *s)
3878{
3879 return float32_to_float16(a, true, s);
3880}
3881
3882RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
3883RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
3884GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
3885GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
3886
3887/*
3888 *** Vector Reduction Operations
3889 */
3890/* Vector Single-Width Integer Reduction Instructions */
3891#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
3892void HELPER(NAME)(void *vd, void *v0, void *vs1, \
3893 void *vs2, CPURISCVState *env, uint32_t desc) \
3894{ \
3895 uint32_t vm = vext_vm(desc); \
3896 uint32_t vl = env->vl; \
3897 uint32_t i; \
3898 TD s1 = *((TD *)vs1 + HD(0)); \
3899 \
3900 for (i = env->vstart; i < vl; i++) { \
3901 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
3902 if (!vm && !vext_elem_mask(v0, i)) { \
3903 continue; \
3904 } \
3905 s1 = OP(s1, (TD)s2); \
3906 } \
3907 *((TD *)vd + HD(0)) = s1; \
3908 env->vstart = 0; \
3909}
3910
3911/* vd[0] = sum(vs1[0], vs2[*]) */
3912GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
3913GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
3914GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
3915GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
3916
3917/* vd[0] = maxu(vs1[0], vs2[*]) */
3918GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
3919GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
3920GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
3921GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
3922
3923/* vd[0] = max(vs1[0], vs2[*]) */
3924GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
3925GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
3926GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
3927GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
3928
3929/* vd[0] = minu(vs1[0], vs2[*]) */
3930GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
3931GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
3932GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
3933GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
3934
3935/* vd[0] = min(vs1[0], vs2[*]) */
3936GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
3937GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
3938GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
3939GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
3940
3941/* vd[0] = and(vs1[0], vs2[*]) */
3942GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
3943GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
3944GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
3945GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
3946
3947/* vd[0] = or(vs1[0], vs2[*]) */
3948GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
3949GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
3950GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
3951GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
3952
3953/* vd[0] = xor(vs1[0], vs2[*]) */
3954GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
3955GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
3956GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
3957GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
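/*
 * All single-width reductions share the same shape: the scalar accumulator
 * starts from vs1[0], every active element of vs2 is folded in with OP,
 * and only element 0 of vd is written back.  vredsum, for example, is
 * effectively:
 *
 *   s1 = vs1[0];
 *   for (i = vstart; i < vl; i++)
 *       if (active(i))
 *           s1 += vs2[i];
 *   vd[0] = s1;
 */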
3958
3959/* Vector Widening Integer Reduction Instructions */
3960/* signed sum reduction into double-width accumulator */
3961GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
3962GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
3963GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
3964
3965/* Unsigned sum reduction into double-width accumulator */
3966GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
3967GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
3968GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
3969
3970/* Vector Single-Width Floating-Point Reduction Instructions */
3971#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
3972void HELPER(NAME)(void *vd, void *v0, void *vs1, \
3973 void *vs2, CPURISCVState *env, \
3974 uint32_t desc) \
3975{ \
3976 uint32_t vm = vext_vm(desc); \
3977 uint32_t vl = env->vl; \
3978 uint32_t i; \
3979 TD s1 = *((TD *)vs1 + HD(0)); \
3980 \
3981 for (i = env->vstart; i < vl; i++) { \
3982 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
3983 if (!vm && !vext_elem_mask(v0, i)) { \
3984 continue; \
3985 } \
3986 s1 = OP(s1, (TD)s2, &env->fp_status); \
3987 } \
3988 *((TD *)vd + HD(0)) = s1; \
3989 env->vstart = 0; \
3990}
3991
3992/* Unordered sum */
3993GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
3994GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
3995GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
3996
3997/* Maximum value */
3998GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
3999GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4000GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
4001
4002/* Minimum value */
4003GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4004GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4005GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4006
4007/* Vector Widening Floating-Point Reduction Instructions */
4008/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4009void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4010 void *vs2, CPURISCVState *env, uint32_t desc)
4011{
4012 uint32_t vm = vext_vm(desc);
4013 uint32_t vl = env->vl;
4014 uint32_t i;
4015 uint32_t s1 = *((uint32_t *)vs1 + H4(0));
4016
4017 for (i = env->vstart; i < vl; i++) {
4018 uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4019 if (!vm && !vext_elem_mask(v0, i)) {
4020 continue;
4021 }
4022 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4023 &env->fp_status);
4024 }
4025 *((uint32_t *)vd + H4(0)) = s1;
4026 env->vstart = 0;
4027}
4028
4029void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4030 void *vs2, CPURISCVState *env, uint32_t desc)
4031{
4032 uint32_t vm = vext_vm(desc);
4033 uint32_t vl = env->vl;
4034 uint32_t i;
4035 uint64_t s1 = *((uint64_t *)vs1);
4036
4037 for (i = env->vstart; i < vl; i++) {
4038 uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4039 if (!vm && !vext_elem_mask(v0, i)) {
4040 continue;
4041 }
4042 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4043 &env->fp_status);
4044 }
4045 *((uint64_t *)vd) = s1;
4046 env->vstart = 0;
4047}
4048
4049/*
4050 *** Vector Mask Operations
4051 */
4052/* Vector Mask-Register Logical Instructions */
4053#define GEN_VEXT_MASK_VV(NAME, OP) \
4054void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4055 void *vs2, CPURISCVState *env, \
4056 uint32_t desc) \
4057{ \
4058 uint32_t vl = env->vl; \
4059 uint32_t i; \
4060 int a, b; \
4061 \
4062 for (i = env->vstart; i < vl; i++) { \
4063 a = vext_elem_mask(vs1, i); \
4064 b = vext_elem_mask(vs2, i); \
4065 vext_set_elem_mask(vd, i, OP(b, a)); \
4066 } \
4067 env->vstart = 0; \
4068}
4069
4070#define DO_NAND(N, M) (!(N & M))
4071#define DO_ANDNOT(N, M) (N & !M)
4072#define DO_NOR(N, M) (!(N | M))
4073#define DO_ORNOT(N, M) (N | !M)
4074#define DO_XNOR(N, M) (!(N ^ M))
4075
4076GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4077GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4078GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT)
4079GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4080GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4081GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4082GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT)
4083GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
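/*
 * vext_elem_mask() returns 0 or 1, so the logical '!' in DO_NAND, DO_NOR,
 * DO_XNOR and friends behaves like a one-bit complement here; the results
 * are written back as single mask bits via vext_set_elem_mask().
 */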
4084
4085/* Vector count population in mask vcpop */
4086target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4087 uint32_t desc)
4088{
4089 target_ulong cnt = 0;
4090 uint32_t vm = vext_vm(desc);
4091 uint32_t vl = env->vl;
4092 int i;
4093
4094 for (i = env->vstart; i < vl; i++) {
4095 if (vm || vext_elem_mask(v0, i)) {
4096 if (vext_elem_mask(vs2, i)) {
4097 cnt++;
4098 }
4099 }
4100 }
4101 env->vstart = 0;
4102 return cnt;
4103}
4104
4105/* vfirst find-first-set mask bit */
4106target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4107 uint32_t desc)
4108{
4109 uint32_t vm = vext_vm(desc);
4110 uint32_t vl = env->vl;
4111 int i;
4112
4113 for (i = env->vstart; i < vl; i++) {
4114 if (vm || vext_elem_mask(v0, i)) {
4115 if (vext_elem_mask(vs2, i)) {
4116 return i;
4117 }
4118 }
4119 }
4120 env->vstart = 0;
4121 return -1LL;
4122}
4123
4124enum set_mask_type {
4125 ONLY_FIRST = 1,
4126 INCLUDE_FIRST,
4127 BEFORE_FIRST,
4128};
4129
4130static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4131 uint32_t desc, enum set_mask_type type)
4132{
4133 uint32_t vm = vext_vm(desc);
4134 uint32_t vl = env->vl;
4135 int i;
4136 bool first_mask_bit = false;
4137
4138 for (i = env->vstart; i < vl; i++) {
4139 if (!vm && !vext_elem_mask(v0, i)) {
4140 continue;
4141 }
4142 /* write a zero to all following active elements */
4143 if (first_mask_bit) {
4144 vext_set_elem_mask(vd, i, 0);
4145 continue;
4146 }
4147 if (vext_elem_mask(vs2, i)) {
4148 first_mask_bit = true;
4149 if (type == BEFORE_FIRST) {
4150 vext_set_elem_mask(vd, i, 0);
4151 } else {
4152 vext_set_elem_mask(vd, i, 1);
4153 }
4154 } else {
4155 if (type == ONLY_FIRST) {
4156 vext_set_elem_mask(vd, i, 0);
4157 } else {
4158 vext_set_elem_mask(vd, i, 1);
4159 }
4160 }
4161 }
4162 env->vstart = 0;
4163}
4164
4165void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4166 uint32_t desc)
4167{
4168 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4169}
4170
4171void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4172 uint32_t desc)
4173{
4174 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4175}
4176
4177void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4178 uint32_t desc)
4179{
4180 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4181}
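/*
 * The three set-mask helpers above differ only in how they treat the
 * first set bit of vs2: vmsbf sets every active element before it, vmsif
 * additionally includes it, and vmsof sets only that element; everything
 * after the first set bit is written as 0.
 */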
4182
4183/* Vector Iota Instruction */
4184#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
4185void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4186 uint32_t desc) \
4187{ \
4188 uint32_t vm = vext_vm(desc); \
4189 uint32_t vl = env->vl; \
4190 uint32_t sum = 0; \
4191 int i; \
4192 \
4193 for (i = env->vstart; i < vl; i++) { \
4194 if (!vm && !vext_elem_mask(v0, i)) { \
4195 continue; \
4196 } \
4197 *((ETYPE *)vd + H(i)) = sum; \
4198 if (vext_elem_mask(vs2, i)) { \
4199 sum++; \
4200 } \
4201 } \
4202 env->vstart = 0; \
4203}
4204
4205GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4206GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4207GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4208GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4209
4210/* Vector Element Index Instruction */
4211#define GEN_VEXT_VID_V(NAME, ETYPE, H) \
4212void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4213{ \
4214 uint32_t vm = vext_vm(desc); \
4215 uint32_t vl = env->vl; \
4216 int i; \
4217 \
4218 for (i = env->vstart; i < vl; i++) { \
4219 if (!vm && !vext_elem_mask(v0, i)) { \
4220 continue; \
4221 } \
4222 *((ETYPE *)vd + H(i)) = i; \
4223 } \
4224 env->vstart = 0; \
4225}
4226
4227GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4228GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4229GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4230GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4231
4232/*
4233 *** Vector Permutation Instructions
4234 */
4235
4236/* Vector Slide Instructions */
4237#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
4238void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4239 CPURISCVState *env, uint32_t desc) \
4240{ \
4241 uint32_t vm = vext_vm(desc); \
4242 uint32_t vl = env->vl; \
4243 target_ulong offset = s1, i_min, i; \
4244 \
4245 i_min = MAX(env->vstart, offset); \
4246 for (i = i_min; i < vl; i++) { \
4247 if (!vm && !vext_elem_mask(v0, i)) { \
4248 continue; \
4249 } \
4250 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
4251 } \
4252}
4253
4254/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4255GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
4256GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4257GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4258GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
ec17e036 4259
4260#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
4261void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4262 CPURISCVState *env, uint32_t desc) \
4263{ \
4264 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
4265 uint32_t vm = vext_vm(desc); \
4266 uint32_t vl = env->vl; \
4267 target_ulong i_max, i; \
ec17e036 4268 \
4269 i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
4270 for (i = env->vstart; i < i_max; ++i) { \
4271 if (vm || vext_elem_mask(v0, i)) { \
4272 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
4273 } \
4274 } \
4275 \
4276 for (i = i_max; i < vl; ++i) { \
4277 if (vm || vext_elem_mask(v0, i)) { \
4278 *((ETYPE *)vd + H(i)) = 0; \
4279 } \
4280 } \
4281 \
4282 env->vstart = 0; \
4283}
4284
4285/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4286GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
4287GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4288GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4289GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4290
4291#define GEN_VEXT_VSLIE1UP(ESZ, H) \
4292static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4293 CPURISCVState *env, uint32_t desc) \
4294{ \
4295 typedef uint##ESZ##_t ETYPE; \
4296 uint32_t vm = vext_vm(desc); \
4297 uint32_t vl = env->vl; \
4298 uint32_t i; \
4299 \
4300 for (i = env->vstart; i < vl; i++) { \
4301 if (!vm && !vext_elem_mask(v0, i)) { \
4302 continue; \
4303 } \
4304 if (i == 0) { \
4305 *((ETYPE *)vd + H(i)) = s1; \
4306 } else { \
4307 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
4308 } \
4309 } \
4310 env->vstart = 0; \
4311}
4312
4313GEN_VEXT_VSLIE1UP(8, H1)
4314GEN_VEXT_VSLIE1UP(16, H2)
4315GEN_VEXT_VSLIE1UP(32, H4)
4316GEN_VEXT_VSLIE1UP(64, H8)
4317
4318#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \
4319void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4320 CPURISCVState *env, uint32_t desc) \
4321{ \
4322 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
4323}
4324
4325/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4326GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4327GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4328GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4329GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4330
4331#define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \
4332static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4333 CPURISCVState *env, uint32_t desc) \
4334{ \
4335 typedef uint##ESZ##_t ETYPE; \
4336 uint32_t vm = vext_vm(desc); \
4337 uint32_t vl = env->vl; \
4338 uint32_t i; \
4339 \
4340 for (i = env->vstart; i < vl; i++) { \
4341 if (!vm && !vext_elem_mask(v0, i)) { \
4342 continue; \
4343 } \
4344 if (i == vl - 1) { \
4345 *((ETYPE *)vd + H(i)) = s1; \
4346 } else { \
4347 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
4348 } \
4349 } \
4350 env->vstart = 0; \
4351}
4352
4353GEN_VEXT_VSLIDE1DOWN(8, H1)
4354GEN_VEXT_VSLIDE1DOWN(16, H2)
4355GEN_VEXT_VSLIDE1DOWN(32, H4)
4356GEN_VEXT_VSLIDE1DOWN(64, H8)
4357
4358#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \
4359void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4360 CPURISCVState *env, uint32_t desc) \
4361{ \
4362 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
4363}
4364
4365/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4366GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4367GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4368GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4369GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4370
4371/* Vector Floating-Point Slide Instructions */
4372#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \
4373void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4374 CPURISCVState *env, uint32_t desc) \
4375{ \
4376 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
4377}
4378
4379/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4380GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4381GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4382GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4383
4384#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \
4385void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4386 CPURISCVState *env, uint32_t desc) \
4387{ \
4388 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
4389}
4390
4391/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4392GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4393GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4394GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
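/*
 * The floating-point slide1 variants reuse the integer vslide1up/down
 * bodies: the scalar is inserted as a raw bit pattern, so no
 * floating-point arithmetic (and hence no exception flag update) is
 * involved.
 */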
4395
4396/* Vector Register Gather Instruction */
4397#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
4398void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4399 CPURISCVState *env, uint32_t desc) \
4400{ \
4401 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
4402 uint32_t vm = vext_vm(desc); \
4403 uint32_t vl = env->vl; \
4404 uint64_t index; \
4405 uint32_t i; \
e4b83d5c 4406 \
4407 for (i = env->vstart; i < vl; i++) { \
4408 if (!vm && !vext_elem_mask(v0, i)) { \
4409 continue; \
4410 } \
4411 index = *((TS1 *)vs1 + HS1(i)); \
4412 if (index >= vlmax) { \
4413 *((TS2 *)vd + HS2(i)) = 0; \
4414 } else { \
4415 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
4416 } \
4417 } \
4418 env->vstart = 0; \
4419}
4420
4421/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4422GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
4423GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4424GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4425GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4426
4427GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
4428GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4429GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4430GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
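/*
 * vrgatherei16 always reads its indices as 16-bit elements (TS1/HS1 are
 * fixed to uint16_t/H2 above) regardless of the data SEW, which is why it
 * needs a separate set of instantiations from vrgather.vv.
 */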
e4b83d5c 4431
4432#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
4433void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4434 CPURISCVState *env, uint32_t desc) \
4435{ \
4436 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
4437 uint32_t vm = vext_vm(desc); \
4438 uint32_t vl = env->vl; \
4439 uint64_t index = s1; \
4440 uint32_t i; \
e4b83d5c 4441 \
4442 for (i = env->vstart; i < vl; i++) { \
4443 if (!vm && !vext_elem_mask(v0, i)) { \
4444 continue; \
4445 } \
4446 if (index >= vlmax) { \
4447 *((ETYPE *)vd + H(i)) = 0; \
4448 } else { \
4449 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
4450 } \
4451 } \
4452 env->vstart = 0; \
4453}
4454
4455/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
4456GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
4457GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4458GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4459GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4460
4461/* Vector Compress Instruction */
4462#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
4463void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4464 CPURISCVState *env, uint32_t desc) \
4465{ \
4466 uint32_t vl = env->vl; \
4467 uint32_t num = 0, i; \
4468 \
4469 for (i = env->vstart; i < vl; i++) { \
4470 if (!vext_elem_mask(vs1, i)) { \
4471 continue; \
4472 } \
4473 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
4474 num++; \
4475 } \
4476 env->vstart = 0; \
4477}
4478
4479/* Compress into vd elements of vs2 where vs1 is enabled */
4480GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
4481GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4482GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4483GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
4484
4485/* Vector Whole Register Move */
4486#define GEN_VEXT_VMV_WHOLE(NAME, LEN) \
4487void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
4488 uint32_t desc) \
4489{ \
4490 /* EEW = 8 */ \
4491 uint32_t maxsz = simd_maxsz(desc); \
4492 uint32_t i = env->vstart; \
4493 \
4494 memcpy((uint8_t *)vd + H1(i), \
4495 (uint8_t *)vs2 + H1(i), \
4496 maxsz - env->vstart); \
4497 \
4498 env->vstart = 0; \
4499}
4500
4501GEN_VEXT_VMV_WHOLE(vmv1r_v, 1)
4502GEN_VEXT_VMV_WHOLE(vmv2r_v, 2)
4503GEN_VEXT_VMV_WHOLE(vmv4r_v, 4)
4504GEN_VEXT_VMV_WHOLE(vmv8r_v, 8)
4505
4506/* Vector Integer Extension */
4507#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
4508void HELPER(NAME)(void *vd, void *v0, void *vs2, \
4509 CPURISCVState *env, uint32_t desc) \
4510{ \
4511 uint32_t vl = env->vl; \
4512 uint32_t vm = vext_vm(desc); \
4513 uint32_t i; \
4514 \
4515 for (i = env->vstart; i < vl; i++) { \
4516 if (!vm && !vext_elem_mask(v0, i)) { \
4517 continue; \
4518 } \
4519 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
4520 } \
4521 env->vstart = 0; \
4522}
4523
4524GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
4525GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
4526GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
4527GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
4528GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
4529GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
4530
4531GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
4532GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
4533GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
4534GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
4535GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
4536GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)