/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    bool vill = FIELD_EX64(s2, VTYPE, VILL);
    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that needs a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif
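
/*
 * Worked example (editor's illustration, derived from the macros above):
 * on a big-endian host, byte element 0 of a 64-bit chunk lives at host
 * byte offset 7, so H1(0) == 0 ^ 7 == 7 and H1(1) == 6; for 16-bit
 * elements, H2(0) == 0 ^ 3 == 3.  On a little-endian host every H macro
 * is the identity and element i is simply at index i.
 */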

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}
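
/*
 * For example, a fractional LMUL of 1/4 is encoded as vlmul = 110b;
 * sextract32(6, 0, 3) sign-extends the 3-bit field to -2, matching the
 * table above.  LMUL = 8 (vlmul = 011b) stays at +3.
 */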

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 256 bytes, the maximum VLEN is
     * 256 bytes (2048 bits), so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
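
/*
 * For example, with VLEN = 128 bits (vlenb = 16), SEW = 32 (esz = 2) and
 * LMUL = 1/2 (lmul = -1): scale = -1 - 2 = -3, so the helper returns
 * 16 >> 3 = 2 elements.  With LMUL = 8 it returns 16 << 1 = 32 elements,
 * i.e. VLMAX = LMUL * VLEN / SEW in both cases.
 */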

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. The guest software
 * can then return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}
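
/*
 * For example, with a 4 KiB page size, a 64-byte access starting at
 * offset 0xfe0 of a page covers 32 bytes in the current page and 32
 * bytes in the next one, so both pages are probed separately above.
 */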

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}
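
/*
 * For example, mask element 70 lives in 64-bit word 1 (70 / 64) at bit
 * position 6 (70 % 64); vext_set_elem_mask() deposits a single bit there
 * and vext_elem_mask() reads it back.
 */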

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
214
215/*
216 *** stride: access vector element from strided memory
217 */
218static void
219vext_ldst_stride(void *vd, void *v0, target_ulong base,
220 target_ulong stride, CPURISCVState *env,
221 uint32_t desc, uint32_t vm,
3479a814 222 vext_ldst_elem_fn *ldst_elem,
79556fb6 223 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
751538d5
LZ
224{
225 uint32_t i, k;
226 uint32_t nf = vext_nf(desc);
5a9f8e15 227 uint32_t max_elems = vext_max_elems(desc, esz);
751538d5
LZ
228
229 /* probe every access*/
230 for (i = 0; i < env->vl; i++) {
f9298de5 231 if (!vm && !vext_elem_mask(v0, i)) {
751538d5
LZ
232 continue;
233 }
5a9f8e15 234 probe_pages(env, base + stride * i, nf << esz, ra, access_type);
751538d5
LZ
235 }
236 /* do real access */
237 for (i = 0; i < env->vl; i++) {
238 k = 0;
f9298de5 239 if (!vm && !vext_elem_mask(v0, i)) {
751538d5
LZ
240 continue;
241 }
242 while (k < nf) {
5a9f8e15
FC
243 target_ulong addr = base + stride * i + (k << esz);
244 ldst_elem(env, addr, i + k * max_elems, vd, ra);
751538d5
LZ
245 k++;
246 }
247 }
751538d5
LZ
248}
249
79556fb6 250#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
751538d5
LZ
251void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
252 target_ulong stride, CPURISCVState *env, \
253 uint32_t desc) \
254{ \
255 uint32_t vm = vext_vm(desc); \
256 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
5a9f8e15 257 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
751538d5
LZ
258}
259
79556fb6
FC
260GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
261GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
262GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
263GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
264
265#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
751538d5
LZ
266void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
267 target_ulong stride, CPURISCVState *env, \
268 uint32_t desc) \
269{ \
270 uint32_t vm = vext_vm(desc); \
271 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
5a9f8e15 272 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
751538d5
LZ
273}
274
79556fb6
FC
275GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
276GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
277GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
278GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
751538d5
LZ
279
280/*
281 *** unit-stride: access elements stored contiguously in memory
282 */
283
284/* unmasked unit-stride load and store operation*/
285static void
286vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
79556fb6
FC
287 vext_ldst_elem_fn *ldst_elem,
288 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
751538d5
LZ
289{
290 uint32_t i, k;
291 uint32_t nf = vext_nf(desc);
5a9f8e15 292 uint32_t max_elems = vext_max_elems(desc, esz);
751538d5
LZ
293
294 /* probe every access */
5a9f8e15 295 probe_pages(env, base, env->vl * (nf << esz), ra, access_type);
751538d5
LZ
296 /* load bytes from guest memory */
297 for (i = 0; i < env->vl; i++) {
298 k = 0;
299 while (k < nf) {
5a9f8e15
FC
300 target_ulong addr = base + ((i * nf + k) << esz);
301 ldst_elem(env, addr, i + k * max_elems, vd, ra);
751538d5
LZ
302 k++;
303 }
304 }
751538d5
LZ
305}
306
/*
 * A masked unit-stride load or store is handled as a special case of the
 * strided operation, with stride = NF * sizeof(ETYPE).
 */
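/*
 * For example, a masked vle32.v (NF = 1, 4-byte elements) is dispatched
 * by the helpers generated below as vext_ldst_stride() with
 * stride = 1 << ctzl(4) = 4 bytes, i.e. consecutive elements, while the
 * unmasked form takes the single-probe vext_ldst_us() path.
 */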
311
79556fb6 312#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
751538d5
LZ
313void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
314 CPURISCVState *env, uint32_t desc) \
315{ \
5a9f8e15 316 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
751538d5 317 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
5a9f8e15 318 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
751538d5
LZ
319} \
320 \
321void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
322 CPURISCVState *env, uint32_t desc) \
323{ \
3479a814 324 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
5a9f8e15 325 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
751538d5
LZ
326}
327
79556fb6
FC
328GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
329GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
330GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
331GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
332
333#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
751538d5
LZ
334void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
335 CPURISCVState *env, uint32_t desc) \
336{ \
5a9f8e15 337 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
751538d5 338 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
5a9f8e15 339 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
751538d5
LZ
340} \
341 \
342void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
343 CPURISCVState *env, uint32_t desc) \
344{ \
3479a814 345 vext_ldst_us(vd, base, env, desc, STORE_FN, \
5a9f8e15 346 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
751538d5
LZ
347}
348
79556fb6
FC
349GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
350GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
351GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
352GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
f732560e
LZ
353
354/*
355 *** index: access vector element from indexed memory
356 */
357typedef target_ulong vext_get_index_addr(target_ulong base,
358 uint32_t idx, void *vs2);
359
360#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
361static target_ulong NAME(target_ulong base, \
362 uint32_t idx, void *vs2) \
363{ \
364 return (base + *((ETYPE *)vs2 + H(idx))); \
365}
366
83fcd573
FC
367GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
368GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
369GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
370GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
f732560e
LZ
371
372static inline void
373vext_ldst_index(void *vd, void *v0, target_ulong base,
374 void *vs2, CPURISCVState *env, uint32_t desc,
375 vext_get_index_addr get_index_addr,
376 vext_ldst_elem_fn *ldst_elem,
08b9d0ed 377 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
f732560e
LZ
378{
379 uint32_t i, k;
380 uint32_t nf = vext_nf(desc);
381 uint32_t vm = vext_vm(desc);
5a9f8e15 382 uint32_t max_elems = vext_max_elems(desc, esz);
f732560e
LZ
383
384 /* probe every access*/
385 for (i = 0; i < env->vl; i++) {
f9298de5 386 if (!vm && !vext_elem_mask(v0, i)) {
f732560e
LZ
387 continue;
388 }
5a9f8e15 389 probe_pages(env, get_index_addr(base, i, vs2), nf << esz, ra,
f732560e
LZ
390 access_type);
391 }
392 /* load bytes from guest memory */
393 for (i = 0; i < env->vl; i++) {
394 k = 0;
f9298de5 395 if (!vm && !vext_elem_mask(v0, i)) {
f732560e
LZ
396 continue;
397 }
398 while (k < nf) {
5a9f8e15
FC
399 abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
400 ldst_elem(env, addr, i + k * max_elems, vd, ra);
f732560e
LZ
401 k++;
402 }
403 }
f732560e
LZ
404}
405
08b9d0ed 406#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
f732560e
LZ
407void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
408 void *vs2, CPURISCVState *env, uint32_t desc) \
409{ \
410 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
5a9f8e15 411 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
f732560e
LZ
412}
413
08b9d0ed
FC
414GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
415GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
416GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
417GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
418GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
419GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
420GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
421GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
422GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
423GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
424GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
425GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
426GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
427GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
428GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
429GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
430
431#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
f732560e
LZ
432void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
433 void *vs2, CPURISCVState *env, uint32_t desc) \
434{ \
435 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
5a9f8e15 436 STORE_FN, ctzl(sizeof(ETYPE)), \
f732560e
LZ
437 GETPC(), MMU_DATA_STORE); \
438}
439
08b9d0ed
FC
440GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
441GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
442GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
443GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
444GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
445GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
446GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
447GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
448GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
449GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
450GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
451GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
452GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
453GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
454GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
455GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
022b4ecf
LZ
456
/*
 *** unit-stride fault-only-first load instructions
 */
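/*
 * For example, with vl = 8 and elements 0..3 on a mapped page but
 * elements 4..7 on an unmapped one, the probe loop below stops at i = 4
 * and shrinks env->vl to 4 instead of raising a fault; only a fault on
 * element 0 is actually reported to the guest.
 */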
460static inline void
461vext_ldff(void *vd, void *v0, target_ulong base,
462 CPURISCVState *env, uint32_t desc,
463 vext_ldst_elem_fn *ldst_elem,
d3e5e2ff 464 uint32_t esz, uintptr_t ra)
022b4ecf
LZ
465{
466 void *host;
467 uint32_t i, k, vl = 0;
022b4ecf
LZ
468 uint32_t nf = vext_nf(desc);
469 uint32_t vm = vext_vm(desc);
5a9f8e15 470 uint32_t max_elems = vext_max_elems(desc, esz);
022b4ecf
LZ
471 target_ulong addr, offset, remain;
472
473 /* probe every access*/
474 for (i = 0; i < env->vl; i++) {
f9298de5 475 if (!vm && !vext_elem_mask(v0, i)) {
022b4ecf
LZ
476 continue;
477 }
5a9f8e15 478 addr = base + i * (nf << esz);
022b4ecf 479 if (i == 0) {
5a9f8e15 480 probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
022b4ecf
LZ
481 } else {
482 /* if it triggers an exception, no need to check watchpoint */
5a9f8e15 483 remain = nf << esz;
022b4ecf
LZ
484 while (remain > 0) {
485 offset = -(addr | TARGET_PAGE_MASK);
486 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
487 cpu_mmu_index(env, false));
488 if (host) {
489#ifdef CONFIG_USER_ONLY
5a9f8e15 490 if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
022b4ecf
LZ
491 vl = i;
492 goto ProbeSuccess;
493 }
494#else
5a9f8e15 495 probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
022b4ecf
LZ
496#endif
497 } else {
498 vl = i;
499 goto ProbeSuccess;
500 }
501 if (remain <= offset) {
502 break;
503 }
504 remain -= offset;
505 addr += offset;
506 }
507 }
508 }
509ProbeSuccess:
510 /* load bytes from guest memory */
511 if (vl != 0) {
512 env->vl = vl;
513 }
514 for (i = 0; i < env->vl; i++) {
515 k = 0;
f9298de5 516 if (!vm && !vext_elem_mask(v0, i)) {
022b4ecf
LZ
517 continue;
518 }
519 while (k < nf) {
5a9f8e15
FC
520 target_ulong addr = base + ((i * nf + k) << esz);
521 ldst_elem(env, addr, i + k * max_elems, vd, ra);
022b4ecf
LZ
522 k++;
523 }
524 }
022b4ecf
LZ
525}
526
d3e5e2ff
FC
527#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
528void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
529 CPURISCVState *env, uint32_t desc) \
530{ \
531 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
5a9f8e15 532 ctzl(sizeof(ETYPE)), GETPC()); \
022b4ecf
LZ
533}
534
d3e5e2ff
FC
535GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
536GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
537GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
538GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
268fcca6 539
268fcca6
LZ
540#define DO_SWAP(N, M) (M)
541#define DO_AND(N, M) (N & M)
542#define DO_XOR(N, M) (N ^ M)
543#define DO_OR(N, M) (N | M)
544#define DO_ADD(N, M) (N + M)
545
268fcca6
LZ
546/* Signed min/max */
547#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
548#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
549
550/* Unsigned min/max */
551#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
552#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
553
30206bd8
FC
554/*
555 *** load and store whole register instructions
556 */
557static void
558vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
559 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
560 MMUAccessType access_type)
561{
562 uint32_t i, k;
563 uint32_t nf = vext_nf(desc);
564 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
565 uint32_t max_elems = vlenb >> esz;
566
567 /* probe every access */
568 probe_pages(env, base, vlenb * nf, ra, access_type);
569
570 /* load bytes from guest memory */
571 for (k = 0; k < nf; k++) {
572 for (i = 0; i < max_elems; i++) {
573 target_ulong addr = base + ((i + k * max_elems) << esz);
574 ldst_elem(env, addr, i + k * max_elems, vd, ra);
575 }
576 }
577}
578
579#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
580void HELPER(NAME)(void *vd, target_ulong base, \
581 CPURISCVState *env, uint32_t desc) \
582{ \
583 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
584 ctzl(sizeof(ETYPE)), GETPC(), \
585 MMU_DATA_LOAD); \
586}
587
588GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
589GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
590GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
591GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
592GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
593GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
594GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
595GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
596GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
597GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
598GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
599GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
600GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
601GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
602GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
603GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
604
605#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
606void HELPER(NAME)(void *vd, target_ulong base, \
607 CPURISCVState *env, uint32_t desc) \
608{ \
609 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
610 ctzl(sizeof(ETYPE)), GETPC(), \
611 MMU_DATA_STORE); \
612}
613
614GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
615GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
616GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
617GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
618
43740e3a
LZ
619/*
620 *** Vector Integer Arithmetic Instructions
621 */
622
623/* expand macro args before macro */
624#define RVVCALL(macro, ...) macro(__VA_ARGS__)
625
626/* (TD, T1, T2, TX1, TX2) */
627#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
628#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
629#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
630#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
558fa779
LZ
631#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
632#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
633#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
634#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
958b85f3
LZ
635#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
636#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
637#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
638#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
97b1cba3
LZ
639#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
640#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
641#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
642#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
643#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
644#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
645#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
646#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
647#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
648#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
649#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
650#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
9ff3d287
LZ
651#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
652#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
653#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
654#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
655#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
656#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
43740e3a
LZ
657
658/* operation of two vector elements */
659typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
660
661#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
662static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
663{ \
664 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
665 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
666 *((TD *)vd + HD(i)) = OP(s2, s1); \
667}
668#define DO_SUB(N, M) (N - M)
669#define DO_RSUB(N, M) (M - N)
670
671RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
672RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
673RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
674RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
675RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
676RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
677RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
678RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
679
680static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
681 CPURISCVState *env, uint32_t desc,
682 uint32_t esz, uint32_t dsz,
3479a814 683 opivv2_fn *fn)
43740e3a 684{
43740e3a
LZ
685 uint32_t vm = vext_vm(desc);
686 uint32_t vl = env->vl;
687 uint32_t i;
688
689 for (i = 0; i < vl; i++) {
f9298de5 690 if (!vm && !vext_elem_mask(v0, i)) {
43740e3a
LZ
691 continue;
692 }
693 fn(vd, vs1, vs2, i);
694 }
43740e3a
LZ
695}
696
697/* generate the helpers for OPIVV */
3479a814 698#define GEN_VEXT_VV(NAME, ESZ, DSZ) \
43740e3a
LZ
699void HELPER(NAME)(void *vd, void *v0, void *vs1, \
700 void *vs2, CPURISCVState *env, \
701 uint32_t desc) \
702{ \
703 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
3479a814 704 do_##NAME); \
43740e3a
LZ
705}
706
3479a814
FC
707GEN_VEXT_VV(vadd_vv_b, 1, 1)
708GEN_VEXT_VV(vadd_vv_h, 2, 2)
709GEN_VEXT_VV(vadd_vv_w, 4, 4)
710GEN_VEXT_VV(vadd_vv_d, 8, 8)
711GEN_VEXT_VV(vsub_vv_b, 1, 1)
712GEN_VEXT_VV(vsub_vv_h, 2, 2)
713GEN_VEXT_VV(vsub_vv_w, 4, 4)
714GEN_VEXT_VV(vsub_vv_d, 8, 8)
43740e3a
LZ
715
716typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
717
718/*
719 * (T1)s1 gives the real operator type.
720 * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
721 */
722#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
723static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
724{ \
725 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
726 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
727}
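
/*
 * For example, in vwadd_vx_b (WOP_SSS_B: TD = int16_t, T1 = int8_t,
 * TX1 = int16_t) the scalar is first truncated to int8_t and then
 * sign-extended to int16_t, so only the low 8 bits of s1 participate in
 * the widening add.
 */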
728
729RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
730RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
731RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
732RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
733RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
734RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
735RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
736RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
737RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
738RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
739RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
740RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
741
742static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
743 CPURISCVState *env, uint32_t desc,
744 uint32_t esz, uint32_t dsz,
3479a814 745 opivx2_fn fn)
43740e3a 746{
43740e3a
LZ
747 uint32_t vm = vext_vm(desc);
748 uint32_t vl = env->vl;
749 uint32_t i;
750
751 for (i = 0; i < vl; i++) {
f9298de5 752 if (!vm && !vext_elem_mask(v0, i)) {
43740e3a
LZ
753 continue;
754 }
755 fn(vd, s1, vs2, i);
756 }
43740e3a
LZ
757}
758
759/* generate the helpers for OPIVX */
3479a814 760#define GEN_VEXT_VX(NAME, ESZ, DSZ) \
43740e3a
LZ
761void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
762 void *vs2, CPURISCVState *env, \
763 uint32_t desc) \
764{ \
765 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
3479a814
FC
766 do_##NAME); \
767}
768
769GEN_VEXT_VX(vadd_vx_b, 1, 1)
770GEN_VEXT_VX(vadd_vx_h, 2, 2)
771GEN_VEXT_VX(vadd_vx_w, 4, 4)
772GEN_VEXT_VX(vadd_vx_d, 8, 8)
773GEN_VEXT_VX(vsub_vx_b, 1, 1)
774GEN_VEXT_VX(vsub_vx_h, 2, 2)
775GEN_VEXT_VX(vsub_vx_w, 4, 4)
776GEN_VEXT_VX(vsub_vx_d, 8, 8)
777GEN_VEXT_VX(vrsub_vx_b, 1, 1)
778GEN_VEXT_VX(vrsub_vx_h, 2, 2)
779GEN_VEXT_VX(vrsub_vx_w, 4, 4)
780GEN_VEXT_VX(vrsub_vx_d, 8, 8)
43740e3a
LZ
781
782void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
783{
784 intptr_t oprsz = simd_oprsz(desc);
785 intptr_t i;
786
787 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
788 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
789 }
790}
791
792void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
793{
794 intptr_t oprsz = simd_oprsz(desc);
795 intptr_t i;
796
797 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
798 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
799 }
800}
801
802void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
803{
804 intptr_t oprsz = simd_oprsz(desc);
805 intptr_t i;
806
807 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
808 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
809 }
810}
811
812void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
813{
814 intptr_t oprsz = simd_oprsz(desc);
815 intptr_t i;
816
817 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
818 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
819 }
820}
8fcdf776
LZ
821
822/* Vector Widening Integer Add/Subtract */
823#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
824#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
825#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
826#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
827#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
828#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
829#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
830#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
831#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
832#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
833#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
834#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
835RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
836RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
837RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
838RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
839RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
840RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
841RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
842RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
843RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
844RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
845RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
846RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
847RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
848RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
849RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
850RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
851RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
852RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
853RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
854RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
855RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
856RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
857RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
858RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
3479a814
FC
859GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
860GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
861GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
862GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
863GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
864GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
865GEN_VEXT_VV(vwadd_vv_b, 1, 2)
866GEN_VEXT_VV(vwadd_vv_h, 2, 4)
867GEN_VEXT_VV(vwadd_vv_w, 4, 8)
868GEN_VEXT_VV(vwsub_vv_b, 1, 2)
869GEN_VEXT_VV(vwsub_vv_h, 2, 4)
870GEN_VEXT_VV(vwsub_vv_w, 4, 8)
871GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
872GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
873GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
874GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
875GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
876GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
877GEN_VEXT_VV(vwadd_wv_b, 1, 2)
878GEN_VEXT_VV(vwadd_wv_h, 2, 4)
879GEN_VEXT_VV(vwadd_wv_w, 4, 8)
880GEN_VEXT_VV(vwsub_wv_b, 1, 2)
881GEN_VEXT_VV(vwsub_wv_h, 2, 4)
882GEN_VEXT_VV(vwsub_wv_w, 4, 8)
8fcdf776
LZ
883
884RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
885RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
886RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
887RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
888RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
889RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
890RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
891RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
892RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
893RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
894RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
895RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
896RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
897RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
898RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
899RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
900RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
901RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
902RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
903RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
904RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
905RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
906RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
907RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
3479a814
FC
908GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
909GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
910GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
911GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
912GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
913GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
914GEN_VEXT_VX(vwadd_vx_b, 1, 2)
915GEN_VEXT_VX(vwadd_vx_h, 2, 4)
916GEN_VEXT_VX(vwadd_vx_w, 4, 8)
917GEN_VEXT_VX(vwsub_vx_b, 1, 2)
918GEN_VEXT_VX(vwsub_vx_h, 2, 4)
919GEN_VEXT_VX(vwsub_vx_w, 4, 8)
920GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
921GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
922GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
923GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
924GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
925GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
926GEN_VEXT_VX(vwadd_wx_b, 1, 2)
927GEN_VEXT_VX(vwadd_wx_h, 2, 4)
928GEN_VEXT_VX(vwadd_wx_w, 4, 8)
929GEN_VEXT_VX(vwsub_wx_b, 1, 2)
930GEN_VEXT_VX(vwsub_wx_h, 2, 4)
931GEN_VEXT_VX(vwsub_wx_w, 4, 8)
3a6f8f68
LZ
932
933/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
934#define DO_VADC(N, M, C) (N + M + C)
935#define DO_VSBC(N, M, C) (N - M - C)
936
3479a814 937#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
3a6f8f68
LZ
938void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
939 CPURISCVState *env, uint32_t desc) \
940{ \
3a6f8f68 941 uint32_t vl = env->vl; \
3a6f8f68
LZ
942 uint32_t i; \
943 \
944 for (i = 0; i < vl; i++) { \
945 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
946 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 947 ETYPE carry = vext_elem_mask(v0, i); \
3a6f8f68
LZ
948 \
949 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
950 } \
3a6f8f68
LZ
951}
952
3479a814
FC
953GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
954GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
955GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
956GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
3a6f8f68 957
3479a814
FC
958GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
959GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
960GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
961GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
3a6f8f68 962
3479a814 963#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
3a6f8f68
LZ
964void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
965 CPURISCVState *env, uint32_t desc) \
966{ \
3a6f8f68 967 uint32_t vl = env->vl; \
3a6f8f68
LZ
968 uint32_t i; \
969 \
970 for (i = 0; i < vl; i++) { \
971 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 972 ETYPE carry = vext_elem_mask(v0, i); \
3a6f8f68
LZ
973 \
974 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
975 } \
3a6f8f68
LZ
976}
977
3479a814
FC
978GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
979GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
980GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
981GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
3a6f8f68 982
3479a814
FC
983GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
984GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
985GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
986GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
3a6f8f68
LZ
987
988#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
989 (__typeof(N))(N + M) < N)
990#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
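
/*
 * For example, for 8-bit elements with N = 0xff, M = 0x01, C = 0:
 * (uint8_t)(N + M) == 0x00 < N, so DO_MADC reports a carry-out of 1.
 * With C = 1, N = 0xfe, M = 0x01: (uint8_t)(N + M + 1) == 0x00 <= N,
 * again a carry-out of 1.
 */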
991
992#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
993void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
994 CPURISCVState *env, uint32_t desc) \
995{ \
3a6f8f68 996 uint32_t vl = env->vl; \
bb45485a 997 uint32_t vm = vext_vm(desc); \
3a6f8f68
LZ
998 uint32_t i; \
999 \
1000 for (i = 0; i < vl; i++) { \
1001 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1002 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1003 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 1004 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
3a6f8f68 1005 } \
3a6f8f68
LZ
1006}
1007
1008GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1009GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1010GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1011GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1012
1013GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1014GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1015GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1016GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1017
1018#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1019void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1020 void *vs2, CPURISCVState *env, uint32_t desc) \
1021{ \
3a6f8f68 1022 uint32_t vl = env->vl; \
bb45485a 1023 uint32_t vm = vext_vm(desc); \
3a6f8f68
LZ
1024 uint32_t i; \
1025 \
1026 for (i = 0; i < vl; i++) { \
1027 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1028 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 1029 vext_set_elem_mask(vd, i, \
3a6f8f68
LZ
1030 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1031 } \
3a6f8f68
LZ
1032}
1033
1034GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1035GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1036GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1037GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1038
1039GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1040GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1041GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1042GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
d3842924
LZ
1043
1044/* Vector Bitwise Logical Instructions */
1045RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1046RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1047RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1048RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1049RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1050RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1051RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1052RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1053RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1054RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1055RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1056RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
3479a814
FC
1057GEN_VEXT_VV(vand_vv_b, 1, 1)
1058GEN_VEXT_VV(vand_vv_h, 2, 2)
1059GEN_VEXT_VV(vand_vv_w, 4, 4)
1060GEN_VEXT_VV(vand_vv_d, 8, 8)
1061GEN_VEXT_VV(vor_vv_b, 1, 1)
1062GEN_VEXT_VV(vor_vv_h, 2, 2)
1063GEN_VEXT_VV(vor_vv_w, 4, 4)
1064GEN_VEXT_VV(vor_vv_d, 8, 8)
1065GEN_VEXT_VV(vxor_vv_b, 1, 1)
1066GEN_VEXT_VV(vxor_vv_h, 2, 2)
1067GEN_VEXT_VV(vxor_vv_w, 4, 4)
1068GEN_VEXT_VV(vxor_vv_d, 8, 8)
d3842924
LZ
1069
1070RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1071RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1072RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1073RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1074RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1075RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1076RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1077RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1078RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1079RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1080RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1081RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
3479a814
FC
1082GEN_VEXT_VX(vand_vx_b, 1, 1)
1083GEN_VEXT_VX(vand_vx_h, 2, 2)
1084GEN_VEXT_VX(vand_vx_w, 4, 4)
1085GEN_VEXT_VX(vand_vx_d, 8, 8)
1086GEN_VEXT_VX(vor_vx_b, 1, 1)
1087GEN_VEXT_VX(vor_vx_h, 2, 2)
1088GEN_VEXT_VX(vor_vx_w, 4, 4)
1089GEN_VEXT_VX(vor_vx_d, 8, 8)
1090GEN_VEXT_VX(vxor_vx_b, 1, 1)
1091GEN_VEXT_VX(vxor_vx_h, 2, 2)
1092GEN_VEXT_VX(vxor_vx_w, 4, 4)
1093GEN_VEXT_VX(vxor_vx_d, 8, 8)
3277d955
LZ
1094
1095/* Vector Single-Width Bit Shift Instructions */
1096#define DO_SLL(N, M) (N << (M))
1097#define DO_SRL(N, M) (N >> (M))
1098
/* generate the helpers for shift instructions with two vector operands */
3479a814 1100#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
3277d955
LZ
1101void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1102 void *vs2, CPURISCVState *env, uint32_t desc) \
1103{ \
3277d955
LZ
1104 uint32_t vm = vext_vm(desc); \
1105 uint32_t vl = env->vl; \
3277d955
LZ
1106 uint32_t i; \
1107 \
1108 for (i = 0; i < vl; i++) { \
f9298de5 1109 if (!vm && !vext_elem_mask(v0, i)) { \
3277d955
LZ
1110 continue; \
1111 } \
1112 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1113 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1114 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1115 } \
3277d955
LZ
1116}
1117
3479a814
FC
1118GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1119GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1120GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1121GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
3277d955 1122
3479a814
FC
1123GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1124GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1125GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1126GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
3277d955 1127
3479a814
FC
1128GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1129GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1130GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1131GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
3277d955
LZ
1132
1133/* generate the helpers for shift instructions with one vector and one scalar */
3479a814
FC
1134#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1135void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1136 void *vs2, CPURISCVState *env, uint32_t desc) \
1137{ \
1138 uint32_t vm = vext_vm(desc); \
1139 uint32_t vl = env->vl; \
1140 uint32_t i; \
1141 \
1142 for (i = 0; i < vl; i++) { \
1143 if (!vm && !vext_elem_mask(v0, i)) { \
1144 continue; \
1145 } \
1146 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1147 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1148 } \
1149}
1150
1151GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1152GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1153GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1154GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1155
1156GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1157GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1158GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1159GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1160
1161GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1162GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1163GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1164GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
7689b028
LZ
1165
1166/* Vector Narrowing Integer Right Shift Instructions */
7daa5852
FC
1167GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1168GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1169GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1170GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1171GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1172GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1173GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1174GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1175GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1176GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1177GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1178GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1366fc79
LZ
1179
1180/* Vector Integer Comparison Instructions */
1181#define DO_MSEQ(N, M) (N == M)
1182#define DO_MSNE(N, M) (N != M)
1183#define DO_MSLT(N, M) (N < M)
1184#define DO_MSLE(N, M) (N <= M)
1185#define DO_MSGT(N, M) (N > M)
1186
1187#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1188void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1189 CPURISCVState *env, uint32_t desc) \
1190{ \
1366fc79
LZ
1191 uint32_t vm = vext_vm(desc); \
1192 uint32_t vl = env->vl; \
1366fc79
LZ
1193 uint32_t i; \
1194 \
1195 for (i = 0; i < vl; i++) { \
1196 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1197 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1198 if (!vm && !vext_elem_mask(v0, i)) { \
1366fc79
LZ
1199 continue; \
1200 } \
f9298de5 1201 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1366fc79 1202 } \
1366fc79
LZ
1203}
1204
1205GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1206GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1207GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1208GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1209
1210GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1211GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1212GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1213GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1214
1215GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1216GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1217GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1218GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1219
1220GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1221GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1222GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1223GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1224
1225GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1226GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1227GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1228GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1229
1230GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1231GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1232GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1233GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1234
1235#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1236void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1237 CPURISCVState *env, uint32_t desc) \
1238{ \
1366fc79
LZ
1239 uint32_t vm = vext_vm(desc); \
1240 uint32_t vl = env->vl; \
1366fc79
LZ
1241 uint32_t i; \
1242 \
1243 for (i = 0; i < vl; i++) { \
1244 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1245 if (!vm && !vext_elem_mask(v0, i)) { \
1366fc79
LZ
1246 continue; \
1247 } \
f9298de5 1248 vext_set_elem_mask(vd, i, \
1366fc79
LZ
1249 DO_OP(s2, (ETYPE)(target_long)s1)); \
1250 } \
1366fc79
LZ
1251}
1252
1253GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1254GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1255GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1256GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1257
1258GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1259GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1260GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1261GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1262
1263GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1264GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1265GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1266GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1267
1268GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1269GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1270GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1271GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1272
1273GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1274GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1275GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1276GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1277
1278GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1279GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1280GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1281GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1282
1283GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1284GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1285GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1286GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1287
1288GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1289GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1290GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1291GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
558fa779
LZ
1292
1293/* Vector Integer Min/Max Instructions */
1294RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1295RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1296RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1297RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1298RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1299RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1300RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1301RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1302RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1303RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1304RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1305RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1306RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1307RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1308RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1309RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
3479a814
FC
1310GEN_VEXT_VV(vminu_vv_b, 1, 1)
1311GEN_VEXT_VV(vminu_vv_h, 2, 2)
1312GEN_VEXT_VV(vminu_vv_w, 4, 4)
1313GEN_VEXT_VV(vminu_vv_d, 8, 8)
1314GEN_VEXT_VV(vmin_vv_b, 1, 1)
1315GEN_VEXT_VV(vmin_vv_h, 2, 2)
1316GEN_VEXT_VV(vmin_vv_w, 4, 4)
1317GEN_VEXT_VV(vmin_vv_d, 8, 8)
1318GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
1319GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
1320GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
1321GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
1322GEN_VEXT_VV(vmax_vv_b, 1, 1)
1323GEN_VEXT_VV(vmax_vv_h, 2, 2)
1324GEN_VEXT_VV(vmax_vv_w, 4, 4)
1325GEN_VEXT_VV(vmax_vv_d, 8, 8)
558fa779
LZ
1326
1327RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1328RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1329RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1330RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1331RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1332RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1333RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1334RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1335RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1336RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1337RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1338RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1339RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1340RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1341RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1342RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
3479a814
FC
1343GEN_VEXT_VX(vminu_vx_b, 1, 1)
1344GEN_VEXT_VX(vminu_vx_h, 2, 2)
1345GEN_VEXT_VX(vminu_vx_w, 4, 4)
1346GEN_VEXT_VX(vminu_vx_d, 8, 8)
1347GEN_VEXT_VX(vmin_vx_b, 1, 1)
1348GEN_VEXT_VX(vmin_vx_h, 2, 2)
1349GEN_VEXT_VX(vmin_vx_w, 4, 4)
1350GEN_VEXT_VX(vmin_vx_d, 8, 8)
1351GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
1352GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
1353GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
1354GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
1355GEN_VEXT_VX(vmax_vx_b, 1, 1)
1356GEN_VEXT_VX(vmax_vx_h, 2, 2)
1357GEN_VEXT_VX(vmax_vx_w, 4, 4)
1358GEN_VEXT_VX(vmax_vx_d, 8, 8)
958b85f3
LZ
1359
1360/* Vector Single-Width Integer Multiply Instructions */
1361#define DO_MUL(N, M) (N * M)
1362RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1363RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1364RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1365RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
3479a814
FC
1366GEN_VEXT_VV(vmul_vv_b, 1, 1)
1367GEN_VEXT_VV(vmul_vv_h, 2, 2)
1368GEN_VEXT_VV(vmul_vv_w, 4, 4)
1369GEN_VEXT_VV(vmul_vv_d, 8, 8)
958b85f3
LZ
1370
1371static int8_t do_mulh_b(int8_t s2, int8_t s1)
1372{
1373 return (int16_t)s2 * (int16_t)s1 >> 8;
1374}
1375
1376static int16_t do_mulh_h(int16_t s2, int16_t s1)
1377{
1378 return (int32_t)s2 * (int32_t)s1 >> 16;
1379}
1380
1381static int32_t do_mulh_w(int32_t s2, int32_t s1)
1382{
1383 return (int64_t)s2 * (int64_t)s1 >> 32;
1384}
1385
1386static int64_t do_mulh_d(int64_t s2, int64_t s1)
1387{
1388 uint64_t hi_64, lo_64;
1389
1390 muls64(&lo_64, &hi_64, s1, s2);
1391 return hi_64;
1392}
1393
1394static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1395{
1396 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1397}
1398
1399static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1400{
1401 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1402}
1403
1404static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1405{
1406 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1407}
1408
1409static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1410{
1411 uint64_t hi_64, lo_64;
1412
1413 mulu64(&lo_64, &hi_64, s2, s1);
1414 return hi_64;
1415}
1416
1417static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1418{
1419 return (int16_t)s2 * (uint16_t)s1 >> 8;
1420}
1421
1422static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1423{
1424 return (int32_t)s2 * (uint32_t)s1 >> 16;
1425}
1426
1427static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1428{
1429 return (int64_t)s2 * (uint64_t)s1 >> 32;
1430}
1431
/*
 * Let A = signed operand,
 *     B = unsigned operand,
 *     P = mulu64(A, B), the unsigned product of the raw bit patterns.
 *
 * When A < 0, mulu64() sees the two's-complement encoding A + 2 ** 64, so
 *     P = (A + 2 ** 64) * B = A * B + 2 ** 64 * B
 * and the signed product is
 *     SP = A * B = P - 2 ** 64 * B,
 * i.e. the high 64 bits of SP are HI(P) - B.  When A >= 0, SP = P.
 * Hence:
 *     HI_P -= (A < 0 ? B : 0)
 */
1450
1451static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1452{
1453 uint64_t hi_64, lo_64;
1454
1455 mulu64(&lo_64, &hi_64, s2, s1);
1456
1457 hi_64 -= s2 < 0 ? s1 : 0;
1458 return hi_64;
1459}
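
/*
 * For example, s2 = -2, s1 = 3: mulu64() sees 2 ** 64 - 2 and produces
 * hi_64 = 2, lo_64 = 2 ** 64 - 6; subtracting s1 because s2 < 0 gives
 * hi_64 = -1, the correct upper half of the 128-bit signed product -6.
 */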
1460
1461RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1462RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1463RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1464RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1465RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1466RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1467RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1468RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1469RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1470RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1471RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1472RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
3479a814
FC
1473GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1474GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1475GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1476GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1477GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1478GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1479GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1480GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1481GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1482GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1483GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1484GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
958b85f3
LZ
1485
1486RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1487RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1488RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1489RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1490RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1491RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1492RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1493RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1494RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1495RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1496RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1497RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1498RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1499RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1500RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1501RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
3479a814
FC
1502GEN_VEXT_VX(vmul_vx_b, 1, 1)
1503GEN_VEXT_VX(vmul_vx_h, 2, 2)
1504GEN_VEXT_VX(vmul_vx_w, 4, 4)
1505GEN_VEXT_VX(vmul_vx_d, 8, 8)
1506GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1507GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1508GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1509GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1510GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1511GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1512GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1513GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1514GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1515GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1516GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1517GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
85e6658c
LZ
1518
1519/* Vector Integer Divide Instructions */
1520#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1521#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1522#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\
1523 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1524#define DO_REM(N, M) (unlikely(M == 0) ? N :\
1525 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
1526
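/*
 * These macros encode the RISC-V divide semantics: division by zero yields
 * an all-ones quotient (-1) and leaves the remainder equal to the dividend,
 * while signed overflow (the most negative value divided by -1) yields the
 * dividend as quotient and 0 as remainder.  The (N == -N) test is a
 * type-generic check for the most negative value: only 0 and the minimum of
 * a two's-complement type equal their own negation, and the N == 0 case is
 * harmless here since 0 / -1 and 0 % -1 are 0 anyway.
 */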
1527RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1528RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1529RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1530RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1531RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1532RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1533RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1534RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1535RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1536RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1537RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1538RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1539RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1540RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1541RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1542RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
3479a814
FC
1543GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1544GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1545GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1546GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1547GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1548GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1549GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1550GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1551GEN_VEXT_VV(vremu_vv_b, 1, 1)
1552GEN_VEXT_VV(vremu_vv_h, 2, 2)
1553GEN_VEXT_VV(vremu_vv_w, 4, 4)
1554GEN_VEXT_VV(vremu_vv_d, 8, 8)
1555GEN_VEXT_VV(vrem_vv_b, 1, 1)
1556GEN_VEXT_VV(vrem_vv_h, 2, 2)
1557GEN_VEXT_VV(vrem_vv_w, 4, 4)
1558GEN_VEXT_VV(vrem_vv_d, 8, 8)
85e6658c
LZ
1559
1560RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1561RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1562RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1563RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1564RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1565RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1566RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1567RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1568RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1569RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1570RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1571RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1572RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1573RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1574RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1575RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
3479a814
FC
1576GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1577GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1578GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1579GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1580GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1581GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1582GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1583GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1584GEN_VEXT_VX(vremu_vx_b, 1, 1)
1585GEN_VEXT_VX(vremu_vx_h, 2, 2)
1586GEN_VEXT_VX(vremu_vx_w, 4, 4)
1587GEN_VEXT_VX(vremu_vx_d, 8, 8)
1588GEN_VEXT_VX(vrem_vx_b, 1, 1)
1589GEN_VEXT_VX(vrem_vx_h, 2, 2)
1590GEN_VEXT_VX(vrem_vx_w, 4, 4)
1591GEN_VEXT_VX(vrem_vx_d, 8, 8)
97b1cba3
LZ
1592
1593/* Vector Widening Integer Multiply Instructions */
1594RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1595RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1596RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1597RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1598RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1599RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1600RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1601RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1602RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
3479a814
FC
1603GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1604GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1605GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1606GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1607GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1608GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1609GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1610GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1611GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
97b1cba3
LZ
1612
1613RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1614RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1615RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1616RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1617RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1618RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1619RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1620RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1621RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
3479a814
FC
1622GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1623GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1624GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1625GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1626GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1627GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1628GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1629GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1630GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
54df813a
LZ
1631
1632/* Vector Single-Width Integer Multiply-Add Instructions */
1633#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1634static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1635{ \
1636 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1637 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1638 TD d = *((TD *)vd + HD(i)); \
1639 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1640}
1641
1642#define DO_MACC(N, M, D) (M * N + D)
1643#define DO_NMSAC(N, M, D) (-(M * N) + D)
1644#define DO_MADD(N, M, D) (M * D + N)
1645#define DO_NMSUB(N, M, D) (-(M * D) + N)
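/*
 * For reference, assuming the RVVCALL and OP_SSS_B definitions from earlier
 * in this file (OP_SSS_B supplying int8_t for every type parameter), the
 * vmacc_vv_b instance below expands roughly to:
 *
 *  static void do_vmacc_vv_b(void *vd, void *vs1, void *vs2, int i)
 *  {
 *      int8_t s1 = *((int8_t *)vs1 + H1(i));
 *      int8_t s2 = *((int8_t *)vs2 + H1(i));
 *      int8_t d = *((int8_t *)vd + H1(i));
 *      *((int8_t *)vd + H1(i)) = s1 * s2 + d;
 *  }
 */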
1646RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1647RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1648RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1649RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1650RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1651RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1652RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1653RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1654RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1655RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1656RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1657RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1658RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1659RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1660RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1661RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
3479a814
FC
1662GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1663GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1664GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1665GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1666GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1667GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1668GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1669GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1670GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1671GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1672GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1673GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1674GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1675GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1676GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1677GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
54df813a
LZ
1678
1679#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1680static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1681{ \
1682 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1683 TD d = *((TD *)vd + HD(i)); \
1684 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1685}
1686
1687RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1688RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1689RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1690RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1691RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1692RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1693RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1694RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1695RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1696RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1697RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1698RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1699RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1700RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1701RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1702RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
3479a814
FC
1703GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1704GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1705GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1706GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1707GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1708GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1709GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1710GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1711GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1712GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1713GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1714GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1715GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1716GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1717GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1718GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
2b587b33
LZ
1719
1720/* Vector Widening Integer Multiply-Add Instructions */
1721RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1722RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1723RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1724RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1725RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1726RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1727RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1728RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1729RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
3479a814
FC
1730GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1731GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1732GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1733GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1734GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1735GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1736GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1737GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1738GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
2b587b33
LZ
1739
1740RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1741RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1742RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1743RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1744RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1745RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1746RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1747RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1748RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1749RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1750RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1751RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
3479a814
FC
1752GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1753GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1754GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1755GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1756GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1757GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1758GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1759GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1760GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1761GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1762GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1763GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
f020a7a1
LZ
1764
1765/* Vector Integer Merge and Move Instructions */
3479a814 1766#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
f020a7a1
LZ
1767void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1768 uint32_t desc) \
1769{ \
1770 uint32_t vl = env->vl; \
f020a7a1
LZ
1771 uint32_t i; \
1772 \
1773 for (i = 0; i < vl; i++) { \
1774 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1775 *((ETYPE *)vd + H(i)) = s1; \
1776 } \
f020a7a1
LZ
1777}
1778
3479a814
FC
1779GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1780GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1781GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1782GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
f020a7a1 1783
3479a814 1784#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
f020a7a1
LZ
1785void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1786 uint32_t desc) \
1787{ \
1788 uint32_t vl = env->vl; \
f020a7a1
LZ
1789 uint32_t i; \
1790 \
1791 for (i = 0; i < vl; i++) { \
1792 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
1793 } \
f020a7a1
LZ
1794}
1795
3479a814
FC
1796GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
1797GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1798GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1799GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
f020a7a1 1800
3479a814 1801#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
f020a7a1
LZ
1802void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1803 CPURISCVState *env, uint32_t desc) \
1804{ \
f020a7a1 1805 uint32_t vl = env->vl; \
f020a7a1
LZ
1806 uint32_t i; \
1807 \
1808 for (i = 0; i < vl; i++) { \
f9298de5 1809 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
f020a7a1
LZ
1810 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
1811 } \
f020a7a1
LZ
1812}
1813
3479a814
FC
1814GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
1815GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1816GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1817GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
f020a7a1 1818
3479a814 1819#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
f020a7a1
LZ
1820void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1821 void *vs2, CPURISCVState *env, uint32_t desc) \
1822{ \
f020a7a1 1823 uint32_t vl = env->vl; \
f020a7a1
LZ
1824 uint32_t i; \
1825 \
1826 for (i = 0; i < vl; i++) { \
1827 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1828 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
f020a7a1
LZ
1829 (ETYPE)(target_long)s1); \
1830 *((ETYPE *)vd + H(i)) = d; \
1831 } \
f020a7a1
LZ
1832}
1833
3479a814
FC
1834GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
1835GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1836GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1837GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
eb2650e3
LZ
1838
1839/*
1840 *** Vector Fixed-Point Arithmetic Instructions
1841 */
1842
1843/* Vector Single-Width Saturating Add and Subtract */
1844
1845/*
 1846 * Fixed-point instructions share rounding-mode and saturation handling,
 1847 * so define the common macros for them here.
1848 */
1849typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1850 CPURISCVState *env, int vxrm);
1851
1852#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1853static inline void \
1854do_##NAME(void *vd, void *vs1, void *vs2, int i, \
1855 CPURISCVState *env, int vxrm) \
1856{ \
1857 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1858 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1859 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
1860}
1861
1862static inline void
1863vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1864 CPURISCVState *env,
f9298de5 1865 uint32_t vl, uint32_t vm, int vxrm,
eb2650e3
LZ
1866 opivv2_rm_fn *fn)
1867{
1868 for (uint32_t i = 0; i < vl; i++) {
f9298de5 1869 if (!vm && !vext_elem_mask(v0, i)) {
eb2650e3
LZ
1870 continue;
1871 }
1872 fn(vd, vs1, vs2, i, env, vxrm);
1873 }
1874}
1875
1876static inline void
1877vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1878 CPURISCVState *env,
1879 uint32_t desc, uint32_t esz, uint32_t dsz,
3479a814 1880 opivv2_rm_fn *fn)
eb2650e3 1881{
eb2650e3
LZ
1882 uint32_t vm = vext_vm(desc);
1883 uint32_t vl = env->vl;
1884
1885 switch (env->vxrm) {
1886 case 0: /* rnu */
1887 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 1888 env, vl, vm, 0, fn);
eb2650e3
LZ
1889 break;
1890 case 1: /* rne */
1891 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 1892 env, vl, vm, 1, fn);
eb2650e3
LZ
1893 break;
1894 case 2: /* rdn */
1895 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 1896 env, vl, vm, 2, fn);
eb2650e3
LZ
1897 break;
1898 default: /* rod */
1899 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 1900 env, vl, vm, 3, fn);
eb2650e3
LZ
1901 break;
1902 }
eb2650e3
LZ
1903}
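/*
 * env->vxrm is sampled once per helper invocation and passed down to the
 * per-element function as a plain integer, so the fixed-point rounding mode
 * is not re-read from CPU state for every element of the vector.
 */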
1904
1905/* generate helpers for fixed point instructions with OPIVV format */
3479a814 1906#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \
eb2650e3
LZ
1907void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1908 CPURISCVState *env, uint32_t desc) \
1909{ \
1910 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
3479a814 1911 do_##NAME); \
eb2650e3
LZ
1912}
1913
1914static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1915{
1916 uint8_t res = a + b;
1917 if (res < a) {
1918 res = UINT8_MAX;
1919 env->vxsat = 0x1;
1920 }
1921 return res;
1922}
1923
1924static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1925 uint16_t b)
1926{
1927 uint16_t res = a + b;
1928 if (res < a) {
1929 res = UINT16_MAX;
1930 env->vxsat = 0x1;
1931 }
1932 return res;
1933}
1934
1935static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1936 uint32_t b)
1937{
1938 uint32_t res = a + b;
1939 if (res < a) {
1940 res = UINT32_MAX;
1941 env->vxsat = 0x1;
1942 }
1943 return res;
1944}
1945
1946static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1947 uint64_t b)
1948{
1949 uint64_t res = a + b;
1950 if (res < a) {
1951 res = UINT64_MAX;
1952 env->vxsat = 0x1;
1953 }
1954 return res;
1955}
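/*
 * In the unsigned saturating adds above, (res < a) detects wrap-around of
 * the modular sum; on overflow the result is clamped to the type maximum
 * and the vxsat flag is raised.
 */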
1956
1957RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
1958RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
1959RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
1960RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
3479a814
FC
1961GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
1962GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
1963GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
1964GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
eb2650e3
LZ
1965
1966typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
1967 CPURISCVState *env, int vxrm);
1968
1969#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1970static inline void \
1971do_##NAME(void *vd, target_long s1, void *vs2, int i, \
1972 CPURISCVState *env, int vxrm) \
1973{ \
1974 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1975 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
1976}
1977
1978static inline void
1979vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
1980 CPURISCVState *env,
f9298de5 1981 uint32_t vl, uint32_t vm, int vxrm,
eb2650e3
LZ
1982 opivx2_rm_fn *fn)
1983{
1984 for (uint32_t i = 0; i < vl; i++) {
f9298de5 1985 if (!vm && !vext_elem_mask(v0, i)) {
eb2650e3
LZ
1986 continue;
1987 }
1988 fn(vd, s1, vs2, i, env, vxrm);
1989 }
1990}
1991
1992static inline void
1993vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
1994 CPURISCVState *env,
1995 uint32_t desc, uint32_t esz, uint32_t dsz,
3479a814 1996 opivx2_rm_fn *fn)
eb2650e3 1997{
eb2650e3
LZ
1998 uint32_t vm = vext_vm(desc);
1999 uint32_t vl = env->vl;
2000
2001 switch (env->vxrm) {
2002 case 0: /* rnu */
2003 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2004 env, vl, vm, 0, fn);
eb2650e3
LZ
2005 break;
2006 case 1: /* rne */
2007 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2008 env, vl, vm, 1, fn);
eb2650e3
LZ
2009 break;
2010 case 2: /* rdn */
2011 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2012 env, vl, vm, 2, fn);
eb2650e3
LZ
2013 break;
2014 default: /* rod */
2015 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2016 env, vl, vm, 3, fn);
eb2650e3
LZ
2017 break;
2018 }
eb2650e3
LZ
2019}
2020
2021/* generate helpers for fixed point instructions with OPIVX format */
3479a814 2022#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \
eb2650e3
LZ
2023void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2024 void *vs2, CPURISCVState *env, uint32_t desc) \
2025{ \
2026 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
3479a814 2027 do_##NAME); \
eb2650e3
LZ
2028}
2029
2030RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2031RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2032RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2033RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
3479a814
FC
2034GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
2035GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
2036GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
2037GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
eb2650e3
LZ
2038
2039static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2040{
2041 int8_t res = a + b;
2042 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2043 res = a > 0 ? INT8_MAX : INT8_MIN;
2044 env->vxsat = 0x1;
2045 }
2046 return res;
2047}
2048
2049static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2050{
2051 int16_t res = a + b;
2052 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2053 res = a > 0 ? INT16_MAX : INT16_MIN;
2054 env->vxsat = 0x1;
2055 }
2056 return res;
2057}
2058
2059static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2060{
2061 int32_t res = a + b;
2062 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2063 res = a > 0 ? INT32_MAX : INT32_MIN;
2064 env->vxsat = 0x1;
2065 }
2066 return res;
2067}
2068
2069static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2070{
2071 int64_t res = a + b;
2072 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2073 res = a > 0 ? INT64_MAX : INT64_MIN;
2074 env->vxsat = 0x1;
2075 }
2076 return res;
2077}
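/*
 * In the signed saturating adds above, (res ^ a) & (res ^ b) has the sign
 * bit set exactly when both operands share a sign and the wrapped sum's
 * sign differs, i.e. precisely on signed overflow; the saturation direction
 * then follows the sign of the operands.
 */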
2078
2079RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2080RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2081RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2082RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
3479a814
FC
2083GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2084GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2085GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2086GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
eb2650e3
LZ
2087
2088RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2089RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2090RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2091RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
3479a814
FC
2092GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2093GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2094GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2095GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
eb2650e3
LZ
2096
2097static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2098{
2099 uint8_t res = a - b;
2100 if (res > a) {
2101 res = 0;
2102 env->vxsat = 0x1;
2103 }
2104 return res;
2105}
2106
2107static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2108 uint16_t b)
2109{
2110 uint16_t res = a - b;
2111 if (res > a) {
2112 res = 0;
2113 env->vxsat = 0x1;
2114 }
2115 return res;
2116}
2117
2118static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2119 uint32_t b)
2120{
2121 uint32_t res = a - b;
2122 if (res > a) {
2123 res = 0;
2124 env->vxsat = 0x1;
2125 }
2126 return res;
2127}
2128
2129static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2130 uint64_t b)
2131{
2132 uint64_t res = a - b;
2133 if (res > a) {
2134 res = 0;
2135 env->vxsat = 0x1;
2136 }
2137 return res;
2138}
2139
2140RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2141RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2142RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2143RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
3479a814
FC
2144GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2145GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2146GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2147GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
eb2650e3
LZ
2148
2149RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2150RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2151RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2152RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
3479a814
FC
2153GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2154GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2155GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2156GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
eb2650e3
LZ
2157
2158static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2159{
2160 int8_t res = a - b;
2161 if ((res ^ a) & (a ^ b) & INT8_MIN) {
65606f21 2162 res = a >= 0 ? INT8_MAX : INT8_MIN;
eb2650e3
LZ
2163 env->vxsat = 0x1;
2164 }
2165 return res;
2166}
2167
2168static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2169{
2170 int16_t res = a - b;
2171 if ((res ^ a) & (a ^ b) & INT16_MIN) {
65606f21 2172 res = a >= 0 ? INT16_MAX : INT16_MIN;
eb2650e3
LZ
2173 env->vxsat = 0x1;
2174 }
2175 return res;
2176}
2177
2178static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2179{
2180 int32_t res = a - b;
2181 if ((res ^ a) & (a ^ b) & INT32_MIN) {
65606f21 2182 res = a >= 0 ? INT32_MAX : INT32_MIN;
eb2650e3
LZ
2183 env->vxsat = 0x1;
2184 }
2185 return res;
2186}
2187
2188static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2189{
2190 int64_t res = a - b;
2191 if ((res ^ a) & (a ^ b) & INT64_MIN) {
65606f21 2192 res = a >= 0 ? INT64_MAX : INT64_MIN;
eb2650e3
LZ
2193 env->vxsat = 0x1;
2194 }
2195 return res;
2196}
2197
2198RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2199RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2200RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2201RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
3479a814
FC
2202GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
2203GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
2204GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
2205GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
eb2650e3
LZ
2206
2207RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2208RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2209RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2210RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
3479a814
FC
2211GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
2212GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
2213GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
2214GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
b7aee481
LZ
2215
2216/* Vector Single-Width Averaging Add and Subtract */
2217static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2218{
2219 uint8_t d = extract64(v, shift, 1);
2220 uint8_t d1;
2221 uint64_t D1, D2;
2222
2223 if (shift == 0 || shift > 64) {
2224 return 0;
2225 }
2226
2227 d1 = extract64(v, shift - 1, 1);
2228 D1 = extract64(v, 0, shift);
2229 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2230 return d1;
2231 } else if (vxrm == 1) { /* round-to-nearest-even */
2232 if (shift > 1) {
2233 D2 = extract64(v, 0, shift - 1);
2234 return d1 & ((D2 != 0) | d);
2235 } else {
2236 return d1 & d;
2237 }
2238 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2239 return !d & (D1 != 0);
2240 }
2241 return 0; /* round-down (truncate) */
2242}
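/*
 * Worked example (illustrative): for v = 0b1011 (11) and shift = 2 the
 * discarded fraction is 0.75, so get_round() returns the increment to add
 * to (v >> 2) = 2:
 *   rnu (vxrm 0): d1 = 1                 -> 3
 *   rne (vxrm 1): d1 & ((D2 != 0) | d)   -> 3
 *   rdn (vxrm 2): 0                      -> 2
 *   rod (vxrm 3): !d & (D1 != 0) = 1     -> 3 (jam to odd)
 * For the exact tie v = 0b1010 (2.5 after the shift), rnu gives 3, rne
 * gives 2 (ties to even), rdn gives 2 and rod gives 3.
 */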
2243
2244static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2245{
2246 int64_t res = (int64_t)a + b;
2247 uint8_t round = get_round(vxrm, res, 1);
2248
2249 return (res >> 1) + round;
2250}
2251
2252static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2253{
2254 int64_t res = a + b;
2255 uint8_t round = get_round(vxrm, res, 1);
2256 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2257
2258 /* With signed overflow, bit 64 is inverse of bit 63. */
2259 return ((res >> 1) ^ over) + round;
2260}
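/*
 * Cross-check sketch (illustrative only; assumes the compiler provides
 * __int128 and is not one of the generated helpers): the exact 65-bit sum
 * gives the same averaged result as the overflow-corrected code above.
 */
#ifdef __SIZEOF_INT128__
static inline int64_t aadd64_ref(int vxrm, int64_t a, int64_t b)
{
    __int128 res = (__int128)a + b;   /* exact sum, no wrap-around */
    /* only bits 0 and 1 of the sum matter for get_round() at shift 1 */
    return (int64_t)((res >> 1) + get_round(vxrm, (uint64_t)res, 1));
}
#endif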
2261
2262RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2263RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2264RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2265RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
3479a814
FC
2266GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
2267GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
2268GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
2269GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
b7aee481
LZ
2270
2271RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2272RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2273RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2274RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
3479a814
FC
2275GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
2276GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
2277GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
2278GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
b7aee481 2279
8b99a110
FC
2280static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2281 uint32_t a, uint32_t b)
2282{
2283 uint64_t res = (uint64_t)a + b;
2284 uint8_t round = get_round(vxrm, res, 1);
2285
2286 return (res >> 1) + round;
2287}
2288
2289static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2290 uint64_t a, uint64_t b)
2291{
2292 uint64_t res = a + b;
2293 uint8_t round = get_round(vxrm, res, 1);
2294 uint64_t over = (uint64_t)(res < a) << 63;
2295
2296 return ((res >> 1) | over) + round;
2297}
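/*
 * For the unsigned averaging variants the 65th bit of the exact sum is just
 * the carry out (res < a), so it is OR-ed back in as bit 63 of the halved
 * result instead of the XOR correction used in the signed case.
 */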
2298
2299RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2300RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2301RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2302RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2303GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
2304GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
2305GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
2306GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
2307
2308RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2309RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2310RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2311RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2312GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
2313GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
2314GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
2315GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
2316
b7aee481
LZ
2317static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2318{
2319 int64_t res = (int64_t)a - b;
2320 uint8_t round = get_round(vxrm, res, 1);
2321
2322 return (res >> 1) + round;
2323}
2324
2325static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2326{
2327 int64_t res = (int64_t)a - b;
2328 uint8_t round = get_round(vxrm, res, 1);
2329 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2330
2331 /* With signed overflow, bit 64 is inverse of bit 63. */
2332 return ((res >> 1) ^ over) + round;
2333}
2334
2335RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2336RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2337RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2338RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
3479a814
FC
2339GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
2340GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
2341GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
2342GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
b7aee481
LZ
2343
2344RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2345RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2346RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2347RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
3479a814
FC
2348GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
2349GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
2350GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
2351GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
9f0ff9e5 2352
8b99a110
FC
2353static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2354 uint32_t a, uint32_t b)
2355{
2356 int64_t res = (int64_t)a - b;
2357 uint8_t round = get_round(vxrm, res, 1);
2358
2359 return (res >> 1) + round;
2360}
2361
2362static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2363 uint64_t a, uint64_t b)
2364{
2365 uint64_t res = (uint64_t)a - b;
2366 uint8_t round = get_round(vxrm, res, 1);
2367 uint64_t over = (uint64_t)(res > a) << 63;
2368
2369 return ((res >> 1) | over) + round;
2370}
2371
2372RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2373RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2374RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2375RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2376GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
2377GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
2378GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
2379GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
2380
2381RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2382RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2383RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2384RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2385GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
2386GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
2387GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
2388GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
2389
9f0ff9e5
LZ
2390/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2391static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2392{
2393 uint8_t round;
2394 int16_t res;
2395
2396 res = (int16_t)a * (int16_t)b;
2397 round = get_round(vxrm, res, 7);
2398 res = (res >> 7) + round;
2399
2400 if (res > INT8_MAX) {
2401 env->vxsat = 0x1;
2402 return INT8_MAX;
2403 } else if (res < INT8_MIN) {
2404 env->vxsat = 0x1;
2405 return INT8_MIN;
2406 } else {
2407 return res;
2408 }
2409}
2410
2411static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2412{
2413 uint8_t round;
2414 int32_t res;
2415
2416 res = (int32_t)a * (int32_t)b;
2417 round = get_round(vxrm, res, 15);
2418 res = (res >> 15) + round;
2419
2420 if (res > INT16_MAX) {
2421 env->vxsat = 0x1;
2422 return INT16_MAX;
2423 } else if (res < INT16_MIN) {
2424 env->vxsat = 0x1;
2425 return INT16_MIN;
2426 } else {
2427 return res;
2428 }
2429}
2430
2431static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2432{
2433 uint8_t round;
2434 int64_t res;
2435
2436 res = (int64_t)a * (int64_t)b;
2437 round = get_round(vxrm, res, 31);
2438 res = (res >> 31) + round;
2439
2440 if (res > INT32_MAX) {
2441 env->vxsat = 0x1;
2442 return INT32_MAX;
2443 } else if (res < INT32_MIN) {
2444 env->vxsat = 0x1;
2445 return INT32_MIN;
2446 } else {
2447 return res;
2448 }
2449}
2450
2451static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2452{
2453 uint8_t round;
2454 uint64_t hi_64, lo_64;
2455 int64_t res;
2456
2457 if (a == INT64_MIN && b == INT64_MIN) {
2458 env->vxsat = 1;
2459 return INT64_MAX;
2460 }
2461
2462 muls64(&lo_64, &hi_64, a, b);
2463 round = get_round(vxrm, lo_64, 63);
2464 /*
2465 * Cannot overflow, as there are always
2466 * 2 sign bits after multiply.
2467 */
2468 res = (hi_64 << 1) | (lo_64 >> 63);
2469 if (round) {
2470 if (res == INT64_MAX) {
2471 env->vxsat = 1;
2472 } else {
2473 res += 1;
2474 }
2475 }
2476 return res;
2477}
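/*
 * (hi_64 << 1) | (lo_64 >> 63) selects bits [126:63] of the 128-bit
 * product, i.e. the product shifted right by SEW - 1 = 63 as required by
 * the fractional multiply; bit 127 is a redundant sign bit for every input
 * pair except INT64_MIN * INT64_MIN, which was saturated above.
 */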
2478
2479RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2480RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2481RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2482RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
3479a814
FC
2483GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
2484GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
2485GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
2486GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
9f0ff9e5
LZ
2487
2488RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2489RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2490RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2491RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
3479a814
FC
2492GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
2493GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
2494GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
2495GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
0a1eaf00 2496
04a61406
LZ
2497/* Vector Single-Width Scaling Shift Instructions */
2498static inline uint8_t
2499vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2500{
2501 uint8_t round, shift = b & 0x7;
2502 uint8_t res;
2503
2504 round = get_round(vxrm, a, shift);
2505 res = (a >> shift) + round;
2506 return res;
2507}
2508static inline uint16_t
2509vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2510{
2511 uint8_t round, shift = b & 0xf;
2512 uint16_t res;
2513
2514 round = get_round(vxrm, a, shift);
2515 res = (a >> shift) + round;
2516 return res;
2517}
2518static inline uint32_t
2519vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2520{
2521 uint8_t round, shift = b & 0x1f;
2522 uint32_t res;
2523
2524 round = get_round(vxrm, a, shift);
2525 res = (a >> shift) + round;
2526 return res;
2527}
2528static inline uint64_t
2529vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2530{
2531 uint8_t round, shift = b & 0x3f;
2532 uint64_t res;
2533
2534 round = get_round(vxrm, a, shift);
2535 res = (a >> shift) + round;
2536 return res;
2537}
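/*
 * The scaling shifts above are plain logical right shifts by the low
 * log2(SEW) bits of the shift operand, with the discarded bits rounded
 * according to vxrm via get_round(); e.g. with vxrm = rnu,
 * vssrl8(env, vxrm, 11, 2) yields (11 >> 2) + 1 = 3.
 */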
2538RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2539RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2540RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2541RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
3479a814
FC
2542GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
2543GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
2544GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
2545GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
04a61406
LZ
2546
2547RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2548RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2549RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2550RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
3479a814
FC
2551GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
2552GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
2553GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
2554GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
04a61406
LZ
2555
2556static inline int8_t
2557vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2558{
2559 uint8_t round, shift = b & 0x7;
2560 int8_t res;
2561
2562 round = get_round(vxrm, a, shift);
2563 res = (a >> shift) + round;
2564 return res;
2565}
2566static inline int16_t
2567vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2568{
2569 uint8_t round, shift = b & 0xf;
2570 int16_t res;
2571
2572 round = get_round(vxrm, a, shift);
2573 res = (a >> shift) + round;
2574 return res;
2575}
2576static inline int32_t
2577vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2578{
2579 uint8_t round, shift = b & 0x1f;
2580 int32_t res;
2581
2582 round = get_round(vxrm, a, shift);
2583 res = (a >> shift) + round;
2584 return res;
2585}
2586static inline int64_t
2587vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2588{
2589 uint8_t round, shift = b & 0x3f;
2590 int64_t res;
2591
2592 round = get_round(vxrm, a, shift);
2593 res = (a >> shift) + round;
2594 return res;
2595}
9ff3d287 2596
04a61406
LZ
2597RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2598RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2599RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2600RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
3479a814
FC
2601GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
2602GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
2603GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
2604GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
04a61406
LZ
2605
2606RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2607RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2608RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2609RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
3479a814
FC
2610GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
2611GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
2612GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
2613GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
9ff3d287
LZ
2614
2615/* Vector Narrowing Fixed-Point Clip Instructions */
2616static inline int8_t
2617vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2618{
2619 uint8_t round, shift = b & 0xf;
2620 int16_t res;
2621
2622 round = get_round(vxrm, a, shift);
2623 res = (a >> shift) + round;
2624 if (res > INT8_MAX) {
2625 env->vxsat = 0x1;
2626 return INT8_MAX;
2627 } else if (res < INT8_MIN) {
2628 env->vxsat = 0x1;
2629 return INT8_MIN;
2630 } else {
2631 return res;
2632 }
2633}
2634
2635static inline int16_t
2636vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2637{
2638 uint8_t round, shift = b & 0x1f;
2639 int32_t res;
2640
2641 round = get_round(vxrm, a, shift);
2642 res = (a >> shift) + round;
2643 if (res > INT16_MAX) {
2644 env->vxsat = 0x1;
2645 return INT16_MAX;
2646 } else if (res < INT16_MIN) {
2647 env->vxsat = 0x1;
2648 return INT16_MIN;
2649 } else {
2650 return res;
2651 }
2652}
2653
2654static inline int32_t
2655vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2656{
2657 uint8_t round, shift = b & 0x3f;
2658 int64_t res;
2659
2660 round = get_round(vxrm, a, shift);
2661 res = (a >> shift) + round;
2662 if (res > INT32_MAX) {
2663 env->vxsat = 0x1;
2664 return INT32_MAX;
2665 } else if (res < INT32_MIN) {
2666 env->vxsat = 0x1;
2667 return INT32_MIN;
2668 } else {
2669 return res;
2670 }
2671}
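/*
 * The narrowing clips combine a rounded right shift of the 2*SEW-wide
 * source with saturation to the SEW-wide signed range; e.g. with
 * vxrm = rdn, vnclip8(env, vxrm, 0x1234, 4) computes 0x123, which exceeds
 * INT8_MAX and therefore saturates to 0x7f with vxsat set.
 */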
2672
a70b3a73
FC
2673RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2674RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2675RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2676GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
2677GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
2678GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
2679
2680RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2681RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2682RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2683GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
2684GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
2685GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
9ff3d287
LZ
2686
2687static inline uint8_t
2688vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2689{
2690 uint8_t round, shift = b & 0xf;
2691 uint16_t res;
2692
2693 round = get_round(vxrm, a, shift);
2694 res = (a >> shift) + round;
2695 if (res > UINT8_MAX) {
2696 env->vxsat = 0x1;
2697 return UINT8_MAX;
2698 } else {
2699 return res;
2700 }
2701}
2702
2703static inline uint16_t
2704vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2705{
2706 uint8_t round, shift = b & 0x1f;
2707 uint32_t res;
2708
2709 round = get_round(vxrm, a, shift);
2710 res = (a >> shift) + round;
2711 if (res > UINT16_MAX) {
2712 env->vxsat = 0x1;
2713 return UINT16_MAX;
2714 } else {
2715 return res;
2716 }
2717}
2718
2719static inline uint32_t
2720vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2721{
2722 uint8_t round, shift = b & 0x3f;
a70b3a73 2723 uint64_t res;
9ff3d287
LZ
2724
2725 round = get_round(vxrm, a, shift);
2726 res = (a >> shift) + round;
2727 if (res > UINT32_MAX) {
2728 env->vxsat = 0x1;
2729 return UINT32_MAX;
2730 } else {
2731 return res;
2732 }
2733}
2734
a70b3a73
FC
2735RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2736RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2737RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2738GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
2739GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
2740GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
9ff3d287 2741
a70b3a73
FC
2742RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2743RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2744RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2745GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
2746GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
2747GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
ce2a0343
LZ
2748
2749/*
 2750 *** Vector Floating-Point Arithmetic Instructions
2751 */
2752/* Vector Single-Width Floating-Point Add/Subtract Instructions */
2753#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2754static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2755 CPURISCVState *env) \
2756{ \
2757 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2758 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2759 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
2760}
2761
3479a814 2762#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \
ce2a0343
LZ
2763void HELPER(NAME)(void *vd, void *v0, void *vs1, \
2764 void *vs2, CPURISCVState *env, \
2765 uint32_t desc) \
2766{ \
ce2a0343
LZ
2767 uint32_t vm = vext_vm(desc); \
2768 uint32_t vl = env->vl; \
2769 uint32_t i; \
2770 \
2771 for (i = 0; i < vl; i++) { \
f9298de5 2772 if (!vm && !vext_elem_mask(v0, i)) { \
ce2a0343
LZ
2773 continue; \
2774 } \
2775 do_##NAME(vd, vs1, vs2, i, env); \
2776 } \
ce2a0343
LZ
2777}
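/*
 * The floating-point helpers mirror the integer GEN_VEXT_VV pattern but
 * additionally thread &env->fp_status into the softfloat primitives, so the
 * guest's rounding mode and accrued exception flags apply per element.
 */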
2778
2779RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2780RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2781RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
3479a814
FC
2782GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
2783GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
2784GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
ce2a0343
LZ
2785
2786#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2787static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2788 CPURISCVState *env) \
2789{ \
2790 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2791 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2792}
2793
3479a814 2794#define GEN_VEXT_VF(NAME, ESZ, DSZ) \
ce2a0343
LZ
2795void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
2796 void *vs2, CPURISCVState *env, \
2797 uint32_t desc) \
2798{ \
ce2a0343
LZ
2799 uint32_t vm = vext_vm(desc); \
2800 uint32_t vl = env->vl; \
2801 uint32_t i; \
2802 \
2803 for (i = 0; i < vl; i++) { \
f9298de5 2804 if (!vm && !vext_elem_mask(v0, i)) { \
ce2a0343
LZ
2805 continue; \
2806 } \
2807 do_##NAME(vd, s1, vs2, i, env); \
2808 } \
ce2a0343
LZ
2809}
2810
2811RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2812RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2813RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
3479a814
FC
2814GEN_VEXT_VF(vfadd_vf_h, 2, 2)
2815GEN_VEXT_VF(vfadd_vf_w, 4, 4)
2816GEN_VEXT_VF(vfadd_vf_d, 8, 8)
ce2a0343
LZ
2817
2818RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2819RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2820RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
3479a814
FC
2821GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
2822GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
2823GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
ce2a0343
LZ
2824RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2825RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2826RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
3479a814
FC
2827GEN_VEXT_VF(vfsub_vf_h, 2, 2)
2828GEN_VEXT_VF(vfsub_vf_w, 4, 4)
2829GEN_VEXT_VF(vfsub_vf_d, 8, 8)
ce2a0343
LZ
2830
2831static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2832{
2833 return float16_sub(b, a, s);
2834}
2835
2836static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2837{
2838 return float32_sub(b, a, s);
2839}
2840
2841static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2842{
2843 return float64_sub(b, a, s);
2844}
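/*
 * The *_rsub wrappers only swap operand order: OPFVF2 always passes the
 * vector element first and the scalar second, so vfrsub.vf ends up
 * computing (scalar - vector element) while reusing the common plumbing.
 */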
2845
2846RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2847RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2848RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
3479a814
FC
2849GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
2850GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
2851GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
eeffab2e
LZ
2852
2853/* Vector Widening Floating-Point Add/Subtract Instructions */
2854static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2855{
2856 return float32_add(float16_to_float32(a, true, s),
2857 float16_to_float32(b, true, s), s);
2858}
2859
2860static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2861{
2862 return float64_add(float32_to_float64(a, s),
2863 float32_to_float64(b, s), s);
2864
2865}
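/*
 * The widening forms promote each narrow input exactly (the "true" argument
 * selects IEEE half-precision semantics for the f16 conversion) and do the
 * arithmetic in the wider format, so each result is rounded only once.
 */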
2866
2867RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2868RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
3479a814
FC
2869GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
2870GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
eeffab2e
LZ
2871RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2872RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
3479a814
FC
2873GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
2874GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
eeffab2e
LZ
2875
2876static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2877{
2878 return float32_sub(float16_to_float32(a, true, s),
2879 float16_to_float32(b, true, s), s);
2880}
2881
2882static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2883{
2884 return float64_sub(float32_to_float64(a, s),
2885 float32_to_float64(b, s), s);
2886
2887}
2888
2889RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2890RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3479a814
FC
2891GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
2892GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
eeffab2e
LZ
2893RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2894RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3479a814
FC
2895GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
2896GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
eeffab2e
LZ
2897
2898static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2899{
2900 return float32_add(a, float16_to_float32(b, true, s), s);
2901}
2902
2903static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2904{
2905 return float64_add(a, float32_to_float64(b, s), s);
2906}
2907
2908RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2909RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3479a814
FC
2910GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
2911GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
eeffab2e
LZ
2912RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2913RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3479a814
FC
2914GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
2915GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
eeffab2e
LZ
2916
2917static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2918{
2919 return float32_sub(a, float16_to_float32(b, true, s), s);
2920}
2921
2922static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2923{
2924 return float64_sub(a, float32_to_float64(b, s), s);
2925}
2926
2927RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
2928RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3479a814
FC
2929GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
2930GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
eeffab2e
LZ
2931RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
2932RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3479a814
FC
2933GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
2934GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
0e0057cb
LZ
2935
2936/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
2937RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
2938RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
2939RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3479a814
FC
2940GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
2941GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
2942GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
0e0057cb
LZ
2943RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
2944RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
2945RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3479a814
FC
2946GEN_VEXT_VF(vfmul_vf_h, 2, 2)
2947GEN_VEXT_VF(vfmul_vf_w, 4, 4)
2948GEN_VEXT_VF(vfmul_vf_d, 8, 8)
0e0057cb
LZ
2949
2950RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
2951RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
2952RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3479a814
FC
2953GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
2954GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
2955GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
0e0057cb
LZ
2956RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
2957RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
2958RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3479a814
FC
2959GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
2960GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
2961GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
0e0057cb
LZ
2962
2963static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
2964{
2965 return float16_div(b, a, s);
2966}
2967
2968static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
2969{
2970 return float32_div(b, a, s);
2971}
2972
2973static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
2974{
2975 return float64_div(b, a, s);
2976}
2977
2978RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
2979RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
2980RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3479a814
FC
2981GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
2982GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
2983GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
f7c7b7cd
LZ
2984
2985/* Vector Widening Floating-Point Multiply */
2986static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
2987{
2988 return float32_mul(float16_to_float32(a, true, s),
2989 float16_to_float32(b, true, s), s);
2990}
2991
2992static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
2993{
2994 return float64_mul(float32_to_float64(a, s),
2995 float32_to_float64(b, s), s);
2996
2997}
2998RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
2999RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3479a814
FC
3000GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
3001GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
f7c7b7cd
LZ
3002RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3003RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3479a814
FC
3004GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
3005GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
4aa5a8fe
LZ
3006
3007/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3008#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3009static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3010 CPURISCVState *env) \
3011{ \
3012 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3013 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3014 TD d = *((TD *)vd + HD(i)); \
3015 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3016}

static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, 0, s);
}

static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, 0, s);
}

static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, 0, s);
}

RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)

#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env) \
{ \
    TX2 s2 = *((T2 *)vs2 + HS2(i)); \
    TD d = *((TD *)vd + HD(i)); \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status); \
}

RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
GEN_VEXT_VF(vfmacc_vf_d, 8, 8)

static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d,
            float_muladd_negate_c | float_muladd_negate_product, s);
}

static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d,
            float_muladd_negate_c | float_muladd_negate_product, s);
}

static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d,
            float_muladd_negate_c | float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)

static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_c, s);
}

static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_c, s);
}

static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_c, s);
}

RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
GEN_VEXT_VF(vfmsac_vf_d, 8, 8)

static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_product, s);
}

static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_product, s);
}

static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)

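/*
 * The vfmadd/vfnmadd/vfmsub/vfnmsub helpers below swap the accumulator:
 * the product is formed from the destination element and vs1 (or the
 * scalar), and vs2 is the addend, i.e. vfmadd computes
 * vd[i] = vd[i] * vs1[i] + vs2[i].
 */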
static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, 0, s);
}

static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, 0, s);
}

static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, 0, s);
}

RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
GEN_VEXT_VF(vfmadd_vf_d, 8, 8)

static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a,
            float_muladd_negate_c | float_muladd_negate_product, s);
}

static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a,
            float_muladd_negate_c | float_muladd_negate_product, s);
}

static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a,
            float_muladd_negate_c | float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)

static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, float_muladd_negate_c, s);
}

static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, float_muladd_negate_c, s);
}

static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, float_muladd_negate_c, s);
}

RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
GEN_VEXT_VF(vfmsub_vf_d, 8, 8)

static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, float_muladd_negate_product, s);
}

static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, float_muladd_negate_product, s);
}

static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)

/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
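/*
 * Both source operands are first converted up to 2*SEW (fp16 to fp32,
 * fp32 to fp64) and the multiply-add is then performed once in the wider
 * type, so only a single rounding step is applied.
 */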
static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d, 0, s);
}

static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d, 0, s);
}

RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)

static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c | float_muladd_negate_product, s);
}

static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_c | float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)

static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c, s);
}

static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_c, s);
}

RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)

static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_product, s);
}

static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)

/* Vector Floating-Point Square-Root Instruction */
/* (TD, T2, TX2) */
#define OP_UU_H uint16_t, uint16_t, uint16_t
#define OP_UU_W uint32_t, uint32_t, uint32_t
#define OP_UU_D uint64_t, uint64_t, uint64_t

#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, void *vs2, int i, \
                      CPURISCVState *env) \
{ \
    TX2 s2 = *((T2 *)vs2 + HS2(i)); \
    *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
}
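
/*
 * OPFVV1/GEN_VEXT_V_ENV form the single-operand template shared by vfsqrt.v
 * and the floating-point type-convert helpers further down; masked-off
 * elements are simply skipped.
 */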

#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
    \
    if (vl == 0) { \
        return; \
    } \
    for (i = 0; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        do_##NAME(vd, vs2, i, env); \
    } \
}

RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)

/* Vector Floating-Point MIN/MAX Instructions */
RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
GEN_VEXT_VF(vfmin_vf_h, 2, 2)
GEN_VEXT_VF(vfmin_vf_w, 4, 4)
GEN_VEXT_VF(vfmin_vf_d, 8, 8)

RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
GEN_VEXT_VF(vfmax_vf_h, 2, 2)
GEN_VEXT_VF(vfmax_vf_w, 4, 4)
GEN_VEXT_VF(vfmax_vf_d, 8, 8)

/* Vector Floating-Point Sign-Injection Instructions */
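/*
 * Sign injection is pure bit manipulation: deposit64() keeps the low
 * SEW-1 magnitude bits of operand a and takes the sign bit from b
 * (inverted for vfsgnjn, XORed with a's sign for vfsgnjx); no FP flags
 * are raised.
 */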
static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
{
    return deposit64(b, 0, 15, a);
}

static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
{
    return deposit64(b, 0, 31, a);
}

static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
{
    return deposit64(b, 0, 63, a);
}

RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)

static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
{
    return deposit64(~b, 0, 15, a);
}

static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
{
    return deposit64(~b, 0, 31, a);
}

static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
{
    return deposit64(~b, 0, 63, a);
}

RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)

static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
{
    return deposit64(b ^ a, 0, 15, a);
}

static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
{
    return deposit64(b ^ a, 0, 31, a);
}

static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
{
    return deposit64(b ^ a, 0, 63, a);
}

RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)

/* Vector Floating-Point Compare Instructions */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
    \
    for (i = 0; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        vext_set_elem_mask(vd, i, \
                           DO_OP(s2, s1, &env->fp_status)); \
    } \
}
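
/*
 * The compare helpers produce a mask register: one result bit per element,
 * written with vext_set_elem_mask(); elements that are masked off are left
 * unchanged by the helper.
 */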

GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)

#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
    \
    for (i = 0; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        vext_set_elem_mask(vd, i, \
                           DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
    } \
}

GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)

static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)

GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)

GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)

static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare(a, b, s);
    return compare == float_relation_greater;
}

static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare(a, b, s);
    return compare == float_relation_greater;
}

static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare(a, b, s);
    return compare == float_relation_greater;
}

GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)

static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)

/* Vector Floating-Point Classify Instruction */
#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, void *vs2, int i) \
{ \
    TX2 s2 = *((T2 *)vs2 + HS2(i)); \
    *((TD *)vd + HD(i)) = OP(s2); \
}

#define GEN_VEXT_V(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
    \
    for (i = 0; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        do_##NAME(vd, vs2, i); \
    } \
}

target_ulong fclass_h(uint64_t frs1)
{
    float16 f = frs1;
    bool sign = float16_is_neg(f);

    if (float16_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float16_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float16_is_zero_or_denormal(f)) {
        return sign ? 1 << 2 : 1 << 5;
    } else if (float16_is_any_nan(f)) {
        float_status s = { }; /* for snan_bit_is_one */
        return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}

target_ulong fclass_s(uint64_t frs1)
{
    float32 f = frs1;
    bool sign = float32_is_neg(f);

    if (float32_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float32_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float32_is_zero_or_denormal(f)) {
        return sign ? 1 << 2 : 1 << 5;
    } else if (float32_is_any_nan(f)) {
        float_status s = { }; /* for snan_bit_is_one */
        return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}

target_ulong fclass_d(uint64_t frs1)
{
    float64 f = frs1;
    bool sign = float64_is_neg(f);

    if (float64_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float64_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float64_is_zero_or_denormal(f)) {
        return sign ? 1 << 2 : 1 << 5;
    } else if (float64_is_any_nan(f)) {
        float_status s = { }; /* for snan_bit_is_one */
        return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}
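
/*
 * The fclass_* results use the same 10-bit encoding as the scalar FCLASS
 * instruction: bit 0 = -inf, 1 = negative normal, 2 = negative subnormal,
 * 3 = -0, 4 = +0, 5 = positive subnormal, 6 = positive normal, 7 = +inf,
 * 8 = signaling NaN, 9 = quiet NaN.
 */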

RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
GEN_VEXT_V(vfclass_v_h, 2, 2)
GEN_VEXT_V(vfclass_v_w, 4, 4)
GEN_VEXT_V(vfclass_v_d, 8, 8)

/* Vector Floating-Point Merge Instruction */
#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
    \
    for (i = 0; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        *((ETYPE *)vd + H(i)) \
            = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
    } \
}

GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)

/* Single-Width Floating-Point/Integer Type-Convert Instructions */
/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)

/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)

/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)

/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)

/* Widening Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define WOP_UU_B uint16_t, uint8_t, uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)

/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)

/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)

/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)

/*
 * vfwcvt.f.f.v vd, vs2, vm
 * Convert single-width float to double-width float.
 */
static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
{
    return float16_to_float32(a, true, s);
}

RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)

/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4)

/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4)

/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4)

/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
static uint16_t vfncvtffv16(uint32_t a, float_status *s)
{
    return float32_to_float16(a, true, s);
}

RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16)
RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4)

/*
 *** Vector Reduction Operations
 */
/* Vector Single-Width Integer Reduction Instructions */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
    TD s1 = *((TD *)vs1 + HD(0)); \
    \
    for (i = 0; i < vl; i++) { \
        TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        s1 = OP(s1, (TD)s2); \
    } \
    *((TD *)vd + HD(0)) = s1; \
}
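
/*
 * Only element 0 of vd is written by the reduction helpers; the scalar
 * accumulator is seeded from vs1[0] and then folded with every active
 * element of vs2.
 */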

/* vd[0] = sum(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)

/* vd[0] = maxu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)

/* vd[0] = max(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)

/* vd[0] = minu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)

/* vd[0] = min(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)

/* vd[0] = and(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)

/* vd[0] = or(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)

/* vd[0] = xor(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)

/* Vector Widening Integer Reduction Instructions */
/* signed sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)

/* Unsigned sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)

/* Vector Single-Width Floating-Point Reduction Instructions */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
    TD s1 = *((TD *)vs1 + HD(0)); \
    \
    for (i = 0; i < vl; i++) { \
        TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        s1 = OP(s1, (TD)s2, &env->fp_status); \
    } \
    *((TD *)vd + HD(0)) = s1; \
}

/* Unordered sum */
GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Maximum value */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)

/* Minimum value */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)

/* Vector Widening Floating-Point Reduction Instructions */
/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
                            void *vs2, CPURISCVState *env, uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;
    uint32_t s1 = *((uint32_t *)vs1 + H4(0));

    for (i = 0; i < vl; i++) {
        uint16_t s2 = *((uint16_t *)vs2 + H2(i));
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
                         &env->fp_status);
    }
    *((uint32_t *)vd + H4(0)) = s1;
}

void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
                            void *vs2, CPURISCVState *env, uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;
    uint64_t s1 = *((uint64_t *)vs1);

    for (i = 0; i < vl; i++) {
        uint32_t s2 = *((uint32_t *)vs2 + H4(i));
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
                         &env->fp_status);
    }
    *((uint64_t *)vd) = s1;
}

/*
 *** Vector Mask Operations
 */
/* Vector Mask-Register Logical Instructions */
#define GEN_VEXT_MASK_VV(NAME, OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t i; \
    int a, b; \
    \
    for (i = 0; i < vl; i++) { \
        a = vext_elem_mask(vs1, i); \
        b = vext_elem_mask(vs2, i); \
        vext_set_elem_mask(vd, i, OP(b, a)); \
    } \
}

#define DO_NAND(N, M) (!(N & M))
#define DO_ANDNOT(N, M) (N & !M)
#define DO_NOR(N, M) (!(N | M))
#define DO_ORNOT(N, M) (N | !M)
#define DO_XNOR(N, M) (!(N ^ M))

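/*
 * The mask operands handled by GEN_VEXT_MASK_VV are single bits (0 or 1),
 * so the DO_* macros above use logical negation rather than bitwise
 * complement to keep the result within bit 0.
 */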
GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)

/* Vector count population in mask vcpop */
target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
                             uint32_t desc)
{
    target_ulong cnt = 0;
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = 0; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                cnt++;
            }
        }
    }
    return cnt;
}

/* vfirst find-first-set mask bit*/
target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
                              uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = 0; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                return i;
            }
        }
    }
    return -1LL;
}

enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};
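
/*
 * vmsbf, vmsif and vmsof all use vmsetm(): vmsbf sets the active bits
 * before the first set bit of vs2, vmsif also sets the bit at that
 * position, and vmsof sets only that position; all later active bits are
 * cleared.
 */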

static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;
    bool first_mask_bit = false;

    for (i = 0; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t sum = 0; \
    int i; \
    \
    for (i = 0; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        *((ETYPE *)vd + H(i)) = sum; \
        if (vext_elem_mask(vs2, i)) { \
            sum++; \
        } \
    } \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    int i; \
    \
    for (i = 0; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        *((ETYPE *)vd + H(i)) = i; \
    } \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)

/*
 *** Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    target_ulong offset = s1, i; \
    \
    for (i = offset; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
    } \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    target_ulong i_max, i; \
    \
    i_max = MIN(s1 < vlmax ? vlmax - s1 : 0, vl); \
    for (i = 0; i < i_max; ++i) { \
        if (vm || vext_elem_mask(v0, i)) { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
        } \
    } \
    \
    for (i = i_max; i < vl; ++i) { \
        if (vm || vext_elem_mask(v0, i)) { \
            *((ETYPE *)vd + H(i)) = 0; \
        } \
    } \
}
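
/*
 * For vslidedown.vx, source indices at or beyond vlmax have no source
 * element, so the corresponding active destination elements are written
 * as zero rather than left undisturbed.
 */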

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIE1UP(ESZ, H) \
static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
                            CPURISCVState *env, uint32_t desc) \
{ \
    typedef uint##ESZ##_t ETYPE; \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
    \
    for (i = 0; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        if (i == 0) { \
            *((ETYPE *)vd + H(i)) = s1; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
        } \
    } \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \
static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
                              CPURISCVState *env, uint32_t desc) \
{ \
    typedef uint##ESZ##_t ETYPE; \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
    \
    for (i = 0; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        if (i == vl - 1) { \
            *((ETYPE *)vd + H(i)) = s1; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
        } \
    } \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS1))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint64_t index; \
    uint32_t i; \
    \
    for (i = 0; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        index = *((TS1 *)vs1 + HS1(i)); \
        if (index >= vlmax) { \
            *((TS2 *)vd + HS2(i)) = 0; \
        } else { \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
        } \
    } \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint64_t index = s1; \
    uint32_t i; \
    \
    for (i = 0; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        if (index >= vlmax) { \
            *((ETYPE *)vd + H(i)) = 0; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
        } \
    } \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t num = 0, i; \
    \
    for (i = 0; i < vl; i++) { \
        if (!vext_elem_mask(vs1, i)) { \
            continue; \
        } \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
        num++; \
    } \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t i; \
    \
    for (i = 0; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
    } \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)