git.proxmox.com Git - mirror_qemu.git/blame - target/riscv/vector_helper.c
target/riscv: rvv: Add mask agnostic for vx instructions
2b7168fc
LZ
1/*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include "qemu/osdep.h"
5a9f8e15 20#include "qemu/host-utils.h"
e848a1e5 21#include "qemu/bitops.h"
2b7168fc 22#include "cpu.h"
751538d5 23#include "exec/memop.h"
2b7168fc
LZ
24#include "exec/exec-all.h"
25#include "exec/helper-proto.h"
ce2a0343 26#include "fpu/softfloat.h"
751538d5
LZ
27#include "tcg/tcg-gvec-desc.h"
28#include "internals.h"
2b7168fc
LZ
29#include <math.h>
30
31target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32 target_ulong s2)
33{
34 int vlmax, vl;
35 RISCVCPU *cpu = env_archcpu(env);
d9b7609a 36 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
2b7168fc
LZ
37 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
31961cfe
LZ
39 int xlen = riscv_cpu_xlen(env);
40 bool vill = (s2 >> (xlen - 1)) & 0x1;
41 target_ulong reserved = s2 &
42 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
43 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
2b7168fc 44
d9b7609a
FC
45 if (lmul & 4) {
46 /* Fractional LMUL. */
47 if (lmul == 4 ||
48 cpu->cfg.elen >> (8 - lmul) < sew) {
49 vill = true;
50 }
51 }
52
53 if ((sew > cpu->cfg.elen)
54 || vill
55 || (ediv != 0)
56 || (reserved != 0)) {
2b7168fc 57 /* only set vill bit. */
d96a271a
LZ
58 env->vill = 1;
59 env->vtype = 0;
2b7168fc
LZ
60 env->vl = 0;
61 env->vstart = 0;
62 return 0;
63 }
64
65 vlmax = vext_get_vlmax(cpu, s2);
66 if (s1 <= vlmax) {
67 vl = s1;
68 } else {
69 vl = vlmax;
70 }
71 env->vl = vl;
72 env->vtype = s2;
73 env->vstart = 0;
ac6bcf4d 74 env->vill = 0;
2b7168fc
LZ
75 return vl;
76}
751538d5
LZ
77
78/*
79 * Note that vector data is stored in host-endian 64-bit chunks,
80 * so addressing units smaller than that needs a host-endian fixup.
81 */
e03b5686 82#if HOST_BIG_ENDIAN
751538d5
LZ
83#define H1(x) ((x) ^ 7)
84#define H1_2(x) ((x) ^ 6)
85#define H1_4(x) ((x) ^ 4)
86#define H2(x) ((x) ^ 3)
87#define H4(x) ((x) ^ 1)
88#define H8(x) ((x))
89#else
90#define H1(x) (x)
91#define H1_2(x) (x)
92#define H1_4(x) (x)
93#define H2(x) (x)
94#define H4(x) (x)
95#define H8(x) (x)
96#endif
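/*
 * Editorial example (not part of the original source): on a big-endian host,
 * H2() flips the low index bits of a 16-bit element so it lands at the right
 * byte offset inside its host-endian 64-bit chunk:
 *
 *     uint16_t *v = (uint16_t *)vreg;
 *     uint16_t e0 = v[H2(0)];   // reads v[3] on big-endian, v[0] on little-endian
 *     uint16_t e5 = v[H2(5)];   // reads v[6] on big-endian, v[5] on little-endian
 */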
97
98static inline uint32_t vext_nf(uint32_t desc)
99{
100 return FIELD_EX32(simd_data(desc), VDATA, NF);
101}
102
751538d5
LZ
103static inline uint32_t vext_vm(uint32_t desc)
104{
105 return FIELD_EX32(simd_data(desc), VDATA, VM);
106}
107
33f1beaf
FC
108/*
 109 * Encode LMUL to lmul as follows:
110 * LMUL vlmul lmul
111 * 1 000 0
112 * 2 001 1
113 * 4 010 2
114 * 8 011 3
115 * - 100 -
116 * 1/8 101 -3
117 * 1/4 110 -2
118 * 1/2 111 -1
119 */
120static inline int32_t vext_lmul(uint32_t desc)
751538d5 121{
33f1beaf 122 return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
751538d5
LZ
123}
124
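/*
 * Worked example (editorial note based on the table above): for vlmul =
 * 0b101 (LMUL = 1/8), sextract32(0b101, 0, 3) sign-extends the 3-bit field
 * and vext_lmul() returns -3; for vlmul = 0b011 (LMUL = 8) it returns 3.
 */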
f1eed927 125static inline uint32_t vext_vta(uint32_t desc)
126{
127 return FIELD_EX32(simd_data(desc), VDATA, VTA);
128}
129
355d5584
YTC
130static inline uint32_t vext_vma(uint32_t desc)
131{
132 return FIELD_EX32(simd_data(desc), VDATA, VMA);
133}
134
5c19fc15 135static inline uint32_t vext_vta_all_1s(uint32_t desc)
136{
137 return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
138}
139
751538d5 140/*
5a9f8e15 141 * Get the maximum number of elements that can be operated on.
751538d5 142 *
c7b8a421 143 * log2_esz: log2 of element size in bytes.
751538d5 144 */
c7b8a421 145static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
751538d5 146{
5a9f8e15 147 /*
 8a4b5257 148 * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
5a9f8e15
FC
149 * so vlen in bytes (vlenb) is encoded as maxsz.
150 */
151 uint32_t vlenb = simd_maxsz(desc);
152
153 /* Return VLMAX */
c7b8a421 154 int scale = vext_lmul(desc) - log2_esz;
5a9f8e15 155 return scale < 0 ? vlenb >> -scale : vlenb << scale;
751538d5
LZ
156}
157
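/*
 * Worked example (editorial, illustrative values): with vlenb = 16 bytes
 * (VLEN = 128), SEW = 32 (log2_esz = 2) and LMUL = 1/2 (vext_lmul() == -1),
 * scale = -1 - 2 = -3, so VLMAX = 16 >> 3 = 2 elements.
 */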
f1eed927 158/*
 159 * Get the total number of elements, including prestart, body and tail elements.
160 * Note that when LMUL < 1, the tail includes the elements past VLMAX that
161 * are held in the same vector register.
162 */
163static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
164 uint32_t esz)
165{
166 uint32_t vlenb = simd_maxsz(desc);
167 uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
168 int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
169 ctzl(esz) - ctzl(sew) + vext_lmul(desc);
170 return (vlenb << emul) / esz;
171}
172
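/*
 * Worked example (editorial): with vlenb = 16, sew = 2 bytes, esz = 4 and
 * LMUL = 1 (vext_lmul() == 0), emul = ctzl(4) - ctzl(2) + 0 = 1, so the
 * total number of elements is (16 << 1) / 4 = 8.
 */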
d6b9d930
LZ
173static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
174{
175 return (addr & env->cur_pmmask) | env->cur_pmbase;
176}
177
751538d5
LZ
178/*
179 * This function checks watchpoint before real load operation.
180 *
181 * In softmmu mode, the TLB API probe_access is enough for watchpoint check.
 182 * In user mode, there is currently no watchpoint support.
 183 *
 184 * It will trigger an exception if there is no mapping in the TLB
 185 * and the page table walk can't fill the TLB entry. Then the guest
 186 * software can return here after processing the exception, or never return.
187 */
188static void probe_pages(CPURISCVState *env, target_ulong addr,
189 target_ulong len, uintptr_t ra,
190 MMUAccessType access_type)
191{
192 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
193 target_ulong curlen = MIN(pagelen, len);
194
d6b9d930 195 probe_access(env, adjust_addr(env, addr), curlen, access_type,
751538d5
LZ
196 cpu_mmu_index(env, false), ra);
197 if (len > curlen) {
198 addr += curlen;
199 curlen = len - curlen;
d6b9d930 200 probe_access(env, adjust_addr(env, addr), curlen, access_type,
751538d5
LZ
201 cpu_mmu_index(env, false), ra);
202 }
203}
204
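/*
 * Example (editorial, assuming a 4 KiB target page size): probing len = 24
 * bytes starting at addr = 0xfff0 first checks the 16 bytes up to the page
 * boundary and then probes the remaining 8 bytes on the following page.
 */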
f1eed927 205/* set agnostic elements to 1s */
206static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
207 uint32_t tot)
208{
209 if (is_agnostic == 0) {
210 /* policy undisturbed */
211 return;
212 }
213 if (tot - cnt == 0) {
 214 return;
215 }
216 memset(base + cnt, -1, tot - cnt);
217}
218
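/*
 * Example (editorial): for a tail-agnostic byte-element destination with
 * 3 body elements out of 8 total bytes, vext_set_elems_1s(vd, vta, 3, 8)
 * fills bytes 3..7 of vd with 0xff; with vta == 0 it is a no-op.
 */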
f9298de5
FC
219static inline void vext_set_elem_mask(void *v0, int index,
220 uint8_t value)
3a6f8f68 221{
f9298de5
FC
222 int idx = index / 64;
223 int pos = index % 64;
3a6f8f68 224 uint64_t old = ((uint64_t *)v0)[idx];
f9298de5 225 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
3a6f8f68 226}
751538d5 227
f9298de5
FC
228/*
229 * Earlier designs (pre-0.9) had a varying number of bits
230 * per mask value (MLEN). In the 0.9 design, MLEN=1.
231 * (Section 4.5)
232 */
233static inline int vext_elem_mask(void *v0, int index)
751538d5 234{
f9298de5
FC
235 int idx = index / 64;
236 int pos = index % 64;
751538d5
LZ
237 return (((uint64_t *)v0)[idx] >> pos) & 1;
238}
239
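/*
 * Editorial example: with MLEN = 1 the mask packs one bit per element into
 * host-endian 64-bit words, so element 70 lives in word 1, bit 6:
 *
 *     vext_set_elem_mask(v0, 70, 1);        // sets bit 6 of ((uint64_t *)v0)[1]
 *     int active = vext_elem_mask(v0, 70);  // reads the same bit back (1)
 */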
 240/* element operations for load and store */
241typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
242 uint32_t idx, void *vd, uintptr_t retaddr);
751538d5 243
79556fb6 244#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
751538d5
LZ
245static void NAME(CPURISCVState *env, abi_ptr addr, \
246 uint32_t idx, void *vd, uintptr_t retaddr)\
247{ \
751538d5 248 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
79556fb6 249 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
751538d5
LZ
250} \
251
79556fb6
FC
252GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
253GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
254GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
255GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
751538d5
LZ
256
257#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
258static void NAME(CPURISCVState *env, abi_ptr addr, \
259 uint32_t idx, void *vd, uintptr_t retaddr)\
260{ \
261 ETYPE data = *((ETYPE *)vd + H(idx)); \
262 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
263}
264
751538d5
LZ
265GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
266GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
267GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
268GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
269
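/*
 * For illustration only (editorial expansion of the macros above):
 * GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) produces roughly
 *
 *     static void lde_b(CPURISCVState *env, abi_ptr addr,
 *                       uint32_t idx, void *vd, uintptr_t retaddr)
 *     {
 *         int8_t *cur = ((int8_t *)vd + H1(idx));
 *         *cur = cpu_ldsb_data_ra(env, addr, retaddr);
 *     }
 *
 * and GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) produces the matching store
 * via cpu_stb_data_ra().
 */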
270/*
 271 *** stride: access vector elements from strided memory
272 */
273static void
274vext_ldst_stride(void *vd, void *v0, target_ulong base,
275 target_ulong stride, CPURISCVState *env,
276 uint32_t desc, uint32_t vm,
3479a814 277 vext_ldst_elem_fn *ldst_elem,
c7b8a421 278 uint32_t log2_esz, uintptr_t ra)
751538d5
LZ
279{
280 uint32_t i, k;
281 uint32_t nf = vext_nf(desc);
c7b8a421 282 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 283 uint32_t esz = 1 << log2_esz;
284 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
285 uint32_t vta = vext_vta(desc);
265ecd4c 286 uint32_t vma = vext_vma(desc);
751538d5 287
f714361e 288 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
751538d5 289 k = 0;
751538d5 290 while (k < nf) {
265ecd4c
YTC
291 if (!vm && !vext_elem_mask(v0, i)) {
292 /* set masked-off elements to 1s */
293 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
294 (i + k * max_elems + 1) * esz);
295 k++;
296 continue;
297 }
c7b8a421 298 target_ulong addr = base + stride * i + (k << log2_esz);
d6b9d930 299 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
751538d5
LZ
300 k++;
301 }
302 }
f714361e 303 env->vstart = 0;
752614ca 304 /* set tail elements to 1s */
305 for (k = 0; k < nf; ++k) {
306 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
307 (k * max_elems + max_elems) * esz);
308 }
309 if (nf * max_elems % total_elems != 0) {
310 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
311 uint32_t registers_used =
312 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
313 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
314 registers_used * vlenb);
315 }
751538d5
LZ
316}
317
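/*
 * Worked address example (editorial, illustrative values): for a strided
 * segment load with nf = 2, SEW = 32 (log2_esz = 2), base = 0x1000 and
 * stride = 0x40, element i = 3 of field k = 1 is accessed at
 * base + stride * i + (k << log2_esz) = 0x1000 + 0xc0 + 0x4 = 0x10c4.
 */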
79556fb6 318#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
751538d5
LZ
319void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
320 target_ulong stride, CPURISCVState *env, \
321 uint32_t desc) \
322{ \
323 uint32_t vm = vext_vm(desc); \
324 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
25eae048 325 ctzl(sizeof(ETYPE)), GETPC()); \
751538d5
LZ
326}
327
79556fb6
FC
328GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
329GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
330GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
331GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
332
333#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
751538d5
LZ
334void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
335 target_ulong stride, CPURISCVState *env, \
336 uint32_t desc) \
337{ \
338 uint32_t vm = vext_vm(desc); \
339 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
25eae048 340 ctzl(sizeof(ETYPE)), GETPC()); \
751538d5
LZ
341}
342
79556fb6
FC
343GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
344GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
345GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
346GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
751538d5
LZ
347
348/*
349 *** unit-stride: access elements stored contiguously in memory
350 */
351
 352/* unmasked unit-stride load and store operation */
353static void
354vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
c7b8a421 355 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
25eae048 356 uintptr_t ra)
751538d5
LZ
357{
358 uint32_t i, k;
359 uint32_t nf = vext_nf(desc);
c7b8a421 360 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 361 uint32_t esz = 1 << log2_esz;
362 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
363 uint32_t vta = vext_vta(desc);
751538d5 364
751538d5 365 /* load bytes from guest memory */
5c89e9c0 366 for (i = env->vstart; i < evl; i++, env->vstart++) {
751538d5
LZ
367 k = 0;
368 while (k < nf) {
c7b8a421 369 target_ulong addr = base + ((i * nf + k) << log2_esz);
d6b9d930 370 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
751538d5
LZ
371 k++;
372 }
373 }
f714361e 374 env->vstart = 0;
752614ca 375 /* set tail elements to 1s */
376 for (k = 0; k < nf; ++k) {
377 vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz,
378 (k * max_elems + max_elems) * esz);
379 }
380 if (nf * max_elems % total_elems != 0) {
381 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
382 uint32_t registers_used =
383 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
384 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
385 registers_used * vlenb);
386 }
751538d5
LZ
387}
388
389/*
 390 * A masked unit-stride load or store is a special case of the strided
 391 * operation, with stride = NF * sizeof(ETYPE).
392 */
393
79556fb6 394#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
751538d5
LZ
395void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
396 CPURISCVState *env, uint32_t desc) \
397{ \
5a9f8e15 398 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
751538d5 399 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
25eae048 400 ctzl(sizeof(ETYPE)), GETPC()); \
751538d5
LZ
401} \
402 \
403void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
404 CPURISCVState *env, uint32_t desc) \
405{ \
3479a814 406 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
25eae048 407 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
751538d5
LZ
408}
409
79556fb6
FC
410GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
411GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
412GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
413GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
414
5c89e9c0
FC
415#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
416void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
417 CPURISCVState *env, uint32_t desc) \
418{ \
419 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
420 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
25eae048 421 ctzl(sizeof(ETYPE)), GETPC()); \
5c89e9c0
FC
422} \
423 \
424void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
425 CPURISCVState *env, uint32_t desc) \
426{ \
427 vext_ldst_us(vd, base, env, desc, STORE_FN, \
25eae048 428 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
751538d5
LZ
429}
430
79556fb6
FC
431GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
432GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
433GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
434GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
f732560e 435
26086aea
FC
436/*
 437 *** unit-stride mask load and store, EEW = 1
438 */
439void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
440 CPURISCVState *env, uint32_t desc)
441{
442 /* evl = ceil(vl/8) */
443 uint8_t evl = (env->vl + 7) >> 3;
444 vext_ldst_us(vd, base, env, desc, lde_b,
25eae048 445 0, evl, GETPC());
26086aea
FC
446}
447
448void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
449 CPURISCVState *env, uint32_t desc)
450{
451 /* evl = ceil(vl/8) */
452 uint8_t evl = (env->vl + 7) >> 3;
453 vext_ldst_us(vd, base, env, desc, ste_b,
25eae048 454 0, evl, GETPC());
26086aea
FC
455}
456
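/*
 * Example (editorial): vlm.v/vsm.v always use EEW = 8, so for vl = 17 mask
 * bits evl = (17 + 7) >> 3 = 3, i.e. three bytes of the mask register are
 * transferred.
 */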
f732560e
LZ
457/*
 458 *** index: access vector elements from indexed memory
459 */
460typedef target_ulong vext_get_index_addr(target_ulong base,
461 uint32_t idx, void *vs2);
462
463#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
464static target_ulong NAME(target_ulong base, \
465 uint32_t idx, void *vs2) \
466{ \
467 return (base + *((ETYPE *)vs2 + H(idx))); \
468}
469
83fcd573
FC
470GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
471GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
472GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
473GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
f732560e
LZ
474
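/*
 * For illustration (editorial expansion of the macro above):
 * GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) yields roughly
 *
 *     static target_ulong idx_h(target_ulong base, uint32_t idx, void *vs2)
 *     {
 *         return base + *((uint16_t *)vs2 + H2(idx));
 *     }
 *
 * i.e. the index element selected from vs2 is zero-extended and added to base.
 */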
475static inline void
476vext_ldst_index(void *vd, void *v0, target_ulong base,
477 void *vs2, CPURISCVState *env, uint32_t desc,
478 vext_get_index_addr get_index_addr,
479 vext_ldst_elem_fn *ldst_elem,
c7b8a421 480 uint32_t log2_esz, uintptr_t ra)
f732560e
LZ
481{
482 uint32_t i, k;
483 uint32_t nf = vext_nf(desc);
484 uint32_t vm = vext_vm(desc);
c7b8a421 485 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 486 uint32_t esz = 1 << log2_esz;
487 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
488 uint32_t vta = vext_vta(desc);
265ecd4c 489 uint32_t vma = vext_vma(desc);
f732560e 490
f732560e 491 /* load bytes from guest memory */
f714361e 492 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
f714361e 493 k = 0;
f732560e 494 while (k < nf) {
265ecd4c
YTC
495 if (!vm && !vext_elem_mask(v0, i)) {
496 /* set masked-off elements to 1s */
497 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
498 (i + k * max_elems + 1) * esz);
499 k++;
500 continue;
501 }
c7b8a421 502 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
d6b9d930 503 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
f732560e
LZ
504 k++;
505 }
506 }
f714361e 507 env->vstart = 0;
752614ca 508 /* set tail elements to 1s */
509 for (k = 0; k < nf; ++k) {
510 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
511 (k * max_elems + max_elems) * esz);
512 }
513 if (nf * max_elems % total_elems != 0) {
514 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
515 uint32_t registers_used =
516 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
517 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
518 registers_used * vlenb);
519 }
f732560e
LZ
520}
521
08b9d0ed 522#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
f732560e
LZ
523void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
524 void *vs2, CPURISCVState *env, uint32_t desc) \
525{ \
526 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
25eae048 527 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
f732560e
LZ
528}
529
08b9d0ed
FC
530GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
531GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
532GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
533GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
534GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
535GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
536GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
537GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
538GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
539GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
540GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
541GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
542GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
543GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
544GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
545GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
546
547#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
f732560e
LZ
548void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
549 void *vs2, CPURISCVState *env, uint32_t desc) \
550{ \
551 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
5a9f8e15 552 STORE_FN, ctzl(sizeof(ETYPE)), \
25eae048 553 GETPC()); \
f732560e
LZ
554}
555
08b9d0ed
FC
556GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
557GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
558GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
559GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
560GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
561GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
562GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
563GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
564GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
565GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
566GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
567GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
568GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
569GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
570GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
571GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
022b4ecf
LZ
572
573/*
 574 *** unit-stride fault-only-first load instructions
575 */
576static inline void
577vext_ldff(void *vd, void *v0, target_ulong base,
578 CPURISCVState *env, uint32_t desc,
579 vext_ldst_elem_fn *ldst_elem,
c7b8a421 580 uint32_t log2_esz, uintptr_t ra)
022b4ecf
LZ
581{
582 void *host;
583 uint32_t i, k, vl = 0;
022b4ecf
LZ
584 uint32_t nf = vext_nf(desc);
585 uint32_t vm = vext_vm(desc);
c7b8a421 586 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 587 uint32_t esz = 1 << log2_esz;
588 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
589 uint32_t vta = vext_vta(desc);
265ecd4c 590 uint32_t vma = vext_vma(desc);
022b4ecf
LZ
591 target_ulong addr, offset, remain;
592
 593 /* probe every access */
f714361e 594 for (i = env->vstart; i < env->vl; i++) {
f9298de5 595 if (!vm && !vext_elem_mask(v0, i)) {
022b4ecf
LZ
596 continue;
597 }
c7b8a421 598 addr = adjust_addr(env, base + i * (nf << log2_esz));
022b4ecf 599 if (i == 0) {
c7b8a421 600 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
022b4ecf
LZ
601 } else {
602 /* if it triggers an exception, no need to check watchpoint */
c7b8a421 603 remain = nf << log2_esz;
022b4ecf
LZ
604 while (remain > 0) {
605 offset = -(addr | TARGET_PAGE_MASK);
606 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
607 cpu_mmu_index(env, false));
608 if (host) {
609#ifdef CONFIG_USER_ONLY
01d09525 610 if (page_check_range(addr, offset, PAGE_READ) < 0) {
022b4ecf
LZ
611 vl = i;
612 goto ProbeSuccess;
613 }
614#else
01d09525 615 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
022b4ecf
LZ
616#endif
617 } else {
618 vl = i;
619 goto ProbeSuccess;
620 }
621 if (remain <= offset) {
622 break;
623 }
624 remain -= offset;
d6b9d930 625 addr = adjust_addr(env, addr + offset);
022b4ecf
LZ
626 }
627 }
628 }
629ProbeSuccess:
630 /* load bytes from guest memory */
631 if (vl != 0) {
632 env->vl = vl;
633 }
f714361e 634 for (i = env->vstart; i < env->vl; i++) {
022b4ecf 635 k = 0;
022b4ecf 636 while (k < nf) {
265ecd4c
YTC
637 if (!vm && !vext_elem_mask(v0, i)) {
638 /* set masked-off elements to 1s */
639 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
640 (i + k * max_elems + 1) * esz);
641 k++;
642 continue;
643 }
c7b8a421 644 target_ulong addr = base + ((i * nf + k) << log2_esz);
d6b9d930 645 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
022b4ecf
LZ
646 k++;
647 }
648 }
f714361e 649 env->vstart = 0;
752614ca 650 /* set tail elements to 1s */
651 for (k = 0; k < nf; ++k) {
652 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
653 (k * max_elems + max_elems) * esz);
654 }
655 if (nf * max_elems % total_elems != 0) {
656 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
657 uint32_t registers_used =
658 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
659 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
660 registers_used * vlenb);
661 }
022b4ecf
LZ
662}
663
d3e5e2ff
FC
664#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
665void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
666 CPURISCVState *env, uint32_t desc) \
667{ \
668 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
5a9f8e15 669 ctzl(sizeof(ETYPE)), GETPC()); \
022b4ecf
LZ
670}
671
d3e5e2ff
FC
672GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
673GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
674GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
675GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
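/*
 * Behavioural sketch (editorial, based on the probe loop above): a fault on
 * element 0 traps as usual; a fault on a later element i takes no trap and
 * instead truncates vl to i before the loads run, so e.g. a vle8ff.v that
 * reaches an unmapped page at element 5 completes with env->vl == 5.
 */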
268fcca6 676
268fcca6
LZ
677#define DO_SWAP(N, M) (M)
678#define DO_AND(N, M) (N & M)
679#define DO_XOR(N, M) (N ^ M)
680#define DO_OR(N, M) (N | M)
681#define DO_ADD(N, M) (N + M)
682
268fcca6
LZ
683/* Signed min/max */
684#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
685#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
686
687/* Unsigned min/max */
688#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
689#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
690
30206bd8
FC
691/*
692 *** load and store whole register instructions
693 */
694static void
695vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
c7b8a421 696 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
30206bd8 697{
f714361e 698 uint32_t i, k, off, pos;
30206bd8
FC
699 uint32_t nf = vext_nf(desc);
700 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
c7b8a421 701 uint32_t max_elems = vlenb >> log2_esz;
30206bd8 702
f714361e
FC
703 k = env->vstart / max_elems;
704 off = env->vstart % max_elems;
30206bd8 705
f714361e
FC
706 if (off) {
 707 /* load/store the remaining elements of the segment pointed to by vstart */
708 for (pos = off; pos < max_elems; pos++, env->vstart++) {
c7b8a421 709 target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
d6b9d930 710 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
f714361e
FC
711 }
712 k++;
713 }
714
715 /* load/store elements for rest of segments */
716 for (; k < nf; k++) {
717 for (i = 0; i < max_elems; i++, env->vstart++) {
c7b8a421 718 target_ulong addr = base + ((i + k * max_elems) << log2_esz);
d6b9d930 719 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
30206bd8
FC
720 }
721 }
f714361e
FC
722
723 env->vstart = 0;
30206bd8
FC
724}
725
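/*
 * Resume example (editorial, illustrative values): with vstart = 5 and
 * max_elems = 4, the code above restarts in register k = 5 / 4 = 1 at
 * offset off = 5 % 4 = 1, finishes that register, then handles the
 * remaining nf - 2 registers from element 0.
 */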
726#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
727void HELPER(NAME)(void *vd, target_ulong base, \
728 CPURISCVState *env, uint32_t desc) \
729{ \
730 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
25eae048 731 ctzl(sizeof(ETYPE)), GETPC()); \
30206bd8
FC
732}
733
734GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
735GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
736GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
737GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
738GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
739GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
740GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
741GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
742GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
743GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
744GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
745GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
746GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
747GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
748GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
749GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
750
751#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
752void HELPER(NAME)(void *vd, target_ulong base, \
753 CPURISCVState *env, uint32_t desc) \
754{ \
755 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
25eae048 756 ctzl(sizeof(ETYPE)), GETPC()); \
30206bd8
FC
757}
758
759GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
760GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
761GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
762GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
763
43740e3a
LZ
764/*
765 *** Vector Integer Arithmetic Instructions
766 */
767
768/* expand macro args before macro */
769#define RVVCALL(macro, ...) macro(__VA_ARGS__)
770
771/* (TD, T1, T2, TX1, TX2) */
772#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
773#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
774#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
775#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
558fa779
LZ
776#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
777#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
778#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
779#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
958b85f3
LZ
780#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
781#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
782#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
783#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
97b1cba3
LZ
784#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
785#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
786#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
787#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
788#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
789#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
790#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
791#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
792#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
793#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
794#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
795#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
9ff3d287
LZ
796#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
797#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
798#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
799#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
800#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
801#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
43740e3a
LZ
802
803/* operation of two vector elements */
804typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
805
806#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
807static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
808{ \
809 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
810 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
811 *((TD *)vd + HD(i)) = OP(s2, s1); \
812}
813#define DO_SUB(N, M) (N - M)
814#define DO_RSUB(N, M) (M - N)
815
816RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
817RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
818RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
819RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
820RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
821RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
822RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
823RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
824
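/*
 * For illustration (editorial expansion): RVVCALL(OPIVV2, vadd_vv_b,
 * OP_SSS_B, H1, H1, H1, DO_ADD) expands roughly to
 *
 *     static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         int8_t s1 = *((int8_t *)vs1 + H1(i));
 *         int8_t s2 = *((int8_t *)vs2 + H1(i));
 *         *((int8_t *)vd + H1(i)) = s2 + s1;
 *     }
 */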
825static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
826 CPURISCVState *env, uint32_t desc,
f1eed927 827 opivv2_fn *fn, uint32_t esz)
43740e3a 828{
43740e3a
LZ
829 uint32_t vm = vext_vm(desc);
830 uint32_t vl = env->vl;
f1eed927 831 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
832 uint32_t vta = vext_vta(desc);
355d5584 833 uint32_t vma = vext_vma(desc);
43740e3a
LZ
834 uint32_t i;
835
f714361e 836 for (i = env->vstart; i < vl; i++) {
f9298de5 837 if (!vm && !vext_elem_mask(v0, i)) {
355d5584
YTC
838 /* set masked-off elements to 1s */
839 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
43740e3a
LZ
840 continue;
841 }
842 fn(vd, vs1, vs2, i);
843 }
f714361e 844 env->vstart = 0;
f1eed927 845 /* set tail elements to 1s */
846 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
43740e3a
LZ
847}
848
849/* generate the helpers for OPIVV */
f1eed927 850#define GEN_VEXT_VV(NAME, ESZ) \
43740e3a
LZ
851void HELPER(NAME)(void *vd, void *v0, void *vs1, \
852 void *vs2, CPURISCVState *env, \
853 uint32_t desc) \
854{ \
8a085fb2 855 do_vext_vv(vd, v0, vs1, vs2, env, desc, \
f1eed927 856 do_##NAME, ESZ); \
43740e3a
LZ
857}
858
f1eed927 859GEN_VEXT_VV(vadd_vv_b, 1)
860GEN_VEXT_VV(vadd_vv_h, 2)
861GEN_VEXT_VV(vadd_vv_w, 4)
862GEN_VEXT_VV(vadd_vv_d, 8)
863GEN_VEXT_VV(vsub_vv_b, 1)
864GEN_VEXT_VV(vsub_vv_h, 2)
865GEN_VEXT_VV(vsub_vv_w, 4)
866GEN_VEXT_VV(vsub_vv_d, 8)
43740e3a
LZ
867
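/*
 * Usage sketch (editorial): GEN_VEXT_VV(vadd_vv_b, 1) emits helper_vadd_vv_b
 * with the signature
 *
 *     void helper_vadd_vv_b(void *vd, void *v0, void *vs1, void *vs2,
 *                           CPURISCVState *env, uint32_t desc);
 *
 * do_vext_vv() then applies do_vadd_vv_b() element-wise, writing 1s to
 * masked-off elements when VMA is set and to tail elements when VTA is set.
 */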
868typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
869
870/*
 871 * (T1)s1 gives the real operand type.
 872 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
873 */
874#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
875static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
876{ \
877 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
878 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
879}
880
881RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
882RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
883RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
884RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
885RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
886RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
887RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
888RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
889RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
890RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
891RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
892RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
893
894static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
895 CPURISCVState *env, uint32_t desc,
5c19fc15 896 opivx2_fn fn, uint32_t esz)
43740e3a 897{
43740e3a
LZ
898 uint32_t vm = vext_vm(desc);
899 uint32_t vl = env->vl;
5c19fc15 900 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
901 uint32_t vta = vext_vta(desc);
bce9a636 902 uint32_t vma = vext_vma(desc);
43740e3a
LZ
903 uint32_t i;
904
f714361e 905 for (i = env->vstart; i < vl; i++) {
f9298de5 906 if (!vm && !vext_elem_mask(v0, i)) {
bce9a636
YTC
907 /* set masked-off elements to 1s */
908 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
43740e3a
LZ
909 continue;
910 }
911 fn(vd, s1, vs2, i);
912 }
f714361e 913 env->vstart = 0;
5c19fc15 914 /* set tail elements to 1s */
915 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
43740e3a
LZ
916}
917
918/* generate the helpers for OPIVX */
5c19fc15 919#define GEN_VEXT_VX(NAME, ESZ) \
43740e3a
LZ
920void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
921 void *vs2, CPURISCVState *env, \
922 uint32_t desc) \
923{ \
8a085fb2 924 do_vext_vx(vd, v0, s1, vs2, env, desc, \
5c19fc15 925 do_##NAME, ESZ); \
3479a814
FC
926}
927
5c19fc15 928GEN_VEXT_VX(vadd_vx_b, 1)
929GEN_VEXT_VX(vadd_vx_h, 2)
930GEN_VEXT_VX(vadd_vx_w, 4)
931GEN_VEXT_VX(vadd_vx_d, 8)
932GEN_VEXT_VX(vsub_vx_b, 1)
933GEN_VEXT_VX(vsub_vx_h, 2)
934GEN_VEXT_VX(vsub_vx_w, 4)
935GEN_VEXT_VX(vsub_vx_d, 8)
936GEN_VEXT_VX(vrsub_vx_b, 1)
937GEN_VEXT_VX(vrsub_vx_h, 2)
938GEN_VEXT_VX(vrsub_vx_w, 4)
939GEN_VEXT_VX(vrsub_vx_d, 8)
43740e3a
LZ
940
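/*
 * Editorial note with an illustrative call: the _vx helpers mirror the _vv
 * path but take the scalar operand directly, e.g.
 *
 *     helper_vrsub_vx_b(vd, v0, s1, vs2, env, desc);
 *
 * computes vd[i] = s1 - vs2[i] for active elements, with the same VMA/VTA
 * handling as the _vv case.
 */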
941void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
942{
943 intptr_t oprsz = simd_oprsz(desc);
944 intptr_t i;
945
946 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
947 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
948 }
949}
950
951void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
952{
953 intptr_t oprsz = simd_oprsz(desc);
954 intptr_t i;
955
956 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
957 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
958 }
959}
960
961void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
962{
963 intptr_t oprsz = simd_oprsz(desc);
964 intptr_t i;
965
966 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
967 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
968 }
969}
970
971void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
972{
973 intptr_t oprsz = simd_oprsz(desc);
974 intptr_t i;
975
976 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
977 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
978 }
979}
8fcdf776
LZ
980
981/* Vector Widening Integer Add/Subtract */
982#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
983#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
984#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
985#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
986#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
987#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
988#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
989#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
990#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
991#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
992#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
993#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
994RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
995RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
996RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
997RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
998RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
999RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
1000RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
1001RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
1002RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
1003RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
1004RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
1005RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
1006RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
1007RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
1008RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
1009RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
1010RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
1011RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
1012RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
1013RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
1014RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
1015RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
1016RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
1017RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
f1eed927 1018GEN_VEXT_VV(vwaddu_vv_b, 2)
1019GEN_VEXT_VV(vwaddu_vv_h, 4)
1020GEN_VEXT_VV(vwaddu_vv_w, 8)
1021GEN_VEXT_VV(vwsubu_vv_b, 2)
1022GEN_VEXT_VV(vwsubu_vv_h, 4)
1023GEN_VEXT_VV(vwsubu_vv_w, 8)
1024GEN_VEXT_VV(vwadd_vv_b, 2)
1025GEN_VEXT_VV(vwadd_vv_h, 4)
1026GEN_VEXT_VV(vwadd_vv_w, 8)
1027GEN_VEXT_VV(vwsub_vv_b, 2)
1028GEN_VEXT_VV(vwsub_vv_h, 4)
1029GEN_VEXT_VV(vwsub_vv_w, 8)
1030GEN_VEXT_VV(vwaddu_wv_b, 2)
1031GEN_VEXT_VV(vwaddu_wv_h, 4)
1032GEN_VEXT_VV(vwaddu_wv_w, 8)
1033GEN_VEXT_VV(vwsubu_wv_b, 2)
1034GEN_VEXT_VV(vwsubu_wv_h, 4)
1035GEN_VEXT_VV(vwsubu_wv_w, 8)
1036GEN_VEXT_VV(vwadd_wv_b, 2)
1037GEN_VEXT_VV(vwadd_wv_h, 4)
1038GEN_VEXT_VV(vwadd_wv_w, 8)
1039GEN_VEXT_VV(vwsub_wv_b, 2)
1040GEN_VEXT_VV(vwsub_wv_h, 4)
1041GEN_VEXT_VV(vwsub_wv_w, 8)
8fcdf776
LZ
1042
1043RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
1044RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
1045RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
1046RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
1047RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
1048RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
1049RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
1050RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
1051RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
1052RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
1053RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
1054RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
1055RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
1056RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
1057RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
1058RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
1059RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
1060RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
1061RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
1062RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
1063RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
1064RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
1065RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
1066RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
5c19fc15 1067GEN_VEXT_VX(vwaddu_vx_b, 2)
1068GEN_VEXT_VX(vwaddu_vx_h, 4)
1069GEN_VEXT_VX(vwaddu_vx_w, 8)
1070GEN_VEXT_VX(vwsubu_vx_b, 2)
1071GEN_VEXT_VX(vwsubu_vx_h, 4)
1072GEN_VEXT_VX(vwsubu_vx_w, 8)
1073GEN_VEXT_VX(vwadd_vx_b, 2)
1074GEN_VEXT_VX(vwadd_vx_h, 4)
1075GEN_VEXT_VX(vwadd_vx_w, 8)
1076GEN_VEXT_VX(vwsub_vx_b, 2)
1077GEN_VEXT_VX(vwsub_vx_h, 4)
1078GEN_VEXT_VX(vwsub_vx_w, 8)
1079GEN_VEXT_VX(vwaddu_wx_b, 2)
1080GEN_VEXT_VX(vwaddu_wx_h, 4)
1081GEN_VEXT_VX(vwaddu_wx_w, 8)
1082GEN_VEXT_VX(vwsubu_wx_b, 2)
1083GEN_VEXT_VX(vwsubu_wx_h, 4)
1084GEN_VEXT_VX(vwsubu_wx_w, 8)
1085GEN_VEXT_VX(vwadd_wx_b, 2)
1086GEN_VEXT_VX(vwadd_wx_h, 4)
1087GEN_VEXT_VX(vwadd_wx_w, 8)
1088GEN_VEXT_VX(vwsub_wx_b, 2)
1089GEN_VEXT_VX(vwsub_wx_h, 4)
1090GEN_VEXT_VX(vwsub_wx_w, 8)
3a6f8f68
LZ
1091
1092/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1093#define DO_VADC(N, M, C) (N + M + C)
1094#define DO_VSBC(N, M, C) (N - M - C)
1095
3479a814 1096#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
3a6f8f68
LZ
1097void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1098 CPURISCVState *env, uint32_t desc) \
1099{ \
3a6f8f68 1100 uint32_t vl = env->vl; \
5c19fc15 1101 uint32_t esz = sizeof(ETYPE); \
1102 uint32_t total_elems = \
1103 vext_get_total_elems(env, desc, esz); \
1104 uint32_t vta = vext_vta(desc); \
3a6f8f68
LZ
1105 uint32_t i; \
1106 \
f714361e 1107 for (i = env->vstart; i < vl; i++) { \
3a6f8f68
LZ
1108 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1109 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1110 ETYPE carry = vext_elem_mask(v0, i); \
3a6f8f68
LZ
1111 \
1112 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
1113 } \
f714361e 1114 env->vstart = 0; \
5c19fc15 1115 /* set tail elements to 1s */ \
1116 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
3a6f8f68
LZ
1117}
1118
3479a814
FC
1119GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
1120GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
1121GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
1122GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
3a6f8f68 1123
3479a814
FC
1124GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
1125GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
1126GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
1127GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
3a6f8f68 1128
3479a814 1129#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
3a6f8f68
LZ
1130void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1131 CPURISCVState *env, uint32_t desc) \
1132{ \
3a6f8f68 1133 uint32_t vl = env->vl; \
5c19fc15 1134 uint32_t esz = sizeof(ETYPE); \
1135 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1136 uint32_t vta = vext_vta(desc); \
3a6f8f68
LZ
1137 uint32_t i; \
1138 \
f714361e 1139 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 1140 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1141 ETYPE carry = vext_elem_mask(v0, i); \
3a6f8f68
LZ
1142 \
1143 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1144 } \
f714361e 1145 env->vstart = 0; \
5c19fc15 1146 /* set tail elements to 1s */ \
1147 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
3a6f8f68
LZ
1148}
1149
3479a814
FC
1150GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
1151GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1152GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1153GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
3a6f8f68 1154
3479a814
FC
1155GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
1156GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1157GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1158GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
3a6f8f68
LZ
1159
1160#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
1161 (__typeof(N))(N + M) < N)
1162#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
1163
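/*
 * Worked example (editorial): DO_MADC detects unsigned carry-out. For 8-bit
 * operands N = 0xff, M = 0x01, C = 0: (uint8_t)(N + M) = 0x00 < 0xff, so a
 * carry-out of 1 is recorded; for N = 0x10, M = 0x20, C = 1:
 * (uint8_t)(N + M + 1) = 0x31 > 0x10, so no carry-out is produced.
 */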
1164#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
1165void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1166 CPURISCVState *env, uint32_t desc) \
1167{ \
3a6f8f68 1168 uint32_t vl = env->vl; \
bb45485a 1169 uint32_t vm = vext_vm(desc); \
5c19fc15 1170 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1171 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
3a6f8f68
LZ
1172 uint32_t i; \
1173 \
f714361e 1174 for (i = env->vstart; i < vl; i++) { \
3a6f8f68
LZ
1175 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1176 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1177 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 1178 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
3a6f8f68 1179 } \
f714361e 1180 env->vstart = 0; \
5c19fc15 1181 /* mask destination register is always tail-agnostic */ \
1182 /* set tail elements to 1s */ \
1183 if (vta_all_1s) { \
1184 for (; i < total_elems; i++) { \
1185 vext_set_elem_mask(vd, i, 1); \
1186 } \
1187 } \
3a6f8f68
LZ
1188}
1189
1190GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1191GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1192GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1193GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1194
1195GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1196GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1197GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1198GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1199
1200#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1201void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1202 void *vs2, CPURISCVState *env, uint32_t desc) \
1203{ \
3a6f8f68 1204 uint32_t vl = env->vl; \
bb45485a 1205 uint32_t vm = vext_vm(desc); \
5c19fc15 1206 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1207 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
3a6f8f68
LZ
1208 uint32_t i; \
1209 \
f714361e 1210 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 1211 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1212 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 1213 vext_set_elem_mask(vd, i, \
3a6f8f68
LZ
1214 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1215 } \
f714361e 1216 env->vstart = 0; \
5c19fc15 1217 /* mask destination register is always tail-agnostic */ \
1218 /* set tail elements to 1s */ \
1219 if (vta_all_1s) { \
1220 for (; i < total_elems; i++) { \
1221 vext_set_elem_mask(vd, i, 1); \
1222 } \
1223 } \
3a6f8f68
LZ
1224}
1225
1226GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1227GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1228GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1229GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1230
1231GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1232GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1233GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1234GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
d3842924
LZ
1235
1236/* Vector Bitwise Logical Instructions */
1237RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1238RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1239RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1240RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1241RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1242RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1243RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1244RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1245RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1246RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1247RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1248RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
f1eed927 1249GEN_VEXT_VV(vand_vv_b, 1)
1250GEN_VEXT_VV(vand_vv_h, 2)
1251GEN_VEXT_VV(vand_vv_w, 4)
1252GEN_VEXT_VV(vand_vv_d, 8)
1253GEN_VEXT_VV(vor_vv_b, 1)
1254GEN_VEXT_VV(vor_vv_h, 2)
1255GEN_VEXT_VV(vor_vv_w, 4)
1256GEN_VEXT_VV(vor_vv_d, 8)
1257GEN_VEXT_VV(vxor_vv_b, 1)
1258GEN_VEXT_VV(vxor_vv_h, 2)
1259GEN_VEXT_VV(vxor_vv_w, 4)
1260GEN_VEXT_VV(vxor_vv_d, 8)
d3842924
LZ
1261
1262RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1263RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1264RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1265RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1266RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1267RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1268RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1269RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1270RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1271RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1272RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1273RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
5c19fc15 1274GEN_VEXT_VX(vand_vx_b, 1)
1275GEN_VEXT_VX(vand_vx_h, 2)
1276GEN_VEXT_VX(vand_vx_w, 4)
1277GEN_VEXT_VX(vand_vx_d, 8)
1278GEN_VEXT_VX(vor_vx_b, 1)
1279GEN_VEXT_VX(vor_vx_h, 2)
1280GEN_VEXT_VX(vor_vx_w, 4)
1281GEN_VEXT_VX(vor_vx_d, 8)
1282GEN_VEXT_VX(vxor_vx_b, 1)
1283GEN_VEXT_VX(vxor_vx_h, 2)
1284GEN_VEXT_VX(vxor_vx_w, 4)
1285GEN_VEXT_VX(vxor_vx_d, 8)
3277d955
LZ
1286
1287/* Vector Single-Width Bit Shift Instructions */
1288#define DO_SLL(N, M) (N << (M))
1289#define DO_SRL(N, M) (N >> (M))
1290
 1291/* generate the helpers for shift instructions with two vector operands */
3479a814 1292#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
3277d955
LZ
1293void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1294 void *vs2, CPURISCVState *env, uint32_t desc) \
1295{ \
3277d955
LZ
1296 uint32_t vm = vext_vm(desc); \
1297 uint32_t vl = env->vl; \
7b1bff41 1298 uint32_t esz = sizeof(TS1); \
1299 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1300 uint32_t vta = vext_vta(desc); \
3277d955
LZ
1301 uint32_t i; \
1302 \
f714361e 1303 for (i = env->vstart; i < vl; i++) { \
f9298de5 1304 if (!vm && !vext_elem_mask(v0, i)) { \
3277d955
LZ
1305 continue; \
1306 } \
1307 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1308 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1309 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1310 } \
f714361e 1311 env->vstart = 0; \
7b1bff41 1312 /* set tail elements to 1s */ \
1313 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
3277d955
LZ
1314}
1315
3479a814
FC
1316GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1317GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1318GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1319GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
3277d955 1320
3479a814
FC
1321GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1322GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1323GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1324GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
3277d955 1325
3479a814
FC
1326GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1327GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1328GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1329GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
3277d955
LZ
1330
1331/* generate the helpers for shift instructions with one vector and one scalar */
3479a814
FC
1332#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1333void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1334 void *vs2, CPURISCVState *env, uint32_t desc) \
1335{ \
1336 uint32_t vm = vext_vm(desc); \
1337 uint32_t vl = env->vl; \
7b1bff41 1338 uint32_t esz = sizeof(TD); \
1339 uint32_t total_elems = \
1340 vext_get_total_elems(env, desc, esz); \
1341 uint32_t vta = vext_vta(desc); \
3479a814
FC
1342 uint32_t i; \
1343 \
f714361e 1344 for (i = env->vstart; i < vl; i++) { \
3479a814
FC
1345 if (!vm && !vext_elem_mask(v0, i)) { \
1346 continue; \
1347 } \
1348 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1349 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1350 } \
f714361e 1351 env->vstart = 0; \
7b1bff41 1352 /* set tail elements to 1s */ \
1353 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
3479a814
FC
1354}
1355
1356GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1357GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1358GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1359GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1360
1361GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1362GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1363GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1364GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1365
1366GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1367GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1368GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1369GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
7689b028
LZ
1370
1371/* Vector Narrowing Integer Right Shift Instructions */
7daa5852
FC
1372GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1373GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1374GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1375GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1376GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1377GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1378GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1379GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1380GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1381GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1382GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1383GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1366fc79
LZ
1384
1385/* Vector Integer Comparison Instructions */
1386#define DO_MSEQ(N, M) (N == M)
1387#define DO_MSNE(N, M) (N != M)
1388#define DO_MSLT(N, M) (N < M)
1389#define DO_MSLE(N, M) (N <= M)
1390#define DO_MSGT(N, M) (N > M)
1391
1392#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1393void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1394 CPURISCVState *env, uint32_t desc) \
1395{ \
1366fc79
LZ
1396 uint32_t vm = vext_vm(desc); \
1397 uint32_t vl = env->vl; \
38581e5c 1398 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1399 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1366fc79
LZ
1400 uint32_t i; \
1401 \
f714361e 1402 for (i = env->vstart; i < vl; i++) { \
1366fc79
LZ
1403 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1404 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1405 if (!vm && !vext_elem_mask(v0, i)) { \
1366fc79
LZ
1406 continue; \
1407 } \
f9298de5 1408 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1366fc79 1409 } \
f714361e 1410 env->vstart = 0; \
38581e5c 1411 /* mask destination register is always tail-agnostic */ \
1412 /* set tail elements to 1s */ \
1413 if (vta_all_1s) { \
1414 for (; i < total_elems; i++) { \
1415 vext_set_elem_mask(vd, i, 1); \
1416 } \
1417 } \
1366fc79
LZ
1418}
1419
1420GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1421GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1422GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1423GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1424
1425GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1426GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1427GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1428GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1429
1430GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1431GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1432GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1433GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1434
1435GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1436GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1437GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1438GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1439
1440GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1441GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1442GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1443GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1444
1445GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1446GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1447GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1448GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1449
1450#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1451void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1452 CPURISCVState *env, uint32_t desc) \
1453{ \
1366fc79
LZ
1454 uint32_t vm = vext_vm(desc); \
1455 uint32_t vl = env->vl; \
38581e5c 1456 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1457 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1366fc79
LZ
1458 uint32_t i; \
1459 \
f714361e 1460 for (i = env->vstart; i < vl; i++) { \
1366fc79 1461 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1462 if (!vm && !vext_elem_mask(v0, i)) { \
1366fc79
LZ
1463 continue; \
1464 } \
f9298de5 1465 vext_set_elem_mask(vd, i, \
1366fc79
LZ
1466 DO_OP(s2, (ETYPE)(target_long)s1)); \
1467 } \
f714361e 1468 env->vstart = 0; \
38581e5c 1469 /* mask destination register is always tail-agnostic */ \
1470 /* set tail elements to 1s */ \
1471 if (vta_all_1s) { \
1472 for (; i < total_elems; i++) { \
1473 vext_set_elem_mask(vd, i, 1); \
1474 } \
1475 } \
1366fc79
LZ
1476}
1477
1478GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1479GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1480GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1481GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1482
1483GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1484GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1485GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1486GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1487
1488GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1489GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1490GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1491GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1492
1493GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1494GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1495GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1496GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1497
1498GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1499GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1500GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1501GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1502
1503GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1504GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1505GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1506GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1507
1508GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1509GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1510GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1511GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1512
1513GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1514GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1515GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1516GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
558fa779
LZ
1517
1518/* Vector Integer Min/Max Instructions */
1519RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1520RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1521RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1522RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1523RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1524RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1525RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1526RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1527RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1528RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1529RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1530RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1531RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1532RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1533RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1534RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
f1eed927 1535GEN_VEXT_VV(vminu_vv_b, 1)
1536GEN_VEXT_VV(vminu_vv_h, 2)
1537GEN_VEXT_VV(vminu_vv_w, 4)
1538GEN_VEXT_VV(vminu_vv_d, 8)
1539GEN_VEXT_VV(vmin_vv_b, 1)
1540GEN_VEXT_VV(vmin_vv_h, 2)
1541GEN_VEXT_VV(vmin_vv_w, 4)
1542GEN_VEXT_VV(vmin_vv_d, 8)
1543GEN_VEXT_VV(vmaxu_vv_b, 1)
1544GEN_VEXT_VV(vmaxu_vv_h, 2)
1545GEN_VEXT_VV(vmaxu_vv_w, 4)
1546GEN_VEXT_VV(vmaxu_vv_d, 8)
1547GEN_VEXT_VV(vmax_vv_b, 1)
1548GEN_VEXT_VV(vmax_vv_h, 2)
1549GEN_VEXT_VV(vmax_vv_w, 4)
1550GEN_VEXT_VV(vmax_vv_d, 8)
558fa779
LZ
1551
1552RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1553RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1554RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1555RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1556RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1557RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1558RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1559RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1560RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1561RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1562RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1563RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1564RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1565RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1566RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1567RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
5c19fc15 1568GEN_VEXT_VX(vminu_vx_b, 1)
1569GEN_VEXT_VX(vminu_vx_h, 2)
1570GEN_VEXT_VX(vminu_vx_w, 4)
1571GEN_VEXT_VX(vminu_vx_d, 8)
1572GEN_VEXT_VX(vmin_vx_b, 1)
1573GEN_VEXT_VX(vmin_vx_h, 2)
1574GEN_VEXT_VX(vmin_vx_w, 4)
1575GEN_VEXT_VX(vmin_vx_d, 8)
1576GEN_VEXT_VX(vmaxu_vx_b, 1)
1577GEN_VEXT_VX(vmaxu_vx_h, 2)
1578GEN_VEXT_VX(vmaxu_vx_w, 4)
1579GEN_VEXT_VX(vmaxu_vx_d, 8)
1580GEN_VEXT_VX(vmax_vx_b, 1)
1581GEN_VEXT_VX(vmax_vx_h, 2)
1582GEN_VEXT_VX(vmax_vx_w, 4)
1583GEN_VEXT_VX(vmax_vx_d, 8)
958b85f3
LZ
1584
1585/* Vector Single-Width Integer Multiply Instructions */
1586#define DO_MUL(N, M) (N * M)
1587RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1588RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1589RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1590RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
f1eed927 1591GEN_VEXT_VV(vmul_vv_b, 1)
1592GEN_VEXT_VV(vmul_vv_h, 2)
1593GEN_VEXT_VV(vmul_vv_w, 4)
1594GEN_VEXT_VV(vmul_vv_d, 8)
958b85f3
LZ
1595
1596static int8_t do_mulh_b(int8_t s2, int8_t s1)
1597{
1598 return (int16_t)s2 * (int16_t)s1 >> 8;
1599}
1600
1601static int16_t do_mulh_h(int16_t s2, int16_t s1)
1602{
1603 return (int32_t)s2 * (int32_t)s1 >> 16;
1604}
1605
1606static int32_t do_mulh_w(int32_t s2, int32_t s1)
1607{
1608 return (int64_t)s2 * (int64_t)s1 >> 32;
1609}
1610
1611static int64_t do_mulh_d(int64_t s2, int64_t s1)
1612{
1613 uint64_t hi_64, lo_64;
1614
1615 muls64(&lo_64, &hi_64, s1, s2);
1616 return hi_64;
1617}
1618
1619static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1620{
1621 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1622}
1623
1624static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1625{
1626 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1627}
1628
1629static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1630{
1631 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1632}
1633
1634static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1635{
1636 uint64_t hi_64, lo_64;
1637
1638 mulu64(&lo_64, &hi_64, s2, s1);
1639 return hi_64;
1640}
1641
1642static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1643{
1644 return (int16_t)s2 * (uint16_t)s1 >> 8;
1645}
1646
1647static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1648{
1649 return (int32_t)s2 * (uint32_t)s1 >> 16;
1650}
1651
1652static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1653{
1654 return (int64_t)s2 * (uint64_t)s1 >> 32;
1655}
1656
1657/*
1658 * Let A = signed operand,
1659 * B = unsigned operand
1660 * P = mulu64(A, B), unsigned product
1661 * SP = signed product
1662 *
1663 * mulu64 reads the bit pattern of a negative A as A + 2 ** 64, so
1664 * IF A < 0
1665 * P = (A + 2 ** 64) * B
1666 * = A * B + 2 ** 64 * B
1667 * SP = A * B
1668 * = P - 2 ** 64 * B
1669 * ELSE
1670 * SP = P
1671 * THEN
1672 * HI_P -= (A < 0 ? B : 0)
1673 * (only the high 64 bits of P are affected)
1674 */
1675
1676static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1677{
1678 uint64_t hi_64, lo_64;
1679
1680 mulu64(&lo_64, &hi_64, s2, s1);
1681
1682 hi_64 -= s2 < 0 ? s1 : 0;
1683 return hi_64;
1684}
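/*
 * Worked example (illustrative): s2 = -1, s1 = 2.  mulu64 sees s2 as
 * 2 ** 64 - 1, so the unsigned product is 2 ** 65 - 2, i.e. hi_64 = 1
 * and lo_64 = 2 ** 64 - 2.  Since s2 < 0, hi_64 -= s1 yields -1, and
 * (hi_64, lo_64) is then the 128-bit signed product -2, as expected.
 */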
1685
1686RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1687RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1688RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1689RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1690RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1691RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1692RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1693RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1694RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1695RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1696RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1697RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
f1eed927 1698GEN_VEXT_VV(vmulh_vv_b, 1)
1699GEN_VEXT_VV(vmulh_vv_h, 2)
1700GEN_VEXT_VV(vmulh_vv_w, 4)
1701GEN_VEXT_VV(vmulh_vv_d, 8)
1702GEN_VEXT_VV(vmulhu_vv_b, 1)
1703GEN_VEXT_VV(vmulhu_vv_h, 2)
1704GEN_VEXT_VV(vmulhu_vv_w, 4)
1705GEN_VEXT_VV(vmulhu_vv_d, 8)
1706GEN_VEXT_VV(vmulhsu_vv_b, 1)
1707GEN_VEXT_VV(vmulhsu_vv_h, 2)
1708GEN_VEXT_VV(vmulhsu_vv_w, 4)
1709GEN_VEXT_VV(vmulhsu_vv_d, 8)
958b85f3
LZ
1710
1711RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1712RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1713RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1714RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1715RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1716RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1717RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1718RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1719RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1720RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1721RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1722RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1723RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1724RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1725RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1726RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
5c19fc15 1727GEN_VEXT_VX(vmul_vx_b, 1)
1728GEN_VEXT_VX(vmul_vx_h, 2)
1729GEN_VEXT_VX(vmul_vx_w, 4)
1730GEN_VEXT_VX(vmul_vx_d, 8)
1731GEN_VEXT_VX(vmulh_vx_b, 1)
1732GEN_VEXT_VX(vmulh_vx_h, 2)
1733GEN_VEXT_VX(vmulh_vx_w, 4)
1734GEN_VEXT_VX(vmulh_vx_d, 8)
1735GEN_VEXT_VX(vmulhu_vx_b, 1)
1736GEN_VEXT_VX(vmulhu_vx_h, 2)
1737GEN_VEXT_VX(vmulhu_vx_w, 4)
1738GEN_VEXT_VX(vmulhu_vx_d, 8)
1739GEN_VEXT_VX(vmulhsu_vx_b, 1)
1740GEN_VEXT_VX(vmulhsu_vx_h, 2)
1741GEN_VEXT_VX(vmulhsu_vx_w, 4)
1742GEN_VEXT_VX(vmulhsu_vx_d, 8)
85e6658c
LZ
1743
1744/* Vector Integer Divide Instructions */
1745#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1746#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1747#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\
1748 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1749#define DO_REM(N, M) (unlikely(M == 0) ? N :\
1750 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
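/*
 * (N == -N) is true only for 0 and for the minimum value of N's type,
 * so together with (M == -1) it catches signed-overflow division such
 * as INT32_MIN / -1: the quotient stays at the minimum value and the
 * remainder is 0, matching the RISC-V rule.  (For N == 0 the guarded
 * and unguarded expressions give the same result, so the extra test
 * is harmless.)
 */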
1751
1752RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1753RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1754RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1755RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1756RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1757RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1758RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1759RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1760RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1761RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1762RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1763RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1764RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1765RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1766RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1767RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
f1eed927 1768GEN_VEXT_VV(vdivu_vv_b, 1)
1769GEN_VEXT_VV(vdivu_vv_h, 2)
1770GEN_VEXT_VV(vdivu_vv_w, 4)
1771GEN_VEXT_VV(vdivu_vv_d, 8)
1772GEN_VEXT_VV(vdiv_vv_b, 1)
1773GEN_VEXT_VV(vdiv_vv_h, 2)
1774GEN_VEXT_VV(vdiv_vv_w, 4)
1775GEN_VEXT_VV(vdiv_vv_d, 8)
1776GEN_VEXT_VV(vremu_vv_b, 1)
1777GEN_VEXT_VV(vremu_vv_h, 2)
1778GEN_VEXT_VV(vremu_vv_w, 4)
1779GEN_VEXT_VV(vremu_vv_d, 8)
1780GEN_VEXT_VV(vrem_vv_b, 1)
1781GEN_VEXT_VV(vrem_vv_h, 2)
1782GEN_VEXT_VV(vrem_vv_w, 4)
1783GEN_VEXT_VV(vrem_vv_d, 8)
85e6658c
LZ
1784
1785RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1786RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1787RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1788RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1789RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1790RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1791RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1792RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1793RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1794RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1795RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1796RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1797RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1798RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1799RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1800RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
5c19fc15 1801GEN_VEXT_VX(vdivu_vx_b, 1)
1802GEN_VEXT_VX(vdivu_vx_h, 2)
1803GEN_VEXT_VX(vdivu_vx_w, 4)
1804GEN_VEXT_VX(vdivu_vx_d, 8)
1805GEN_VEXT_VX(vdiv_vx_b, 1)
1806GEN_VEXT_VX(vdiv_vx_h, 2)
1807GEN_VEXT_VX(vdiv_vx_w, 4)
1808GEN_VEXT_VX(vdiv_vx_d, 8)
1809GEN_VEXT_VX(vremu_vx_b, 1)
1810GEN_VEXT_VX(vremu_vx_h, 2)
1811GEN_VEXT_VX(vremu_vx_w, 4)
1812GEN_VEXT_VX(vremu_vx_d, 8)
1813GEN_VEXT_VX(vrem_vx_b, 1)
1814GEN_VEXT_VX(vrem_vx_h, 2)
1815GEN_VEXT_VX(vrem_vx_w, 4)
1816GEN_VEXT_VX(vrem_vx_d, 8)
97b1cba3
LZ
1817
1818/* Vector Widening Integer Multiply Instructions */
1819RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1820RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1821RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1822RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1823RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1824RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1825RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1826RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1827RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
f1eed927 1828GEN_VEXT_VV(vwmul_vv_b, 2)
1829GEN_VEXT_VV(vwmul_vv_h, 4)
1830GEN_VEXT_VV(vwmul_vv_w, 8)
1831GEN_VEXT_VV(vwmulu_vv_b, 2)
1832GEN_VEXT_VV(vwmulu_vv_h, 4)
1833GEN_VEXT_VV(vwmulu_vv_w, 8)
1834GEN_VEXT_VV(vwmulsu_vv_b, 2)
1835GEN_VEXT_VV(vwmulsu_vv_h, 4)
1836GEN_VEXT_VV(vwmulsu_vv_w, 8)
97b1cba3
LZ
1837
1838RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1839RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1840RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1841RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1842RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1843RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1844RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1845RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1846RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
5c19fc15 1847GEN_VEXT_VX(vwmul_vx_b, 2)
1848GEN_VEXT_VX(vwmul_vx_h, 4)
1849GEN_VEXT_VX(vwmul_vx_w, 8)
1850GEN_VEXT_VX(vwmulu_vx_b, 2)
1851GEN_VEXT_VX(vwmulu_vx_h, 4)
1852GEN_VEXT_VX(vwmulu_vx_w, 8)
1853GEN_VEXT_VX(vwmulsu_vx_b, 2)
1854GEN_VEXT_VX(vwmulsu_vx_h, 4)
1855GEN_VEXT_VX(vwmulsu_vx_w, 8)
54df813a
LZ
1856
1857/* Vector Single-Width Integer Multiply-Add Instructions */
1858#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1859static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1860{ \
1861 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1862 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1863 TD d = *((TD *)vd + HD(i)); \
1864 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1865}
1866
1867#define DO_MACC(N, M, D) (M * N + D)
1868#define DO_NMSAC(N, M, D) (-(M * N) + D)
1869#define DO_MADD(N, M, D) (M * D + N)
1870#define DO_NMSUB(N, M, D) (-(M * D) + N)
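/*
 * With OPIVV3 invoking OP(s2, s1, d), these expand to:
 *   DO_MACC:  vd[i] =   vs1[i] * vs2[i]  + vd[i]
 *   DO_NMSAC: vd[i] = -(vs1[i] * vs2[i]) + vd[i]
 *   DO_MADD:  vd[i] =   vs1[i] * vd[i]   + vs2[i]
 *   DO_NMSUB: vd[i] = -(vs1[i] * vd[i])  + vs2[i]
 * i.e. vmacc/vnmsac accumulate into vd, while vmadd/vnmsub multiply
 * by vd and add the remaining source.
 */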
1871RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1872RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1873RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1874RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1875RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1876RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1877RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1878RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1879RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1880RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1881RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1882RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1883RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1884RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1885RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1886RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
f1eed927 1887GEN_VEXT_VV(vmacc_vv_b, 1)
1888GEN_VEXT_VV(vmacc_vv_h, 2)
1889GEN_VEXT_VV(vmacc_vv_w, 4)
1890GEN_VEXT_VV(vmacc_vv_d, 8)
1891GEN_VEXT_VV(vnmsac_vv_b, 1)
1892GEN_VEXT_VV(vnmsac_vv_h, 2)
1893GEN_VEXT_VV(vnmsac_vv_w, 4)
1894GEN_VEXT_VV(vnmsac_vv_d, 8)
1895GEN_VEXT_VV(vmadd_vv_b, 1)
1896GEN_VEXT_VV(vmadd_vv_h, 2)
1897GEN_VEXT_VV(vmadd_vv_w, 4)
1898GEN_VEXT_VV(vmadd_vv_d, 8)
1899GEN_VEXT_VV(vnmsub_vv_b, 1)
1900GEN_VEXT_VV(vnmsub_vv_h, 2)
1901GEN_VEXT_VV(vnmsub_vv_w, 4)
1902GEN_VEXT_VV(vnmsub_vv_d, 8)
54df813a
LZ
1903
1904#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1905static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1906{ \
1907 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1908 TD d = *((TD *)vd + HD(i)); \
1909 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1910}
1911
1912RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1913RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1914RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1915RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1916RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1917RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1918RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1919RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1920RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1921RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1922RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1923RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1924RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1925RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1926RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1927RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
5c19fc15 1928GEN_VEXT_VX(vmacc_vx_b, 1)
1929GEN_VEXT_VX(vmacc_vx_h, 2)
1930GEN_VEXT_VX(vmacc_vx_w, 4)
1931GEN_VEXT_VX(vmacc_vx_d, 8)
1932GEN_VEXT_VX(vnmsac_vx_b, 1)
1933GEN_VEXT_VX(vnmsac_vx_h, 2)
1934GEN_VEXT_VX(vnmsac_vx_w, 4)
1935GEN_VEXT_VX(vnmsac_vx_d, 8)
1936GEN_VEXT_VX(vmadd_vx_b, 1)
1937GEN_VEXT_VX(vmadd_vx_h, 2)
1938GEN_VEXT_VX(vmadd_vx_w, 4)
1939GEN_VEXT_VX(vmadd_vx_d, 8)
1940GEN_VEXT_VX(vnmsub_vx_b, 1)
1941GEN_VEXT_VX(vnmsub_vx_h, 2)
1942GEN_VEXT_VX(vnmsub_vx_w, 4)
1943GEN_VEXT_VX(vnmsub_vx_d, 8)
2b587b33
LZ
1944
1945/* Vector Widening Integer Multiply-Add Instructions */
1946RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1947RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1948RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1949RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1950RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1951RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1952RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1953RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1954RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
f1eed927 1955GEN_VEXT_VV(vwmaccu_vv_b, 2)
1956GEN_VEXT_VV(vwmaccu_vv_h, 4)
1957GEN_VEXT_VV(vwmaccu_vv_w, 8)
1958GEN_VEXT_VV(vwmacc_vv_b, 2)
1959GEN_VEXT_VV(vwmacc_vv_h, 4)
1960GEN_VEXT_VV(vwmacc_vv_w, 8)
1961GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1962GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1963GEN_VEXT_VV(vwmaccsu_vv_w, 8)
2b587b33
LZ
1964
1965RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1966RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1967RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1968RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1969RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1970RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1971RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1972RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1973RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1974RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1975RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1976RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
5c19fc15 1977GEN_VEXT_VX(vwmaccu_vx_b, 2)
1978GEN_VEXT_VX(vwmaccu_vx_h, 4)
1979GEN_VEXT_VX(vwmaccu_vx_w, 8)
1980GEN_VEXT_VX(vwmacc_vx_b, 2)
1981GEN_VEXT_VX(vwmacc_vx_h, 4)
1982GEN_VEXT_VX(vwmacc_vx_w, 8)
1983GEN_VEXT_VX(vwmaccsu_vx_b, 2)
1984GEN_VEXT_VX(vwmaccsu_vx_h, 4)
1985GEN_VEXT_VX(vwmaccsu_vx_w, 8)
1986GEN_VEXT_VX(vwmaccus_vx_b, 2)
1987GEN_VEXT_VX(vwmaccus_vx_h, 4)
1988GEN_VEXT_VX(vwmaccus_vx_w, 8)
f020a7a1
LZ
1989
1990/* Vector Integer Merge and Move Instructions */
3479a814 1991#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
f020a7a1
LZ
1992void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1993 uint32_t desc) \
1994{ \
1995 uint32_t vl = env->vl; \
89a32de2 1996 uint32_t esz = sizeof(ETYPE); \
1997 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1998 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
1999 uint32_t i; \
2000 \
f714361e 2001 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
2002 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
2003 *((ETYPE *)vd + H(i)) = s1; \
2004 } \
f714361e 2005 env->vstart = 0; \
89a32de2 2006 /* set tail elements to 1s */ \
2007 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2008}
2009
3479a814
FC
2010GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
2011GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
2012GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
2013GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
f020a7a1 2014
3479a814 2015#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
f020a7a1
LZ
2016void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
2017 uint32_t desc) \
2018{ \
2019 uint32_t vl = env->vl; \
89a32de2 2020 uint32_t esz = sizeof(ETYPE); \
2021 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2022 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2023 uint32_t i; \
2024 \
f714361e 2025 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
2026 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
2027 } \
f714361e 2028 env->vstart = 0; \
89a32de2 2029 /* set tail elements to 1s */ \
2030 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2031}
2032
3479a814
FC
2033GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
2034GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
2035GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
2036GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
f020a7a1 2037
3479a814 2038#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
f020a7a1
LZ
2039void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2040 CPURISCVState *env, uint32_t desc) \
2041{ \
f020a7a1 2042 uint32_t vl = env->vl; \
89a32de2 2043 uint32_t esz = sizeof(ETYPE); \
2044 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2045 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2046 uint32_t i; \
2047 \
f714361e 2048 for (i = env->vstart; i < vl; i++) { \
f9298de5 2049 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
f020a7a1
LZ
2050 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
2051 } \
f714361e 2052 env->vstart = 0; \
89a32de2 2053 /* set tail elements to 1s */ \
2054 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2055}
2056
3479a814
FC
2057GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
2058GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
2059GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
2060GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
f020a7a1 2061
3479a814 2062#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
f020a7a1
LZ
2063void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2064 void *vs2, CPURISCVState *env, uint32_t desc) \
2065{ \
f020a7a1 2066 uint32_t vl = env->vl; \
89a32de2 2067 uint32_t esz = sizeof(ETYPE); \
2068 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2069 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2070 uint32_t i; \
2071 \
f714361e 2072 for (i = env->vstart; i < vl; i++) { \
f020a7a1 2073 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 2074 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
f020a7a1
LZ
2075 (ETYPE)(target_long)s1); \
2076 *((ETYPE *)vd + H(i)) = d; \
2077 } \
f714361e 2078 env->vstart = 0; \
89a32de2 2079 /* set tail elements to 1s */ \
2080 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2081}
2082
3479a814
FC
2083GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
2084GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
2085GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
2086GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
eb2650e3
LZ
2087
2088/*
2089 *** Vector Fixed-Point Arithmetic Instructions
2090 */
2091
2092/* Vector Single-Width Saturating Add and Subtract */
2093
2094/*
2095 * As fixed-point instructions generally need a rounding mode and saturation,
2096 * define common macros for fixed point here.
2097 */
2098typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2099 CPURISCVState *env, int vxrm);
2100
2101#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2102static inline void \
2103do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2104 CPURISCVState *env, int vxrm) \
2105{ \
2106 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2107 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2108 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2109}
2110
2111static inline void
2112vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2113 CPURISCVState *env,
f9298de5 2114 uint32_t vl, uint32_t vm, int vxrm,
eb2650e3
LZ
2115 opivv2_rm_fn *fn)
2116{
f714361e 2117 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2118 if (!vm && !vext_elem_mask(v0, i)) {
eb2650e3
LZ
2119 continue;
2120 }
2121 fn(vd, vs1, vs2, i, env, vxrm);
2122 }
f714361e 2123 env->vstart = 0;
eb2650e3
LZ
2124}
2125
2126static inline void
2127vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2128 CPURISCVState *env,
8a085fb2 2129 uint32_t desc,
09106eed 2130 opivv2_rm_fn *fn, uint32_t esz)
eb2650e3 2131{
eb2650e3
LZ
2132 uint32_t vm = vext_vm(desc);
2133 uint32_t vl = env->vl;
09106eed 2134 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2135 uint32_t vta = vext_vta(desc);
eb2650e3
LZ
2136
2137 switch (env->vxrm) {
2138 case 0: /* rnu */
2139 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 2140 env, vl, vm, 0, fn);
eb2650e3
LZ
2141 break;
2142 case 1: /* rne */
2143 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 2144 env, vl, vm, 1, fn);
eb2650e3
LZ
2145 break;
2146 case 2: /* rdn */
2147 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 2148 env, vl, vm, 2, fn);
eb2650e3
LZ
2149 break;
2150 default: /* rod */
2151 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 2152 env, vl, vm, 3, fn);
eb2650e3
LZ
2153 break;
2154 }
09106eed 2155 /* set tail elements to 1s */
2156 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2157}
2158
2159/* generate helpers for fixed point instructions with OPIVV format */
09106eed 2160#define GEN_VEXT_VV_RM(NAME, ESZ) \
eb2650e3
LZ
2161void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2162 CPURISCVState *env, uint32_t desc) \
2163{ \
8a085fb2 2164 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
09106eed 2165 do_##NAME, ESZ); \
eb2650e3
LZ
2166}
2167
2168static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2169{
2170 uint8_t res = a + b;
2171 if (res < a) {
2172 res = UINT8_MAX;
2173 env->vxsat = 0x1;
2174 }
2175 return res;
2176}
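/*
 * Unsigned overflow appears as wrap-around, so "res < a" suffices:
 * e.g. 200 + 100 wraps to 44 (< 200), so the sum saturates to
 * UINT8_MAX and vxsat is set.
 */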
2177
2178static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2179 uint16_t b)
2180{
2181 uint16_t res = a + b;
2182 if (res < a) {
2183 res = UINT16_MAX;
2184 env->vxsat = 0x1;
2185 }
2186 return res;
2187}
2188
2189static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2190 uint32_t b)
2191{
2192 uint32_t res = a + b;
2193 if (res < a) {
2194 res = UINT32_MAX;
2195 env->vxsat = 0x1;
2196 }
2197 return res;
2198}
2199
2200static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2201 uint64_t b)
2202{
2203 uint64_t res = a + b;
2204 if (res < a) {
2205 res = UINT64_MAX;
2206 env->vxsat = 0x1;
2207 }
2208 return res;
2209}
2210
2211RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2212RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2213RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2214RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
09106eed 2215GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2216GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2217GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2218GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
eb2650e3
LZ
2219
2220typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2221 CPURISCVState *env, int vxrm);
2222
2223#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2224static inline void \
2225do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2226 CPURISCVState *env, int vxrm) \
2227{ \
2228 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2229 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2230}
2231
2232static inline void
2233vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2234 CPURISCVState *env,
f9298de5 2235 uint32_t vl, uint32_t vm, int vxrm,
eb2650e3
LZ
2236 opivx2_rm_fn *fn)
2237{
f714361e 2238 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2239 if (!vm && !vext_elem_mask(v0, i)) {
eb2650e3
LZ
2240 continue;
2241 }
2242 fn(vd, s1, vs2, i, env, vxrm);
2243 }
f714361e 2244 env->vstart = 0;
eb2650e3
LZ
2245}
2246
2247static inline void
2248vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2249 CPURISCVState *env,
8a085fb2 2250 uint32_t desc,
09106eed 2251 opivx2_rm_fn *fn, uint32_t esz)
eb2650e3 2252{
eb2650e3
LZ
2253 uint32_t vm = vext_vm(desc);
2254 uint32_t vl = env->vl;
09106eed 2255 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2256 uint32_t vta = vext_vta(desc);
eb2650e3
LZ
2257
2258 switch (env->vxrm) {
2259 case 0: /* rnu */
2260 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2261 env, vl, vm, 0, fn);
eb2650e3
LZ
2262 break;
2263 case 1: /* rne */
2264 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2265 env, vl, vm, 1, fn);
eb2650e3
LZ
2266 break;
2267 case 2: /* rdn */
2268 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2269 env, vl, vm, 2, fn);
eb2650e3
LZ
2270 break;
2271 default: /* rod */
2272 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2273 env, vl, vm, 3, fn);
eb2650e3
LZ
2274 break;
2275 }
09106eed 2276 /* set tail elements to 1s */
2277 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2278}
2279
2280/* generate helpers for fixed point instructions with OPIVX format */
09106eed 2281#define GEN_VEXT_VX_RM(NAME, ESZ) \
eb2650e3
LZ
2282void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2283 void *vs2, CPURISCVState *env, uint32_t desc) \
2284{ \
8a085fb2 2285 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
09106eed 2286 do_##NAME, ESZ); \
eb2650e3
LZ
2287}
2288
2289RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2290RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2291RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2292RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
09106eed 2293GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2294GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2295GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2296GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
eb2650e3
LZ
2297
2298static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2299{
2300 int8_t res = a + b;
2301 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2302 res = a > 0 ? INT8_MAX : INT8_MIN;
2303 env->vxsat = 0x1;
2304 }
2305 return res;
2306}
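/*
 * Signed addition overflows only when both operands share a sign and
 * the result's sign differs; (res ^ a) & (res ^ b) has its top bit set
 * exactly in that case.  E.g. 100 + 100 wraps to -56, so the result
 * saturates to INT8_MAX (a > 0) and vxsat is set.
 */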
2307
2308static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2309{
2310 int16_t res = a + b;
2311 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2312 res = a > 0 ? INT16_MAX : INT16_MIN;
2313 env->vxsat = 0x1;
2314 }
2315 return res;
2316}
2317
2318static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2319{
2320 int32_t res = a + b;
2321 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2322 res = a > 0 ? INT32_MAX : INT32_MIN;
2323 env->vxsat = 0x1;
2324 }
2325 return res;
2326}
2327
2328static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2329{
2330 int64_t res = a + b;
2331 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2332 res = a > 0 ? INT64_MAX : INT64_MIN;
2333 env->vxsat = 0x1;
2334 }
2335 return res;
2336}
2337
2338RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2339RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2340RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2341RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
09106eed 2342GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2343GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2344GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2345GEN_VEXT_VV_RM(vsadd_vv_d, 8)
eb2650e3
LZ
2346
2347RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2348RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2349RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2350RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
09106eed 2351GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2352GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2353GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2354GEN_VEXT_VX_RM(vsadd_vx_d, 8)
eb2650e3
LZ
2355
2356static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2357{
2358 uint8_t res = a - b;
2359 if (res > a) {
2360 res = 0;
2361 env->vxsat = 0x1;
2362 }
2363 return res;
2364}
2365
2366static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2367 uint16_t b)
2368{
2369 uint16_t res = a - b;
2370 if (res > a) {
2371 res = 0;
2372 env->vxsat = 0x1;
2373 }
2374 return res;
2375}
2376
2377static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2378 uint32_t b)
2379{
2380 uint32_t res = a - b;
2381 if (res > a) {
2382 res = 0;
2383 env->vxsat = 0x1;
2384 }
2385 return res;
2386}
2387
2388static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2389 uint64_t b)
2390{
2391 uint64_t res = a - b;
2392 if (res > a) {
2393 res = 0;
2394 env->vxsat = 0x1;
2395 }
2396 return res;
2397}
2398
2399RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2400RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2401RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2402RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
09106eed 2403GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2404GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2405GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2406GEN_VEXT_VV_RM(vssubu_vv_d, 8)
eb2650e3
LZ
2407
2408RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2409RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2410RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2411RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
09106eed 2412GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2413GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2414GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2415GEN_VEXT_VX_RM(vssubu_vx_d, 8)
eb2650e3
LZ
2416
2417static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2418{
2419 int8_t res = a - b;
2420 if ((res ^ a) & (a ^ b) & INT8_MIN) {
65606f21 2421 res = a >= 0 ? INT8_MAX : INT8_MIN;
eb2650e3
LZ
2422 env->vxsat = 0x1;
2423 }
2424 return res;
2425}
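/*
 * Signed subtraction overflows only when the operands differ in sign
 * and the result takes the sign of b; (res ^ a) & (a ^ b) isolates
 * that in the sign bit.  E.g. -100 - 100 wraps to +56, so the result
 * saturates to INT8_MIN (a < 0) and vxsat is set.
 */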
2426
2427static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2428{
2429 int16_t res = a - b;
2430 if ((res ^ a) & (a ^ b) & INT16_MIN) {
65606f21 2431 res = a >= 0 ? INT16_MAX : INT16_MIN;
eb2650e3
LZ
2432 env->vxsat = 0x1;
2433 }
2434 return res;
2435}
2436
2437static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2438{
2439 int32_t res = a - b;
2440 if ((res ^ a) & (a ^ b) & INT32_MIN) {
65606f21 2441 res = a >= 0 ? INT32_MAX : INT32_MIN;
eb2650e3
LZ
2442 env->vxsat = 0x1;
2443 }
2444 return res;
2445}
2446
2447static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2448{
2449 int64_t res = a - b;
2450 if ((res ^ a) & (a ^ b) & INT64_MIN) {
65606f21 2451 res = a >= 0 ? INT64_MAX : INT64_MIN;
eb2650e3
LZ
2452 env->vxsat = 0x1;
2453 }
2454 return res;
2455}
2456
2457RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2458RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2459RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2460RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
09106eed 2461GEN_VEXT_VV_RM(vssub_vv_b, 1)
2462GEN_VEXT_VV_RM(vssub_vv_h, 2)
2463GEN_VEXT_VV_RM(vssub_vv_w, 4)
2464GEN_VEXT_VV_RM(vssub_vv_d, 8)
eb2650e3
LZ
2465
2466RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2467RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2468RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2469RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
09106eed 2470GEN_VEXT_VX_RM(vssub_vx_b, 1)
2471GEN_VEXT_VX_RM(vssub_vx_h, 2)
2472GEN_VEXT_VX_RM(vssub_vx_w, 4)
2473GEN_VEXT_VX_RM(vssub_vx_d, 8)
b7aee481
LZ
2474
2475/* Vector Single-Width Averaging Add and Subtract */
2476static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2477{
2478 uint8_t d = extract64(v, shift, 1);
2479 uint8_t d1;
2480 uint64_t D1, D2;
2481
2482 if (shift == 0 || shift > 64) {
2483 return 0;
2484 }
2485
2486 d1 = extract64(v, shift - 1, 1);
2487 D1 = extract64(v, 0, shift);
2488 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2489 return d1;
2490 } else if (vxrm == 1) { /* round-to-nearest-even */
2491 if (shift > 1) {
2492 D2 = extract64(v, 0, shift - 1);
2493 return d1 & ((D2 != 0) | d);
2494 } else {
2495 return d1 & d;
2496 }
2497 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2498 return !d & (D1 != 0);
2499 }
2500 return 0; /* round-down (truncate) */
2501}
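/*
 * Worked example (illustrative): v = 1, shift = 1, i.e. rounding the
 * value 1/2.  Then d (bit 1) = 0, d1 (bit 0) = 1 and D1 = 1, so the
 * rounding increment is:
 *   rnu: d1 = 1              -> (1 >> 1) + 1 = 1   (round half up)
 *   rne: d1 & d = 0          -> 0                  (tie goes to even)
 *   rdn: 0                   -> 0                  (truncate)
 *   rod: !d & (D1 != 0) = 1  -> 1                  (jam to odd)
 */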
2502
2503static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2504{
2505 int64_t res = (int64_t)a + b;
2506 uint8_t round = get_round(vxrm, res, 1);
2507
2508 return (res >> 1) + round;
2509}
2510
2511static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2512{
2513 int64_t res = a + b;
2514 uint8_t round = get_round(vxrm, res, 1);
2515 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2516
2517 /* With signed overflow, bit 64 is inverse of bit 63. */
2518 return ((res >> 1) ^ over) + round;
2519}
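/*
 * Example: a = b = INT64_MAX.  The 64-bit sum wraps to -2 and "over"
 * is set because the operands agree in sign while the sum does not;
 * (res >> 1) = -1, and xoring with "over" flips the sign bit back,
 * giving INT64_MAX, the true average of the unwrapped 65-bit sum.
 */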
2520
2521RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2522RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2523RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2524RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
09106eed 2525GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2526GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2527GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2528GEN_VEXT_VV_RM(vaadd_vv_d, 8)
b7aee481
LZ
2529
2530RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2531RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2532RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2533RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
09106eed 2534GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2535GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2536GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2537GEN_VEXT_VX_RM(vaadd_vx_d, 8)
b7aee481 2538
8b99a110
FC
2539static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2540 uint32_t a, uint32_t b)
2541{
2542 uint64_t res = (uint64_t)a + b;
2543 uint8_t round = get_round(vxrm, res, 1);
2544
2545 return (res >> 1) + round;
2546}
2547
2548static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2549 uint64_t a, uint64_t b)
2550{
2551 uint64_t res = a + b;
2552 uint8_t round = get_round(vxrm, res, 1);
2553 uint64_t over = (uint64_t)(res < a) << 63;
2554
2555 return ((res >> 1) | over) + round;
2556}
2557
2558RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2559RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2560RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2561RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
09106eed 2562GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2563GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2564GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2565GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
8b99a110
FC
2566
2567RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2568RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2569RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2570RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
09106eed 2571GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2572GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2573GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2574GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
8b99a110 2575
b7aee481
LZ
2576static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2577{
2578 int64_t res = (int64_t)a - b;
2579 uint8_t round = get_round(vxrm, res, 1);
2580
2581 return (res >> 1) + round;
2582}
2583
2584static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2585{
2586 int64_t res = (int64_t)a - b;
2587 uint8_t round = get_round(vxrm, res, 1);
2588 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2589
2590 /* With signed overflow, bit 64 is inverse of bit 63. */
2591 return ((res >> 1) ^ over) + round;
2592}
2593
2594RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2595RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2596RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2597RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
09106eed 2598GEN_VEXT_VV_RM(vasub_vv_b, 1)
2599GEN_VEXT_VV_RM(vasub_vv_h, 2)
2600GEN_VEXT_VV_RM(vasub_vv_w, 4)
2601GEN_VEXT_VV_RM(vasub_vv_d, 8)
b7aee481
LZ
2602
2603RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2604RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2605RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2606RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
09106eed 2607GEN_VEXT_VX_RM(vasub_vx_b, 1)
2608GEN_VEXT_VX_RM(vasub_vx_h, 2)
2609GEN_VEXT_VX_RM(vasub_vx_w, 4)
2610GEN_VEXT_VX_RM(vasub_vx_d, 8)
9f0ff9e5 2611
8b99a110
FC
2612static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2613 uint32_t a, uint32_t b)
2614{
2615 int64_t res = (int64_t)a - b;
2616 uint8_t round = get_round(vxrm, res, 1);
2617
2618 return (res >> 1) + round;
2619}
2620
2621static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2622 uint64_t a, uint64_t b)
2623{
2624 uint64_t res = (uint64_t)a - b;
2625 uint8_t round = get_round(vxrm, res, 1);
2626 uint64_t over = (uint64_t)(res > a) << 63;
2627
2628 return ((res >> 1) | over) + round;
2629}
2630
2631RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2632RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2633RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2634RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
09106eed 2635GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2636GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2637GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2638GEN_VEXT_VV_RM(vasubu_vv_d, 8)
8b99a110
FC
2639
2640RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2641RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2642RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2643RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
09106eed 2644GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2645GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2646GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2647GEN_VEXT_VX_RM(vasubu_vx_d, 8)
8b99a110 2648
9f0ff9e5
LZ
2649/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2650static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2651{
2652 uint8_t round;
2653 int16_t res;
2654
2655 res = (int16_t)a * (int16_t)b;
2656 round = get_round(vxrm, res, 7);
2657 res = (res >> 7) + round;
2658
2659 if (res > INT8_MAX) {
2660 env->vxsat = 0x1;
2661 return INT8_MAX;
2662 } else if (res < INT8_MIN) {
2663 env->vxsat = 0x1;
2664 return INT8_MIN;
2665 } else {
2666 return res;
2667 }
2668}
2669
2670static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2671{
2672 uint8_t round;
2673 int32_t res;
2674
2675 res = (int32_t)a * (int32_t)b;
2676 round = get_round(vxrm, res, 15);
2677 res = (res >> 15) + round;
2678
2679 if (res > INT16_MAX) {
2680 env->vxsat = 0x1;
2681 return INT16_MAX;
2682 } else if (res < INT16_MIN) {
2683 env->vxsat = 0x1;
2684 return INT16_MIN;
2685 } else {
2686 return res;
2687 }
2688}
2689
2690static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2691{
2692 uint8_t round;
2693 int64_t res;
2694
2695 res = (int64_t)a * (int64_t)b;
2696 round = get_round(vxrm, res, 31);
2697 res = (res >> 31) + round;
2698
2699 if (res > INT32_MAX) {
2700 env->vxsat = 0x1;
2701 return INT32_MAX;
2702 } else if (res < INT32_MIN) {
2703 env->vxsat = 0x1;
2704 return INT32_MIN;
2705 } else {
2706 return res;
2707 }
2708}
2709
2710static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2711{
2712 uint8_t round;
2713 uint64_t hi_64, lo_64;
2714 int64_t res;
2715
2716 if (a == INT64_MIN && b == INT64_MIN) {
2717 env->vxsat = 1;
2718 return INT64_MAX;
2719 }
2720
2721 muls64(&lo_64, &hi_64, a, b);
2722 round = get_round(vxrm, lo_64, 63);
2723 /*
2724 * Cannot overflow, as there are always
2725 * 2 sign bits after multiply.
2726 */
2727 res = (hi_64 << 1) | (lo_64 >> 63);
2728 if (round) {
2729 if (res == INT64_MAX) {
2730 env->vxsat = 1;
2731 } else {
2732 res += 1;
2733 }
2734 }
2735 return res;
2736}
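/*
 * Apart from INT64_MIN * INT64_MIN (handled above), a signed 64 x 64
 * product fits in 127 bits, so the top two bits of the 128-bit result
 * are identical and (hi_64 << 1) | (lo_64 >> 63) drops the redundant
 * sign bit without losing information; only the rounding increment
 * can push past INT64_MAX, hence the saturation check on "round".
 */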
2737
2738RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2739RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2740RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2741RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
09106eed 2742GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2743GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2744GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2745GEN_VEXT_VV_RM(vsmul_vv_d, 8)
9f0ff9e5
LZ
2746
2747RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2748RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2749RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2750RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
09106eed 2751GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2752GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2753GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2754GEN_VEXT_VX_RM(vsmul_vx_d, 8)
0a1eaf00 2755
04a61406
LZ
2756/* Vector Single-Width Scaling Shift Instructions */
2757static inline uint8_t
2758vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2759{
2760 uint8_t round, shift = b & 0x7;
2761 uint8_t res;
2762
2763 round = get_round(vxrm, a, shift);
2764 res = (a >> shift) + round;
2765 return res;
2766}
2767static inline uint16_t
2768vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2769{
2770 uint8_t round, shift = b & 0xf;
2771 uint16_t res;
2772
2773 round = get_round(vxrm, a, shift);
2774 res = (a >> shift) + round;
2775 return res;
2776}
2777static inline uint32_t
2778vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2779{
2780 uint8_t round, shift = b & 0x1f;
2781 uint32_t res;
2782
2783 round = get_round(vxrm, a, shift);
2784 res = (a >> shift) + round;
2785 return res;
2786}
2787static inline uint64_t
2788vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2789{
2790 uint8_t round, shift = b & 0x3f;
2791 uint64_t res;
2792
2793 round = get_round(vxrm, a, shift);
2794 res = (a >> shift) + round;
2795 return res;
2796}
2797RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2798RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2799RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2800RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
09106eed 2801GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2802GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2803GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2804GEN_VEXT_VV_RM(vssrl_vv_d, 8)
04a61406
LZ
2805
2806RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2807RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2808RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2809RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
09106eed 2810GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2811GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2812GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2813GEN_VEXT_VX_RM(vssrl_vx_d, 8)
04a61406
LZ
2814
2815static inline int8_t
2816vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2817{
2818 uint8_t round, shift = b & 0x7;
2819 int8_t res;
2820
2821 round = get_round(vxrm, a, shift);
2822 res = (a >> shift) + round;
2823 return res;
2824}
2825static inline int16_t
2826vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2827{
2828 uint8_t round, shift = b & 0xf;
2829 int16_t res;
2830
2831 round = get_round(vxrm, a, shift);
2832 res = (a >> shift) + round;
2833 return res;
2834}
2835static inline int32_t
2836vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2837{
2838 uint8_t round, shift = b & 0x1f;
2839 int32_t res;
2840
2841 round = get_round(vxrm, a, shift);
2842 res = (a >> shift) + round;
2843 return res;
2844}
2845static inline int64_t
2846vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2847{
2848 uint8_t round, shift = b & 0x3f;
2849 int64_t res;
2850
2851 round = get_round(vxrm, a, shift);
2852 res = (a >> shift) + round;
2853 return res;
2854}
9ff3d287 2855
04a61406
LZ
2856RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2857RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2858RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2859RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
09106eed 2860GEN_VEXT_VV_RM(vssra_vv_b, 1)
2861GEN_VEXT_VV_RM(vssra_vv_h, 2)
2862GEN_VEXT_VV_RM(vssra_vv_w, 4)
2863GEN_VEXT_VV_RM(vssra_vv_d, 8)
04a61406
LZ
2864
2865RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2866RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2867RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2868RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
09106eed 2869GEN_VEXT_VX_RM(vssra_vx_b, 1)
2870GEN_VEXT_VX_RM(vssra_vx_h, 2)
2871GEN_VEXT_VX_RM(vssra_vx_w, 4)
2872GEN_VEXT_VX_RM(vssra_vx_d, 8)
9ff3d287
LZ
2873
2874/* Vector Narrowing Fixed-Point Clip Instructions */
2875static inline int8_t
2876vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2877{
2878 uint8_t round, shift = b & 0xf;
2879 int16_t res;
2880
2881 round = get_round(vxrm, a, shift);
2882 res = (a >> shift) + round;
2883 if (res > INT8_MAX) {
2884 env->vxsat = 0x1;
2885 return INT8_MAX;
2886 } else if (res < INT8_MIN) {
2887 env->vxsat = 0x1;
2888 return INT8_MIN;
2889 } else {
2890 return res;
2891 }
2892}
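/*
 * Example: narrowing a = 300 (int16_t) with shift b = 1 under rnu:
 * the rounding increment is 0 (bit 0 of a is 0) and 300 >> 1 = 150,
 * which exceeds INT8_MAX, so the clip returns 127 and sets vxsat.
 */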
2893
2894static inline int16_t
2895vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2896{
2897 uint8_t round, shift = b & 0x1f;
2898 int32_t res;
2899
2900 round = get_round(vxrm, a, shift);
2901 res = (a >> shift) + round;
2902 if (res > INT16_MAX) {
2903 env->vxsat = 0x1;
2904 return INT16_MAX;
2905 } else if (res < INT16_MIN) {
2906 env->vxsat = 0x1;
2907 return INT16_MIN;
2908 } else {
2909 return res;
2910 }
2911}
2912
2913static inline int32_t
2914vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2915{
2916 uint8_t round, shift = b & 0x3f;
2917 int64_t res;
2918
2919 round = get_round(vxrm, a, shift);
2920 res = (a >> shift) + round;
2921 if (res > INT32_MAX) {
2922 env->vxsat = 0x1;
2923 return INT32_MAX;
2924 } else if (res < INT32_MIN) {
2925 env->vxsat = 0x1;
2926 return INT32_MIN;
2927 } else {
2928 return res;
2929 }
2930}
2931
a70b3a73
FC
2932RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2933RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2934RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
09106eed 2935GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2936GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2937GEN_VEXT_VV_RM(vnclip_wv_w, 4)
a70b3a73
FC
2938
2939RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2940RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2941RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
09106eed 2942GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2943GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2944GEN_VEXT_VX_RM(vnclip_wx_w, 4)
9ff3d287
LZ
2945
2946static inline uint8_t
2947vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2948{
2949 uint8_t round, shift = b & 0xf;
2950 uint16_t res;
2951
2952 round = get_round(vxrm, a, shift);
2953 res = (a >> shift) + round;
2954 if (res > UINT8_MAX) {
2955 env->vxsat = 0x1;
2956 return UINT8_MAX;
2957 } else {
2958 return res;
2959 }
2960}
2961
2962static inline uint16_t
2963vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2964{
2965 uint8_t round, shift = b & 0x1f;
2966 uint32_t res;
2967
2968 round = get_round(vxrm, a, shift);
2969 res = (a >> shift) + round;
2970 if (res > UINT16_MAX) {
2971 env->vxsat = 0x1;
2972 return UINT16_MAX;
2973 } else {
2974 return res;
2975 }
2976}
2977
2978static inline uint32_t
2979vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2980{
2981 uint8_t round, shift = b & 0x3f;
a70b3a73 2982 uint64_t res;
9ff3d287
LZ
2983
2984 round = get_round(vxrm, a, shift);
2985 res = (a >> shift) + round;
2986 if (res > UINT32_MAX) {
2987 env->vxsat = 0x1;
2988 return UINT32_MAX;
2989 } else {
2990 return res;
2991 }
2992}
2993
a70b3a73
FC
2994RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2995RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2996RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
09106eed 2997GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
2998GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
2999GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
9ff3d287 3000
a70b3a73
FC
3001RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
3002RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
3003RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
09106eed 3004GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
3005GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
3006GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
ce2a0343
LZ
3007
3008/*
3009 *** Vector Floating-Point Arithmetic Instructions
3010 */
3011/* Vector Single-Width Floating-Point Add/Subtract Instructions */
3012#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3013static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3014 CPURISCVState *env) \
3015{ \
3016 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3017 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3018 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
3019}
3020
5eacf7d8 3021#define GEN_VEXT_VV_ENV(NAME, ESZ) \
ce2a0343
LZ
3022void HELPER(NAME)(void *vd, void *v0, void *vs1, \
3023 void *vs2, CPURISCVState *env, \
3024 uint32_t desc) \
3025{ \
ce2a0343
LZ
3026 uint32_t vm = vext_vm(desc); \
3027 uint32_t vl = env->vl; \
5eacf7d8 3028 uint32_t total_elems = \
3029 vext_get_total_elems(env, desc, ESZ); \
3030 uint32_t vta = vext_vta(desc); \
ce2a0343
LZ
3031 uint32_t i; \
3032 \
f714361e 3033 for (i = env->vstart; i < vl; i++) { \
f9298de5 3034 if (!vm && !vext_elem_mask(v0, i)) { \
ce2a0343
LZ
3035 continue; \
3036 } \
3037 do_##NAME(vd, vs1, vs2, i, env); \
3038 } \
f714361e 3039 env->vstart = 0; \
5eacf7d8 3040 /* set tail elements to 1s */ \
3041 vext_set_elems_1s(vd, vta, vl * ESZ, \
3042 total_elems * ESZ); \
ce2a0343
LZ
3043}
3044
3045RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3046RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3047RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
5eacf7d8 3048GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
3049GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
3050GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
ce2a0343
LZ
3051
3052#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3053static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3054 CPURISCVState *env) \
3055{ \
3056 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3057 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3058}
3059
5eacf7d8 3060#define GEN_VEXT_VF(NAME, ESZ) \
ce2a0343
LZ
3061void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
3062 void *vs2, CPURISCVState *env, \
3063 uint32_t desc) \
3064{ \
ce2a0343
LZ
3065 uint32_t vm = vext_vm(desc); \
3066 uint32_t vl = env->vl; \
5eacf7d8 3067 uint32_t total_elems = \
3068 vext_get_total_elems(env, desc, ESZ); \
3069 uint32_t vta = vext_vta(desc); \
ce2a0343
LZ
3070 uint32_t i; \
3071 \
f714361e 3072 for (i = env->vstart; i < vl; i++) { \
f9298de5 3073 if (!vm && !vext_elem_mask(v0, i)) { \
ce2a0343
LZ
3074 continue; \
3075 } \
3076 do_##NAME(vd, s1, vs2, i, env); \
3077 } \
f714361e 3078 env->vstart = 0; \
5eacf7d8 3079 /* set tail elements to 1s */ \
3080 vext_set_elems_1s(vd, vta, vl * ESZ, \
3081 total_elems * ESZ); \
ce2a0343
LZ
3082}
3083
3084RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3085RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3086RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
5eacf7d8 3087GEN_VEXT_VF(vfadd_vf_h, 2)
3088GEN_VEXT_VF(vfadd_vf_w, 4)
3089GEN_VEXT_VF(vfadd_vf_d, 8)
ce2a0343
LZ
3090
3091RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3092RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3093RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
5eacf7d8 3094GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
3095GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
3096GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
ce2a0343
LZ
3097RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3098RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3099RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
5eacf7d8 3100GEN_VEXT_VF(vfsub_vf_h, 2)
3101GEN_VEXT_VF(vfsub_vf_w, 4)
3102GEN_VEXT_VF(vfsub_vf_d, 8)
ce2a0343
LZ
3103
3104static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3105{
3106 return float16_sub(b, a, s);
3107}
3108
3109static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3110{
3111 return float32_sub(b, a, s);
3112}
3113
3114static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3115{
3116 return float64_sub(b, a, s);
3117}
3118
3119RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3120RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3121RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
5eacf7d8 3122GEN_VEXT_VF(vfrsub_vf_h, 2)
3123GEN_VEXT_VF(vfrsub_vf_w, 4)
3124GEN_VEXT_VF(vfrsub_vf_d, 8)
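/*
 * Note: OPFVF2 always passes the vector element first and the scalar
 * second, so the *_rsub wrappers above swap the operands back, giving the
 * reverse-subtract semantics vd[i] = f[rs1] - vs2[i].  In scalar terms
 * (hypothetical name, host arithmetic only):
 */
static inline float example_vfrsub(float vs2_elem, float rs1)
{
    return rs1 - vs2_elem;      /* scalar minus vector element */
}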
eeffab2e
LZ
3125
3126/* Vector Widening Floating-Point Add/Subtract Instructions */
3127static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3128{
3129 return float32_add(float16_to_float32(a, true, s),
3130 float16_to_float32(b, true, s), s);
3131}
3132
3133static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3134{
3135 return float64_add(float32_to_float64(a, s),
3136 float32_to_float64(b, s), s);
3137
3138}
3139
3140RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3141RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
5eacf7d8 3142GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3143GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
eeffab2e
LZ
3144RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3145RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
5eacf7d8 3146GEN_VEXT_VF(vfwadd_vf_h, 4)
3147GEN_VEXT_VF(vfwadd_vf_w, 8)
eeffab2e
LZ
3148
3149static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3150{
3151 return float32_sub(float16_to_float32(a, true, s),
3152 float16_to_float32(b, true, s), s);
3153}
3154
3155static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3156{
3157 return float64_sub(float32_to_float64(a, s),
3158 float32_to_float64(b, s), s);
3159
3160}
3161
3162RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3163RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
5eacf7d8 3164GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3165GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
eeffab2e
LZ
3166RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3167RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
5eacf7d8 3168GEN_VEXT_VF(vfwsub_vf_h, 4)
3169GEN_VEXT_VF(vfwsub_vf_w, 8)
eeffab2e
LZ
3170
3171static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3172{
3173 return float32_add(a, float16_to_float32(b, true, s), s);
3174}
3175
3176static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3177{
3178 return float64_add(a, float32_to_float64(b, s), s);
3179}
3180
3181RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3182RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
5eacf7d8 3183GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3184GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
eeffab2e
LZ
3185RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3186RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
5eacf7d8 3187GEN_VEXT_VF(vfwadd_wf_h, 4)
3188GEN_VEXT_VF(vfwadd_wf_w, 8)
eeffab2e
LZ
3189
3190static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3191{
3192 return float32_sub(a, float16_to_float32(b, true, s), s);
3193}
3194
3195static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3196{
3197 return float64_sub(a, float32_to_float64(b, s), s);
3198}
3199
3200RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3201RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
5eacf7d8 3202GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3203GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
eeffab2e
LZ
3204RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3205RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
5eacf7d8 3206GEN_VEXT_VF(vfwsub_wf_h, 4)
3207GEN_VEXT_VF(vfwsub_wf_w, 8)
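/*
 * The widening add/sub helpers come in two flavours: vfwadd.vv/.vf widen
 * both single-width inputs before operating (vfwadd16/vfwadd32), while
 * vfwadd.wv/.wf take vs2 already at 2*SEW and only widen the other
 * operand (vfwaddw16/vfwaddw32).  The same split in plain C (hypothetical
 * names, host float/double instead of softfloat):
 */
static inline double example_fwadd_vv(float vs2, float vs1)
{
    return (double)vs2 + (double)vs1;     /* both operands widened */
}

static inline double example_fwadd_wv(double vs2_wide, float vs1)
{
    return vs2_wide + (double)vs1;        /* vs2 is already double-width */
}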
0e0057cb
LZ
3208
3209/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3210RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3211RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3212RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
5eacf7d8 3213GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3214GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3215GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
0e0057cb
LZ
3216RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3217RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3218RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
5eacf7d8 3219GEN_VEXT_VF(vfmul_vf_h, 2)
3220GEN_VEXT_VF(vfmul_vf_w, 4)
3221GEN_VEXT_VF(vfmul_vf_d, 8)
0e0057cb
LZ
3222
3223RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3224RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3225RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
5eacf7d8 3226GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3227GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3228GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
0e0057cb
LZ
3229RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3230RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3231RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
5eacf7d8 3232GEN_VEXT_VF(vfdiv_vf_h, 2)
3233GEN_VEXT_VF(vfdiv_vf_w, 4)
3234GEN_VEXT_VF(vfdiv_vf_d, 8)
0e0057cb
LZ
3235
3236static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3237{
3238 return float16_div(b, a, s);
3239}
3240
3241static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3242{
3243 return float32_div(b, a, s);
3244}
3245
3246static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3247{
3248 return float64_div(b, a, s);
3249}
3250
3251RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3252RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3253RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
5eacf7d8 3254GEN_VEXT_VF(vfrdiv_vf_h, 2)
3255GEN_VEXT_VF(vfrdiv_vf_w, 4)
3256GEN_VEXT_VF(vfrdiv_vf_d, 8)
f7c7b7cd
LZ
3257
3258/* Vector Widening Floating-Point Multiply */
3259static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3260{
3261 return float32_mul(float16_to_float32(a, true, s),
3262 float16_to_float32(b, true, s), s);
3263}
3264
3265static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3266{
3267 return float64_mul(float32_to_float64(a, s),
3268 float32_to_float64(b, s), s);
3269
3270}
3271RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3272RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
5eacf7d8 3273GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3274GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
f7c7b7cd
LZ
3275RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3276RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
5eacf7d8 3277GEN_VEXT_VF(vfwmul_vf_h, 4)
3278GEN_VEXT_VF(vfwmul_vf_w, 8)
4aa5a8fe
LZ
3279
3280/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3281#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3282static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3283 CPURISCVState *env) \
3284{ \
3285 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3286 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3287 TD d = *((TD *)vd + HD(i)); \
3288 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3289}
3290
3291static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3292{
3293 return float16_muladd(a, b, d, 0, s);
3294}
3295
3296static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3297{
3298 return float32_muladd(a, b, d, 0, s);
3299}
3300
3301static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3302{
3303 return float64_muladd(a, b, d, 0, s);
3304}
3305
3306RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3307RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3308RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
5eacf7d8 3309GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3310GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3311GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
4aa5a8fe
LZ
3312
3313#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3314static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3315 CPURISCVState *env) \
3316{ \
3317 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3318 TD d = *((TD *)vd + HD(i)); \
3319 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3320}
3321
3322RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3323RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3324RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
5eacf7d8 3325GEN_VEXT_VF(vfmacc_vf_h, 2)
3326GEN_VEXT_VF(vfmacc_vf_w, 4)
3327GEN_VEXT_VF(vfmacc_vf_d, 8)
4aa5a8fe
LZ
3328
3329static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3330{
3331 return float16_muladd(a, b, d,
3332 float_muladd_negate_c | float_muladd_negate_product, s);
3333}
3334
3335static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3336{
3337 return float32_muladd(a, b, d,
3338 float_muladd_negate_c | float_muladd_negate_product, s);
3339}
3340
3341static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3342{
3343 return float64_muladd(a, b, d,
3344 float_muladd_negate_c | float_muladd_negate_product, s);
3345}
3346
3347RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3348RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3349RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
5eacf7d8 3350GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3351GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3352GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
4aa5a8fe
LZ
3353RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3354RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3355RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
5eacf7d8 3356GEN_VEXT_VF(vfnmacc_vf_h, 2)
3357GEN_VEXT_VF(vfnmacc_vf_w, 4)
3358GEN_VEXT_VF(vfnmacc_vf_d, 8)
4aa5a8fe
LZ
3359
3360static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3361{
3362 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3363}
3364
3365static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3366{
3367 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3368}
3369
3370static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3371{
3372 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3373}
3374
3375RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3376RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3377RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
5eacf7d8 3378GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3379GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3380GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
4aa5a8fe
LZ
3381RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3382RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3383RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
5eacf7d8 3384GEN_VEXT_VF(vfmsac_vf_h, 2)
3385GEN_VEXT_VF(vfmsac_vf_w, 4)
3386GEN_VEXT_VF(vfmsac_vf_d, 8)
4aa5a8fe
LZ
3387
3388static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3389{
3390 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3391}
3392
3393static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3394{
3395 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3396}
3397
3398static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3399{
3400 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3401}
3402
3403RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3404RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3405RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
5eacf7d8 3406GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3407GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3408GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
4aa5a8fe
LZ
3409RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3410RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3411RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
5eacf7d8 3412GEN_VEXT_VF(vfnmsac_vf_h, 2)
3413GEN_VEXT_VF(vfnmsac_vf_w, 4)
3414GEN_VEXT_VF(vfnmsac_vf_d, 8)
4aa5a8fe
LZ
3415
3416static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3417{
3418 return float16_muladd(d, b, a, 0, s);
3419}
3420
3421static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3422{
3423 return float32_muladd(d, b, a, 0, s);
3424}
3425
3426static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3427{
3428 return float64_muladd(d, b, a, 0, s);
3429}
3430
3431RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3432RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3433RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
5eacf7d8 3434GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3435GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3436GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
4aa5a8fe
LZ
3437RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3438RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3439RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
5eacf7d8 3440GEN_VEXT_VF(vfmadd_vf_h, 2)
3441GEN_VEXT_VF(vfmadd_vf_w, 4)
3442GEN_VEXT_VF(vfmadd_vf_d, 8)
4aa5a8fe
LZ
3443
3444static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3445{
3446 return float16_muladd(d, b, a,
3447 float_muladd_negate_c | float_muladd_negate_product, s);
3448}
3449
3450static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3451{
3452 return float32_muladd(d, b, a,
3453 float_muladd_negate_c | float_muladd_negate_product, s);
3454}
3455
3456static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3457{
3458 return float64_muladd(d, b, a,
3459 float_muladd_negate_c | float_muladd_negate_product, s);
3460}
3461
3462RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3463RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3464RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
5eacf7d8 3465GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3466GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3467GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
4aa5a8fe
LZ
3468RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3469RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3470RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
5eacf7d8 3471GEN_VEXT_VF(vfnmadd_vf_h, 2)
3472GEN_VEXT_VF(vfnmadd_vf_w, 4)
3473GEN_VEXT_VF(vfnmadd_vf_d, 8)
4aa5a8fe
LZ
3474
3475static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3476{
3477 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3478}
3479
3480static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3481{
3482 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3483}
3484
3485static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3486{
3487 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3488}
3489
3490RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3491RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3492RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
5eacf7d8 3493GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3494GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3495GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
4aa5a8fe
LZ
3496RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3497RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3498RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
5eacf7d8 3499GEN_VEXT_VF(vfmsub_vf_h, 2)
3500GEN_VEXT_VF(vfmsub_vf_w, 4)
3501GEN_VEXT_VF(vfmsub_vf_d, 8)
4aa5a8fe
LZ
3502
3503static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3504{
3505 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3506}
3507
3508static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3509{
3510 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3511}
3512
3513static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3514{
3515 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3516}
3517
3518RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3519RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3520RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
5eacf7d8 3521GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3522GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3523GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
4aa5a8fe
LZ
3524RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3525RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3526RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
5eacf7d8 3527GEN_VEXT_VF(vfnmsub_vf_h, 2)
3528GEN_VEXT_VF(vfnmsub_vf_w, 4)
3529GEN_VEXT_VF(vfnmsub_vf_d, 8)
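/*
 * The eight single-width FMA groups above differ only in which source is
 * multiplied and which terms are negated:
 *
 *   vfmacc:  vd = +(vs1 * vs2) + vd      vfmadd:  vd = +(vs1 * vd) + vs2
 *   vfnmacc: vd = -(vs1 * vs2) - vd      vfnmadd: vd = -(vs1 * vd) - vs2
 *   vfmsac:  vd = +(vs1 * vs2) - vd      vfmsub:  vd = +(vs1 * vd) - vs2
 *   vfnmsac: vd = -(vs1 * vs2) + vd      vfnmsub: vd = -(vs1 * vd) + vs2
 *
 * A sketch of the two operand orders using the host fma() from <math.h>
 * (already included above); this only illustrates operand placement, the
 * real helpers go through softfloat and env->fp_status:
 */
static inline double example_vfmacc(double vd, double vs1, double vs2)
{
    return fma(vs1, vs2, vd);     /* multiply the two sources, add vd */
}

static inline double example_vfmadd(double vd, double vs1, double vs2)
{
    return fma(vd, vs1, vs2);     /* multiply vd by vs1, add source vs2 */
}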
0dd50959
LZ
3530
3531/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3532static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3533{
3534 return float32_muladd(float16_to_float32(a, true, s),
3535 float16_to_float32(b, true, s), d, 0, s);
3536}
3537
3538static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3539{
3540 return float64_muladd(float32_to_float64(a, s),
3541 float32_to_float64(b, s), d, 0, s);
3542}
3543
3544RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3545RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
5eacf7d8 3546GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3547GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
0dd50959
LZ
3548RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3549RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
5eacf7d8 3550GEN_VEXT_VF(vfwmacc_vf_h, 4)
3551GEN_VEXT_VF(vfwmacc_vf_w, 8)
0dd50959
LZ
3552
3553static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3554{
3555 return float32_muladd(float16_to_float32(a, true, s),
3556 float16_to_float32(b, true, s), d,
3557 float_muladd_negate_c | float_muladd_negate_product, s);
3558}
3559
3560static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3561{
3562 return float64_muladd(float32_to_float64(a, s),
3563 float32_to_float64(b, s), d,
3564 float_muladd_negate_c | float_muladd_negate_product, s);
3565}
3566
3567RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3568RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
5eacf7d8 3569GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3570GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
0dd50959
LZ
3571RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3572RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
5eacf7d8 3573GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3574GEN_VEXT_VF(vfwnmacc_vf_w, 8)
0dd50959
LZ
3575
3576static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3577{
3578 return float32_muladd(float16_to_float32(a, true, s),
3579 float16_to_float32(b, true, s), d,
3580 float_muladd_negate_c, s);
3581}
3582
3583static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3584{
3585 return float64_muladd(float32_to_float64(a, s),
3586 float32_to_float64(b, s), d,
3587 float_muladd_negate_c, s);
3588}
3589
3590RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3591RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
5eacf7d8 3592GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3593GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
0dd50959
LZ
3594RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3595RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
5eacf7d8 3596GEN_VEXT_VF(vfwmsac_vf_h, 4)
3597GEN_VEXT_VF(vfwmsac_vf_w, 8)
0dd50959
LZ
3598
3599static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3600{
3601 return float32_muladd(float16_to_float32(a, true, s),
3602 float16_to_float32(b, true, s), d,
3603 float_muladd_negate_product, s);
3604}
3605
3606static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3607{
3608 return float64_muladd(float32_to_float64(a, s),
3609 float32_to_float64(b, s), d,
3610 float_muladd_negate_product, s);
3611}
3612
3613RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3614RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
5eacf7d8 3615GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3616GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
0dd50959
LZ
3617RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3618RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
5eacf7d8 3619GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3620GEN_VEXT_VF(vfwnmsac_vf_w, 8)
d9e4ce72
LZ
3621
3622/* Vector Floating-Point Square-Root Instruction */
3623/* (TD, T2, TX2) */
3624#define OP_UU_H uint16_t, uint16_t, uint16_t
3625#define OP_UU_W uint32_t, uint32_t, uint32_t
3626#define OP_UU_D uint64_t, uint64_t, uint64_t
3627
3628#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3629static void do_##NAME(void *vd, void *vs2, int i, \
3630 CPURISCVState *env) \
3631{ \
3632 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3633 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3634}
3635
5eacf7d8 3636#define GEN_VEXT_V_ENV(NAME, ESZ) \
d9e4ce72
LZ
3637void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3638 CPURISCVState *env, uint32_t desc) \
3639{ \
d9e4ce72
LZ
3640 uint32_t vm = vext_vm(desc); \
3641 uint32_t vl = env->vl; \
5eacf7d8 3642 uint32_t total_elems = \
3643 vext_get_total_elems(env, desc, ESZ); \
3644 uint32_t vta = vext_vta(desc); \
d9e4ce72
LZ
3645 uint32_t i; \
3646 \
3647 if (vl == 0) { \
3648 return; \
3649 } \
f714361e 3650 for (i = env->vstart; i < vl; i++) { \
f9298de5 3651 if (!vm && !vext_elem_mask(v0, i)) { \
d9e4ce72
LZ
3652 continue; \
3653 } \
3654 do_##NAME(vd, vs2, i, env); \
3655 } \
f714361e 3656 env->vstart = 0; \
5eacf7d8 3657 vext_set_elems_1s(vd, vta, vl * ESZ, \
3658 total_elems * ESZ); \
d9e4ce72
LZ
3659}
3660
3661RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3662RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3663RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
5eacf7d8 3664GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3665GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3666GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
230b53dd 3667
e848a1e5
FC
3668/*
3669 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3670 *
3671 * Adapted from riscv-v-spec recip.c:
3672 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3673 */
3674static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3675{
3676 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3677 uint64_t exp = extract64(f, frac_size, exp_size);
3678 uint64_t frac = extract64(f, 0, frac_size);
3679
3680 const uint8_t lookup_table[] = {
3681 52, 51, 50, 48, 47, 46, 44, 43,
3682 42, 41, 40, 39, 38, 36, 35, 34,
3683 33, 32, 31, 30, 30, 29, 28, 27,
3684 26, 25, 24, 23, 23, 22, 21, 20,
3685 19, 19, 18, 17, 16, 16, 15, 14,
3686 14, 13, 12, 12, 11, 10, 10, 9,
3687 9, 8, 7, 7, 6, 6, 5, 4,
3688 4, 3, 3, 2, 2, 1, 1, 0,
3689 127, 125, 123, 121, 119, 118, 116, 114,
3690 113, 111, 109, 108, 106, 105, 103, 102,
3691 100, 99, 97, 96, 95, 93, 92, 91,
3692 90, 88, 87, 86, 85, 84, 83, 82,
3693 80, 79, 78, 77, 76, 75, 74, 73,
3694 72, 71, 70, 70, 69, 68, 67, 66,
3695 65, 64, 63, 63, 62, 61, 60, 59,
3696 59, 58, 57, 56, 56, 55, 54, 53
3697 };
3698 const int precision = 7;
3699
3700 if (exp == 0 && frac != 0) { /* subnormal */
3701 /* Normalize the subnormal. */
3702 while (extract64(frac, frac_size - 1, 1) == 0) {
3703 exp--;
3704 frac <<= 1;
3705 }
3706
3707 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3708 }
3709
3710 int idx = ((exp & 1) << (precision - 1)) |
3711 (frac >> (frac_size - precision + 1));
3712 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3713 (frac_size - precision);
3714 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3715
3716 uint64_t val = 0;
3717 val = deposit64(val, 0, frac_size, out_frac);
3718 val = deposit64(val, frac_size, exp_size, out_exp);
3719 val = deposit64(val, frac_size + exp_size, 1, sign);
3720 return val;
3721}
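/*
 * The estimate above is a pure table lookup: the 7-bit index is the least
 * significant exponent bit concatenated with the top six bits of the
 * (normalized) mantissa, and for normal inputs the output exponent works
 * out to (3 * bias - 1 - exp) / 2, i.e. roughly half the negated unbiased
 * exponent.  A stand-alone sketch of the index computation for a normal
 * binary32 value (hypothetical helper, subnormal normalization omitted):
 */
static inline int example_frsqrt7_index_f32(uint32_t f32_bits)
{
    const int exp_size = 8, frac_size = 23, precision = 7;
    uint32_t exp = (f32_bits >> frac_size) & ((1u << exp_size) - 1);
    uint32_t frac = f32_bits & ((1u << frac_size) - 1);

    /* the exponent LSB selects the upper or lower half of the 128 entries */
    return ((exp & 1) << (precision - 1)) |
           (frac >> (frac_size - precision + 1));
}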
3722
3723static float16 frsqrt7_h(float16 f, float_status *s)
3724{
3725 int exp_size = 5, frac_size = 10;
3726 bool sign = float16_is_neg(f);
3727
3728 /*
3729 * frsqrt7(sNaN) = canonical NaN
3730 * frsqrt7(-inf) = canonical NaN
3731 * frsqrt7(-normal) = canonical NaN
3732 * frsqrt7(-subnormal) = canonical NaN
3733 */
3734 if (float16_is_signaling_nan(f, s) ||
3735 (float16_is_infinity(f) && sign) ||
3736 (float16_is_normal(f) && sign) ||
3737 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3738 s->float_exception_flags |= float_flag_invalid;
3739 return float16_default_nan(s);
3740 }
3741
3742 /* frsqrt7(qNaN) = canonical NaN */
3743 if (float16_is_quiet_nan(f, s)) {
3744 return float16_default_nan(s);
3745 }
3746
3747 /* frsqrt7(+-0) = +-inf */
3748 if (float16_is_zero(f)) {
3749 s->float_exception_flags |= float_flag_divbyzero;
3750 return float16_set_sign(float16_infinity, sign);
3751 }
3752
3753 /* frsqrt7(+inf) = +0 */
3754 if (float16_is_infinity(f) && !sign) {
3755 return float16_set_sign(float16_zero, sign);
3756 }
3757
3758 /* +normal, +subnormal */
3759 uint64_t val = frsqrt7(f, exp_size, frac_size);
3760 return make_float16(val);
3761}
3762
3763static float32 frsqrt7_s(float32 f, float_status *s)
3764{
3765 int exp_size = 8, frac_size = 23;
3766 bool sign = float32_is_neg(f);
3767
3768 /*
3769 * frsqrt7(sNaN) = canonical NaN
3770 * frsqrt7(-inf) = canonical NaN
3771 * frsqrt7(-normal) = canonical NaN
3772 * frsqrt7(-subnormal) = canonical NaN
3773 */
3774 if (float32_is_signaling_nan(f, s) ||
3775 (float32_is_infinity(f) && sign) ||
3776 (float32_is_normal(f) && sign) ||
3777 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3778 s->float_exception_flags |= float_flag_invalid;
3779 return float32_default_nan(s);
3780 }
3781
3782 /* frsqrt7(qNaN) = canonical NaN */
3783 if (float32_is_quiet_nan(f, s)) {
3784 return float32_default_nan(s);
3785 }
3786
3787 /* frsqrt7(+-0) = +-inf */
3788 if (float32_is_zero(f)) {
3789 s->float_exception_flags |= float_flag_divbyzero;
3790 return float32_set_sign(float32_infinity, sign);
3791 }
3792
3793 /* frsqrt7(+inf) = +0 */
3794 if (float32_is_infinity(f) && !sign) {
3795 return float32_set_sign(float32_zero, sign);
3796 }
3797
3798 /* +normal, +subnormal */
3799 uint64_t val = frsqrt7(f, exp_size, frac_size);
3800 return make_float32(val);
3801}
3802
3803static float64 frsqrt7_d(float64 f, float_status *s)
3804{
3805 int exp_size = 11, frac_size = 52;
3806 bool sign = float64_is_neg(f);
3807
3808 /*
3809 * frsqrt7(sNaN) = canonical NaN
3810 * frsqrt7(-inf) = canonical NaN
3811 * frsqrt7(-normal) = canonical NaN
3812 * frsqrt7(-subnormal) = canonical NaN
3813 */
3814 if (float64_is_signaling_nan(f, s) ||
3815 (float64_is_infinity(f) && sign) ||
3816 (float64_is_normal(f) && sign) ||
3817 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3818 s->float_exception_flags |= float_flag_invalid;
3819 return float64_default_nan(s);
3820 }
3821
3822 /* frsqrt7(qNaN) = canonical NaN */
3823 if (float64_is_quiet_nan(f, s)) {
3824 return float64_default_nan(s);
3825 }
3826
3827 /* frsqrt7(+-0) = +-inf */
3828 if (float64_is_zero(f)) {
3829 s->float_exception_flags |= float_flag_divbyzero;
3830 return float64_set_sign(float64_infinity, sign);
3831 }
3832
3833 /* frsqrt7(+inf) = +0 */
3834 if (float64_is_infinity(f) && !sign) {
3835 return float64_set_sign(float64_zero, sign);
3836 }
3837
3838 /* +normal, +subnormal */
3839 uint64_t val = frsqrt7(f, exp_size, frac_size);
3840 return make_float64(val);
3841}
3842
3843RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3844RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3845RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
5eacf7d8 3846GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3847GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3848GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
e848a1e5 3849
55c35407
FC
3850/*
3851 * Vector Floating-Point Reciprocal Estimate Instruction
3852 *
3853 * Adapted from riscv-v-spec recip.c:
3854 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3855 */
3856static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3857 float_status *s)
3858{
3859 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3860 uint64_t exp = extract64(f, frac_size, exp_size);
3861 uint64_t frac = extract64(f, 0, frac_size);
3862
3863 const uint8_t lookup_table[] = {
3864 127, 125, 123, 121, 119, 117, 116, 114,
3865 112, 110, 109, 107, 105, 104, 102, 100,
3866 99, 97, 96, 94, 93, 91, 90, 88,
3867 87, 85, 84, 83, 81, 80, 79, 77,
3868 76, 75, 74, 72, 71, 70, 69, 68,
3869 66, 65, 64, 63, 62, 61, 60, 59,
3870 58, 57, 56, 55, 54, 53, 52, 51,
3871 50, 49, 48, 47, 46, 45, 44, 43,
3872 42, 41, 40, 40, 39, 38, 37, 36,
3873 35, 35, 34, 33, 32, 31, 31, 30,
3874 29, 28, 28, 27, 26, 25, 25, 24,
3875 23, 23, 22, 21, 21, 20, 19, 19,
3876 18, 17, 17, 16, 15, 15, 14, 14,
3877 13, 12, 12, 11, 11, 10, 9, 9,
3878 8, 8, 7, 7, 6, 5, 5, 4,
3879 4, 3, 3, 2, 2, 1, 1, 0
3880 };
3881 const int precision = 7;
3882
3883 if (exp == 0 && frac != 0) { /* subnormal */
3884 /* Normalize the subnormal. */
3885 while (extract64(frac, frac_size - 1, 1) == 0) {
3886 exp--;
3887 frac <<= 1;
3888 }
3889
3890 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3891
3892 if (exp != 0 && exp != UINT64_MAX) {
3893 /*
3894 * Overflow to inf or max value of same sign,
3895 * depending on sign and rounding mode.
3896 */
3897 s->float_exception_flags |= (float_flag_inexact |
3898 float_flag_overflow);
3899
3900 if ((s->float_rounding_mode == float_round_to_zero) ||
3901 ((s->float_rounding_mode == float_round_down) && !sign) ||
3902 ((s->float_rounding_mode == float_round_up) && sign)) {
3903 /* Return greatest/negative finite value. */
3904 return (sign << (exp_size + frac_size)) |
3905 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3906 } else {
3907 /* Return +-inf. */
3908 return (sign << (exp_size + frac_size)) |
3909 MAKE_64BIT_MASK(frac_size, exp_size);
3910 }
3911 }
3912 }
3913
3914 int idx = frac >> (frac_size - precision);
3915 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3916 (frac_size - precision);
3917 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3918
3919 if (out_exp == 0 || out_exp == UINT64_MAX) {
3920 /*
3921 * The result is subnormal, but don't raise the underflow exception,
3922 * because there's no additional loss of precision.
3923 */
3924 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3925 if (out_exp == UINT64_MAX) {
3926 out_frac >>= 1;
3927 out_exp = 0;
3928 }
3929 }
3930
3931 uint64_t val = 0;
3932 val = deposit64(val, 0, frac_size, out_frac);
3933 val = deposit64(val, frac_size, exp_size, out_exp);
3934 val = deposit64(val, frac_size + exp_size, 1, sign);
3935 return val;
3936}
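/*
 * Compared with frsqrt7(), frec7() has one extra case: a subnormal input
 * that needed two or more normalization shifts has a reciprocal too large
 * for the format, so overflow and inexact are raised and the result is
 * either the largest finite value or infinity, following the usual IEEE
 * overflow rule for the current rounding mode.  That decision in isolation
 * (hypothetical helper; the enum values are the softfloat ones used above):
 */
static inline bool example_recip_overflows_to_inf(int rounding_mode, bool sign)
{
    if (rounding_mode == float_round_to_zero ||
        (rounding_mode == float_round_down && !sign) ||
        (rounding_mode == float_round_up && sign)) {
        return false;       /* stay at the greatest-magnitude finite value */
    }
    return true;            /* round to +/- infinity */
}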
3937
3938static float16 frec7_h(float16 f, float_status *s)
3939{
3940 int exp_size = 5, frac_size = 10;
3941 bool sign = float16_is_neg(f);
3942
3943 /* frec7(+-inf) = +-0 */
3944 if (float16_is_infinity(f)) {
3945 return float16_set_sign(float16_zero, sign);
3946 }
3947
3948 /* frec7(+-0) = +-inf */
3949 if (float16_is_zero(f)) {
3950 s->float_exception_flags |= float_flag_divbyzero;
3951 return float16_set_sign(float16_infinity, sign);
3952 }
3953
3954 /* frec7(sNaN) = canonical NaN */
3955 if (float16_is_signaling_nan(f, s)) {
3956 s->float_exception_flags |= float_flag_invalid;
3957 return float16_default_nan(s);
3958 }
3959
3960 /* frec7(qNaN) = canonical NaN */
3961 if (float16_is_quiet_nan(f, s)) {
3962 return float16_default_nan(s);
3963 }
3964
3965 /* +-normal, +-subnormal */
3966 uint64_t val = frec7(f, exp_size, frac_size, s);
3967 return make_float16(val);
3968}
3969
3970static float32 frec7_s(float32 f, float_status *s)
3971{
3972 int exp_size = 8, frac_size = 23;
3973 bool sign = float32_is_neg(f);
3974
3975 /* frec7(+-inf) = +-0 */
3976 if (float32_is_infinity(f)) {
3977 return float32_set_sign(float32_zero, sign);
3978 }
3979
3980 /* frec7(+-0) = +-inf */
3981 if (float32_is_zero(f)) {
3982 s->float_exception_flags |= float_flag_divbyzero;
3983 return float32_set_sign(float32_infinity, sign);
3984 }
3985
3986 /* frec7(sNaN) = canonical NaN */
3987 if (float32_is_signaling_nan(f, s)) {
3988 s->float_exception_flags |= float_flag_invalid;
3989 return float32_default_nan(s);
3990 }
3991
3992 /* frec7(qNaN) = canonical NaN */
3993 if (float32_is_quiet_nan(f, s)) {
3994 return float32_default_nan(s);
3995 }
3996
3997 /* +-normal, +-subnormal */
3998 uint64_t val = frec7(f, exp_size, frac_size, s);
3999 return make_float32(val);
4000}
4001
4002static float64 frec7_d(float64 f, float_status *s)
4003{
4004 int exp_size = 11, frac_size = 52;
4005 bool sign = float64_is_neg(f);
4006
4007 /* frec7(+-inf) = +-0 */
4008 if (float64_is_infinity(f)) {
4009 return float64_set_sign(float64_zero, sign);
4010 }
4011
4012 /* frec7(+-0) = +-inf */
4013 if (float64_is_zero(f)) {
4014 s->float_exception_flags |= float_flag_divbyzero;
4015 return float64_set_sign(float64_infinity, sign);
4016 }
4017
4018 /* frec7(sNaN) = canonical NaN */
4019 if (float64_is_signaling_nan(f, s)) {
4020 s->float_exception_flags |= float_flag_invalid;
4021 return float64_default_nan(s);
4022 }
4023
4024 /* frec7(qNaN) = canonical NaN */
4025 if (float64_is_quiet_nan(f, s)) {
4026 return float64_default_nan(s);
4027 }
4028
4029 /* +-normal, +-subnormal */
4030 uint64_t val = frec7(f, exp_size, frac_size, s);
4031 return make_float64(val);
4032}
4033
4034RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
4035RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
4036RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
5eacf7d8 4037GEN_VEXT_V_ENV(vfrec7_v_h, 2)
4038GEN_VEXT_V_ENV(vfrec7_v_w, 4)
4039GEN_VEXT_V_ENV(vfrec7_v_d, 8)
55c35407 4040
230b53dd 4041/* Vector Floating-Point MIN/MAX Instructions */
49c5611a
FC
4042RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
4043RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
4044RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
5eacf7d8 4045GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
4046GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
4047GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
49c5611a
FC
4048RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
4049RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
4050RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
5eacf7d8 4051GEN_VEXT_VF(vfmin_vf_h, 2)
4052GEN_VEXT_VF(vfmin_vf_w, 4)
4053GEN_VEXT_VF(vfmin_vf_d, 8)
230b53dd 4054
49c5611a
FC
4055RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
4056RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
4057RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
5eacf7d8 4058GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
4059GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
4060GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
49c5611a
FC
4061RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
4062RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
4063RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
5eacf7d8 4064GEN_VEXT_VF(vfmax_vf_h, 2)
4065GEN_VEXT_VF(vfmax_vf_w, 4)
4066GEN_VEXT_VF(vfmax_vf_d, 8)
1d426b81
LZ
4067
4068/* Vector Floating-Point Sign-Injection Instructions */
4069static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4070{
4071 return deposit64(b, 0, 15, a);
4072}
4073
4074static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4075{
4076 return deposit64(b, 0, 31, a);
4077}
4078
4079static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4080{
4081 return deposit64(b, 0, 63, a);
4082}
4083
4084RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4085RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4086RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
5eacf7d8 4087GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
4088GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
4089GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
1d426b81
LZ
4090RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4091RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4092RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
5eacf7d8 4093GEN_VEXT_VF(vfsgnj_vf_h, 2)
4094GEN_VEXT_VF(vfsgnj_vf_w, 4)
4095GEN_VEXT_VF(vfsgnj_vf_d, 8)
1d426b81
LZ
4096
4097static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4098{
4099 return deposit64(~b, 0, 15, a);
4100}
4101
4102static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4103{
4104 return deposit64(~b, 0, 31, a);
4105}
4106
4107static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4108{
4109 return deposit64(~b, 0, 63, a);
4110}
4111
4112RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4113RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4114RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
5eacf7d8 4115GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
4116GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
4117GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
1d426b81
LZ
4118RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4119RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4120RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
5eacf7d8 4121GEN_VEXT_VF(vfsgnjn_vf_h, 2)
4122GEN_VEXT_VF(vfsgnjn_vf_w, 4)
4123GEN_VEXT_VF(vfsgnjn_vf_d, 8)
1d426b81
LZ
4124
4125static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4126{
4127 return deposit64(b ^ a, 0, 15, a);
4128}
4129
4130static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4131{
4132 return deposit64(b ^ a, 0, 31, a);
4133}
4134
4135static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4136{
4137 return deposit64(b ^ a, 0, 63, a);
4138}
4139
4140RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4141RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4142RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
5eacf7d8 4143GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4144GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4145GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
1d426b81
LZ
4146RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4147RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4148RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
5eacf7d8 4149GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4150GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4151GEN_VEXT_VF(vfsgnjx_vf_d, 8)
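/*
 * Sign injection never touches the magnitude and never raises exceptions:
 * the result takes the mantissa and exponent of vs2 and gets its sign bit
 * from vs1 (vfsgnj), from the inverse of vs1's sign (vfsgnjn), or from the
 * XOR of both signs (vfsgnjx).  For binary32 this is copysign-style bit
 * surgery (hypothetical helpers using <math.h>; the code above does the
 * same thing directly on the raw bit patterns, which also preserves NaN
 * payloads):
 */
static inline float example_fsgnj_f32(float vs2, float vs1)
{
    return copysignf(vs2, vs1);            /* magnitude of vs2, sign of vs1 */
}

static inline float example_fsgnjn_f32(float vs2, float vs1)
{
    return copysignf(vs2, -vs1);           /* inverted sign of vs1 */
}

static inline float example_fsgnjx_f32(float vs2, float vs1)
{
    /* sign(vs1) XOR sign(vs2), magnitude of vs2 */
    bool neg = (signbit(vs1) != 0) != (signbit(vs2) != 0);
    return neg ? -fabsf(vs2) : fabsf(vs2);
}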
2a68e9e5
LZ
4152
4153/* Vector Floating-Point Compare Instructions */
4154#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4155void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4156 CPURISCVState *env, uint32_t desc) \
4157{ \
2a68e9e5
LZ
4158 uint32_t vm = vext_vm(desc); \
4159 uint32_t vl = env->vl; \
5eacf7d8 4160 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
4161 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
2a68e9e5
LZ
4162 uint32_t i; \
4163 \
f714361e 4164 for (i = env->vstart; i < vl; i++) { \
2a68e9e5
LZ
4165 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4166 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4167 if (!vm && !vext_elem_mask(v0, i)) { \
2a68e9e5
LZ
4168 continue; \
4169 } \
f9298de5 4170 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4171 DO_OP(s2, s1, &env->fp_status)); \
4172 } \
f714361e 4173 env->vstart = 0; \
5eacf7d8 4174 /* mask destination register is always tail-agnostic */ \
4175 /* set tail elements to 1s */ \
4176 if (vta_all_1s) { \
4177 for (; i < total_elems; i++) { \
4178 vext_set_elem_mask(vd, i, 1); \
4179 } \
4180 } \
2a68e9e5
LZ
4181}
4182
2a68e9e5
LZ
4183GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4184GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4185GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4186
4187#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4188void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4189 CPURISCVState *env, uint32_t desc) \
4190{ \
2a68e9e5
LZ
4191 uint32_t vm = vext_vm(desc); \
4192 uint32_t vl = env->vl; \
5eacf7d8 4193 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
4194 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
2a68e9e5
LZ
4195 uint32_t i; \
4196 \
f714361e 4197 for (i = env->vstart; i < vl; i++) { \
2a68e9e5 4198 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4199 if (!vm && !vext_elem_mask(v0, i)) { \
2a68e9e5
LZ
4200 continue; \
4201 } \
f9298de5 4202 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4203 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4204 } \
f714361e 4205 env->vstart = 0; \
5eacf7d8 4206 /* mask destination register is always tail-agnostic */ \
4207 /* set tail elements to 1s */ \
4208 if (vta_all_1s) { \
4209 for (; i < total_elems; i++) { \
4210 vext_set_elem_mask(vd, i, 1); \
4211 } \
4212 } \
2a68e9e5
LZ
4213}
4214
4215GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4216GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4217GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4218
4219static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4220{
4221 FloatRelation compare = float16_compare_quiet(a, b, s);
4222 return compare != float_relation_equal;
4223}
4224
4225static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4226{
4227 FloatRelation compare = float32_compare_quiet(a, b, s);
4228 return compare != float_relation_equal;
4229}
4230
4231static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4232{
4233 FloatRelation compare = float64_compare_quiet(a, b, s);
4234 return compare != float_relation_equal;
4235}
4236
4237GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4238GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4239GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4240GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4241GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4242GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4243
2a68e9e5
LZ
4244GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4245GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4246GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4247GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4248GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4249GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4250
2a68e9e5
LZ
4251GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4252GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4253GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4254GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4255GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4256GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4257
4258static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4259{
4260 FloatRelation compare = float16_compare(a, b, s);
4261 return compare == float_relation_greater;
4262}
4263
4264static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4265{
4266 FloatRelation compare = float32_compare(a, b, s);
4267 return compare == float_relation_greater;
4268}
4269
4270static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4271{
4272 FloatRelation compare = float64_compare(a, b, s);
4273 return compare == float_relation_greater;
4274}
4275
4276GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4277GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4278GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4279
4280static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4281{
4282 FloatRelation compare = float16_compare(a, b, s);
4283 return compare == float_relation_greater ||
4284 compare == float_relation_equal;
4285}
4286
4287static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4288{
4289 FloatRelation compare = float32_compare(a, b, s);
4290 return compare == float_relation_greater ||
4291 compare == float_relation_equal;
4292}
4293
4294static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4295{
4296 FloatRelation compare = float64_compare(a, b, s);
4297 return compare == float_relation_greater ||
4298 compare == float_relation_equal;
4299}
4300
4301GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4302GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4303GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
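/*
 * Two details of the compare helpers are easy to miss.  First, vmfeq/vmfne
 * use the *_quiet comparisons (a quiet NaN operand yields "unordered"
 * without raising invalid), while vmflt/vmfle/vmfgt/vmfge use the
 * signaling ones, which set the invalid flag for any NaN operand.  Second,
 * the destination is a mask register, so each element produces one bit and
 * the tail bits are forced to 1 when vta_all_1s is set.  A plain-C sketch
 * of that bit-per-element write pattern (hypothetical name, native
 * comparison instead of softfloat, v0 masking omitted for brevity):
 */
static void example_compare_to_mask_f32(uint64_t *vd_mask, const float *vs1,
                                        const float *vs2, uint32_t vl,
                                        uint32_t total_elems, bool vta_all_1s)
{
    uint32_t i;

    for (i = 0; i < vl; i++) {
        bool r = vs2[i] < vs1[i];               /* stands in for float32_lt */
        if (r) {
            vd_mask[i / 64] |= 1ull << (i % 64);
        } else {
            vd_mask[i / 64] &= ~(1ull << (i % 64));
        }
    }
    if (vta_all_1s) {
        for (i = vl; i < total_elems; i++) {    /* mask tail is agnostic */
            vd_mask[i / 64] |= 1ull << (i % 64);
        }
    }
}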
4304
121ddbb3
LZ
4305/* Vector Floating-Point Classify Instruction */
4306#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
4307static void do_##NAME(void *vd, void *vs2, int i) \
4308{ \
4309 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
4310 *((TD *)vd + HD(i)) = OP(s2); \
4311}
4312
5eacf7d8 4313#define GEN_VEXT_V(NAME, ESZ) \
121ddbb3
LZ
4314void HELPER(NAME)(void *vd, void *v0, void *vs2, \
4315 CPURISCVState *env, uint32_t desc) \
4316{ \
121ddbb3
LZ
4317 uint32_t vm = vext_vm(desc); \
4318 uint32_t vl = env->vl; \
5eacf7d8 4319 uint32_t total_elems = \
4320 vext_get_total_elems(env, desc, ESZ); \
4321 uint32_t vta = vext_vta(desc); \
121ddbb3
LZ
4322 uint32_t i; \
4323 \
f714361e 4324 for (i = env->vstart; i < vl; i++) { \
f9298de5 4325 if (!vm && !vext_elem_mask(v0, i)) { \
121ddbb3
LZ
4326 continue; \
4327 } \
4328 do_##NAME(vd, vs2, i); \
4329 } \
f714361e 4330 env->vstart = 0; \
5eacf7d8 4331 /* set tail elements to 1s */ \
4332 vext_set_elems_1s(vd, vta, vl * ESZ, \
4333 total_elems * ESZ); \
121ddbb3
LZ
4334}
4335
4336target_ulong fclass_h(uint64_t frs1)
4337{
4338 float16 f = frs1;
4339 bool sign = float16_is_neg(f);
4340
4341 if (float16_is_infinity(f)) {
4342 return sign ? 1 << 0 : 1 << 7;
4343 } else if (float16_is_zero(f)) {
4344 return sign ? 1 << 3 : 1 << 4;
4345 } else if (float16_is_zero_or_denormal(f)) {
4346 return sign ? 1 << 2 : 1 << 5;
4347 } else if (float16_is_any_nan(f)) {
4348 float_status s = { }; /* for snan_bit_is_one */
4349 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4350 } else {
4351 return sign ? 1 << 1 : 1 << 6;
4352 }
4353}
4354
4355target_ulong fclass_s(uint64_t frs1)
4356{
4357 float32 f = frs1;
4358 bool sign = float32_is_neg(f);
4359
4360 if (float32_is_infinity(f)) {
4361 return sign ? 1 << 0 : 1 << 7;
4362 } else if (float32_is_zero(f)) {
4363 return sign ? 1 << 3 : 1 << 4;
4364 } else if (float32_is_zero_or_denormal(f)) {
4365 return sign ? 1 << 2 : 1 << 5;
4366 } else if (float32_is_any_nan(f)) {
4367 float_status s = { }; /* for snan_bit_is_one */
4368 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4369 } else {
4370 return sign ? 1 << 1 : 1 << 6;
4371 }
4372}
4373
4374target_ulong fclass_d(uint64_t frs1)
4375{
4376 float64 f = frs1;
4377 bool sign = float64_is_neg(f);
4378
4379 if (float64_is_infinity(f)) {
4380 return sign ? 1 << 0 : 1 << 7;
4381 } else if (float64_is_zero(f)) {
4382 return sign ? 1 << 3 : 1 << 4;
4383 } else if (float64_is_zero_or_denormal(f)) {
4384 return sign ? 1 << 2 : 1 << 5;
4385 } else if (float64_is_any_nan(f)) {
4386 float_status s = { }; /* for snan_bit_is_one */
4387 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4388 } else {
4389 return sign ? 1 << 1 : 1 << 6;
4390 }
4391}
4392
4393RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4394RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4395RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
5eacf7d8 4396GEN_VEXT_V(vfclass_v_h, 2)
4397GEN_VEXT_V(vfclass_v_w, 4)
4398GEN_VEXT_V(vfclass_v_d, 8)
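/*
 * The classify helpers return the same one-hot ten-bit encoding as the
 * scalar FCLASS instructions:
 *
 *   bit 0: -infinity        bit 5: +subnormal
 *   bit 1: -normal          bit 6: +normal
 *   bit 2: -subnormal       bit 7: +infinity
 *   bit 3: -0               bit 8: signaling NaN
 *   bit 4: +0               bit 9: quiet NaN
 *
 * A small decoding aid (hypothetical helper, not used by the code above):
 */
static inline const char *example_fclass_name(target_ulong class_mask)
{
    static const char *const names[10] = {
        "-inf", "-normal", "-subnormal", "-zero", "+zero",
        "+subnormal", "+normal", "+inf", "sNaN", "qNaN",
    };
    for (int i = 0; i < 10; i++) {
        if (class_mask & (1 << i)) {
            return names[i];
        }
    }
    return "none";
}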
64ab5846
LZ
4399
4400/* Vector Floating-Point Merge Instruction */
5eacf7d8 4401
3479a814 4402#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
64ab5846
LZ
4403void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4404 CPURISCVState *env, uint32_t desc) \
4405{ \
64ab5846
LZ
4406 uint32_t vm = vext_vm(desc); \
4407 uint32_t vl = env->vl; \
5eacf7d8 4408 uint32_t esz = sizeof(ETYPE); \
4409 uint32_t total_elems = \
4410 vext_get_total_elems(env, desc, esz); \
4411 uint32_t vta = vext_vta(desc); \
64ab5846
LZ
4412 uint32_t i; \
4413 \
f714361e 4414 for (i = env->vstart; i < vl; i++) { \
64ab5846
LZ
4415 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4416 *((ETYPE *)vd + H(i)) \
f9298de5 4417 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
64ab5846 4418 } \
f714361e 4419 env->vstart = 0; \
5eacf7d8 4420 /* set tail elements to 1s */ \
4421 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
64ab5846
LZ
4422}
4423
3479a814
FC
4424GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4425GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4426GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
92100973
LZ
4427
4428/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4429/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4430RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4431RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4432RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
5eacf7d8 4433GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4434GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4435GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
92100973
LZ
4436
4437/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4438RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4439RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4440RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
5eacf7d8 4441GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4442GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4443GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
92100973
LZ
4444
4445/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4446RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4447RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4448RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
5eacf7d8 4449GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4450GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4451GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
92100973
LZ
4452
4453/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4454RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4455RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4456RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
5eacf7d8 4457GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4458GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4459GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4514b7b1
LZ
4460
4461/* Widening Floating-Point/Integer Type-Convert Instructions */
4462/* (TD, T2, TX2) */
3ce4c09d 4463#define WOP_UU_B uint16_t, uint8_t, uint8_t
4514b7b1
LZ
4464#define WOP_UU_H uint32_t, uint16_t, uint16_t
4465#define WOP_UU_W uint64_t, uint32_t, uint32_t
 4466/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
4467RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4468RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
5eacf7d8 4469GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4470GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4514b7b1
LZ
4471
4472/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4473RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4474RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
5eacf7d8 4475GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4476GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4514b7b1
LZ
4477
4478/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
3ce4c09d 4479RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4514b7b1
LZ
4480RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4481RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
5eacf7d8 4482GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4483GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4484GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4514b7b1
LZ
4485
4486/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3ce4c09d 4487RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4514b7b1
LZ
4488RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4489RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
5eacf7d8 4490GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4491GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4492GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4514b7b1
LZ
4493
4494/*
3ce4c09d 4495 * vfwcvt.f.f.v vd, vs2, vm
4514b7b1
LZ
4496 * Convert single-width float to double-width float.
4497 */
4498static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4499{
4500 return float16_to_float32(a, true, s);
4501}
4502
4503RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4504RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
5eacf7d8 4505GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4506GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
878d406e
LZ
4507
4508/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4509/* (TD, T2, TX2) */
ff679b58 4510#define NOP_UU_B uint8_t, uint16_t, uint32_t
878d406e
LZ
4511#define NOP_UU_H uint16_t, uint32_t, uint32_t
4512#define NOP_UU_W uint32_t, uint64_t, uint64_t
 4513/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
ff679b58
FC
4514RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4515RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4516RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
5eacf7d8 4517GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4518GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4519GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
878d406e
LZ
4520
4521/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
ff679b58
FC
4522RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4523RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4524RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
5eacf7d8 4525GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4526GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4527GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
878d406e
LZ
4528
4529/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
ff679b58
FC
4530RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4531RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
5eacf7d8 4532GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4533GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
878d406e
LZ
4534
4535/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
ff679b58
FC
4536RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4537RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
5eacf7d8 4538GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4539GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
878d406e
LZ
4540
 4541/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4542static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4543{
4544 return float32_to_float16(a, true, s);
4545}
4546
ff679b58
FC
4547RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4548RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
5eacf7d8 4549GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4550GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
fe5c9ab1
LZ
4551
4552/*
4553 *** Vector Reduction Operations
4554 */
4555/* Vector Single-Width Integer Reduction Instructions */
3479a814 4556#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
fe5c9ab1
LZ
4557void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4558 void *vs2, CPURISCVState *env, uint32_t desc) \
4559{ \
fe5c9ab1
LZ
4560 uint32_t vm = vext_vm(desc); \
4561 uint32_t vl = env->vl; \
df4f52a7 4562 uint32_t esz = sizeof(TD); \
4563 uint32_t vlenb = simd_maxsz(desc); \
4564 uint32_t vta = vext_vta(desc); \
fe5c9ab1 4565 uint32_t i; \
fe5c9ab1
LZ
4566 TD s1 = *((TD *)vs1 + HD(0)); \
4567 \
f714361e 4568 for (i = env->vstart; i < vl; i++) { \
fe5c9ab1 4569 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4570 if (!vm && !vext_elem_mask(v0, i)) { \
fe5c9ab1
LZ
4571 continue; \
4572 } \
4573 s1 = OP(s1, (TD)s2); \
4574 } \
4575 *((TD *)vd + HD(0)) = s1; \
f714361e 4576 env->vstart = 0; \
df4f52a7 4577 /* set tail elements to 1s */ \
4578 vext_set_elems_1s(vd, vta, esz, vlenb); \
fe5c9ab1
LZ
4579}
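/*
 * Every reduction follows the same scalar recurrence: seed the accumulator
 * from vs1[0], fold in the active elements of vs2, write the result to
 * vd[0], and treat the remaining vlenb bytes of vd as tail.  A plain-C
 * sketch of a masked 32-bit sum reduction (hypothetical name; the
 * GEN_VEXT_RED macro above generates the same loop for each type/operator
 * pair instantiated below):
 */
static int32_t example_vredsum_w(const int32_t *vs1, const int32_t *vs2,
                                 const uint64_t *v0, bool vm, uint32_t vl)
{
    int32_t acc = vs1[0];                       /* scalar seed from vs1[0] */
    uint32_t i;

    for (i = 0; i < vl; i++) {
        if (!vm && !((v0[i / 64] >> (i % 64)) & 1)) {
            continue;                           /* inactive element skipped */
        }
        acc += vs2[i];                          /* DO_ADD */
    }
    return acc;                                 /* stored back to vd[0] */
}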
4580
/* vd[0] = sum(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)

/* vd[0] = maxu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)

/* vd[0] = max(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)

/* vd[0] = minu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)

/* vd[0] = min(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)

/* vd[0] = and(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)

/* vd[0] = or(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)

/* vd[0] = xor(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)

/* Vector Widening Integer Reduction Instructions */
/* signed sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)

/* Unsigned sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
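
/*
 * The widening forms reuse GEN_VEXT_RED with TD twice as wide as TS2, so,
 * for example, vwredsum.vs over int8_t elements {100, 100, 100} with
 * vs1[0] = 0 accumulates 300 in an int16_t without overflowing.
 */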

/* Vector Single-Width Floating-Point Reduction Instructions */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
                  void *vs2, CPURISCVState *env,           \
                  uint32_t desc)                           \
{                                                          \
    uint32_t vm = vext_vm(desc);                           \
    uint32_t vl = env->vl;                                 \
    uint32_t esz = sizeof(TD);                             \
    uint32_t vlenb = simd_maxsz(desc);                     \
    uint32_t vta = vext_vta(desc);                         \
    uint32_t i;                                            \
    TD s1 = *((TD *)vs1 + HD(0));                          \
                                                           \
    for (i = env->vstart; i < vl; i++) {                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
        if (!vm && !vext_elem_mask(v0, i)) {               \
            continue;                                      \
        }                                                  \
        s1 = OP(s1, (TD)s2, &env->fp_status);              \
    }                                                      \
    *((TD *)vd + HD(0)) = s1;                              \
    env->vstart = 0;                                       \
    /* set tail elements to 1s */                          \
    vext_set_elems_1s(vd, vta, esz, vlenb);                \
}

/* Unordered sum */
GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
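
/*
 * Note: although vfredsum is the "unordered" sum, this helper applies the
 * additions sequentially from element 0 upwards, which is one of the
 * orderings the specification permits.
 */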

/* Maximum value */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)

/* Minimum value */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)

/* Vector Widening Floating-Point Reduction Instructions */
/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
                            void *vs2, CPURISCVState *env, uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t esz = sizeof(uint32_t);
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t vta = vext_vta(desc);
    uint32_t i;
    uint32_t s1 = *((uint32_t *)vs1 + H4(0));

    for (i = env->vstart; i < vl; i++) {
        uint16_t s2 = *((uint16_t *)vs2 + H2(i));
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
                         &env->fp_status);
    }
    *((uint32_t *)vd + H4(0)) = s1;
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, esz, vlenb);
}

void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
                            void *vs2, CPURISCVState *env, uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t esz = sizeof(uint64_t);
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t vta = vext_vta(desc);
    uint32_t i;
    uint64_t s1 = *((uint64_t *)vs1);

    for (i = env->vstart; i < vl; i++) {
        uint32_t s2 = *((uint32_t *)vs2 + H4(i));
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
                         &env->fp_status);
    }
    *((uint64_t *)vd) = s1;
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, esz, vlenb);
}

/*
 *** Vector Mask Operations
 */
/* Vector Mask-Register Logical Instructions */
#define GEN_VEXT_MASK_VV(NAME, OP)                                \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                  \
                  void *vs2, CPURISCVState *env,                  \
                  uint32_t desc)                                  \
{                                                                 \
    uint32_t vl = env->vl;                                        \
    uint32_t total_elems = env_archcpu(env)->cfg.vlen;            \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                  \
    uint32_t i;                                                   \
    int a, b;                                                     \
                                                                  \
    for (i = env->vstart; i < vl; i++) {                          \
        a = vext_elem_mask(vs1, i);                               \
        b = vext_elem_mask(vs2, i);                               \
        vext_set_elem_mask(vd, i, OP(b, a));                      \
    }                                                             \
    env->vstart = 0;                                              \
    /* the mask destination register is always tail-agnostic */  \
    /* set tail elements to 1s */                                 \
    if (vta_all_1s) {                                             \
        for (; i < total_elems; i++) {                            \
            vext_set_elem_mask(vd, i, 1);                         \
        }                                                         \
    }                                                             \
}

#define DO_NAND(N, M) (!(N & M))
#define DO_ANDNOT(N, M) (N & !M)
#define DO_NOR(N, M) (!(N | M))
#define DO_ORNOT(N, M) (N | !M)
#define DO_XNOR(N, M) (!(N ^ M))

GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
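
/*
 * Operand order above follows the ISA: OP(b, a) is OP(vs2[i], vs1[i]), so,
 * for example, vmandn.mm computes vs2[i] & !vs1[i].  With vs2 bits 1,1,0,0
 * and vs1 bits 1,0,1,0, the first four result bits are 0,1,0,0.
 */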

/* Vector count population in mask vcpop */
target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
                             uint32_t desc)
{
    target_ulong cnt = 0;
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                cnt++;
            }
        }
    }
    env->vstart = 0;
    return cnt;
}
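
/*
 * Example: with vl = 8, no masking, and source mask bits
 * vs2 = 1,0,1,1,0,0,0,1, vcpop.m returns 4.
 */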

/* vfirst find-first-set mask bit */
target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
                              uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                return i;
            }
        }
    }
    env->vstart = 0;
    return -1LL;
}
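
/*
 * Example: for active mask bits vs2 = 0,0,1,0,... vfirst.m returns 2; if no
 * active element within vl has its bit set, it returns -1.
 */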

enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};

static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = env_archcpu(env)->cfg.vlen;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    int i;
    bool first_mask_bit = false;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
    /* the mask destination register is always tail-agnostic */
    /* set tail elements to 1s */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}
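
/*
 * Example with active mask bits vs2 = 0,0,1,0,1 (element 2 is the first set
 * bit):
 *   vmsbf.m -> 1,1,0,0,0   (set Before the First set bit)
 *   vmsif.m -> 1,1,1,0,0   (set Including the First set bit)
 *   vmsof.m -> 0,0,1,0,0   (set Only the First set bit)
 */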

/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
                  uint32_t desc)                                      \
{                                                                     \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t vl = env->vl;                                            \
    uint32_t esz = sizeof(ETYPE);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t sum = 0;                                                 \
    int i;                                                            \
                                                                      \
    for (i = env->vstart; i < vl; i++) {                              \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            continue;                                                 \
        }                                                             \
        *((ETYPE *)vd + H(i)) = sum;                                  \
        if (vext_elem_mask(vs2, i)) {                                 \
            sum++;                                                    \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

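/*
 * Example: viota.m over mask bits vs2 = 1,0,1,1,0 writes the running count
 * of set bits seen so far, i.e. vd = 0,1,1,2,3.
 */
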
GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    int i;                                                                \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)

/*
 *** Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
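
/*
 * Example: vslideup.vx with rs1 = 2 and vl = 6 copies vs2[0..3] into
 * vd[2..5]; vd[0] and vd[1] keep their previous values.
 */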

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    target_ulong i_max, i;                                                \
                                                                          \
    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
        }                                                                 \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
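
/*
 * Example: vslidedown.vx with rs1 = 2, vl = 6 and vlmax = 8 writes
 * vd[0..5] = vs2[2..7]; active elements whose source index would be at or
 * beyond vlmax are written as zero instead.
 */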

#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                       \
static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1,        \
                                 void *vs2, CPURISCVState *env,              \
                                 uint32_t desc)                              \
{                                                                            \
    typedef uint##BITWIDTH##_t ETYPE;                                        \
    uint32_t vm = vext_vm(desc);                                             \
    uint32_t vl = env->vl;                                                   \
    uint32_t esz = sizeof(ETYPE);                                            \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);             \
    uint32_t vta = vext_vta(desc);                                           \
    uint32_t i;                                                              \
                                                                             \
    for (i = env->vstart; i < vl; i++) {                                     \
        if (!vm && !vext_elem_mask(v0, i)) {                                 \
            continue;                                                        \
        }                                                                    \
        if (i == 0) {                                                        \
            *((ETYPE *)vd + H(i)) = s1;                                      \
        } else {                                                             \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));              \
        }                                                                    \
    }                                                                        \
    env->vstart = 0;                                                         \
    /* set tail elements to 1s */                                            \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);                 \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                     \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);             \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                    \
static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1,      \
                                   void *vs2, CPURISCVState *env,            \
                                   uint32_t desc)                            \
{                                                                            \
    typedef uint##BITWIDTH##_t ETYPE;                                        \
    uint32_t vm = vext_vm(desc);                                             \
    uint32_t vl = env->vl;                                                   \
    uint32_t esz = sizeof(ETYPE);                                            \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);             \
    uint32_t vta = vext_vta(desc);                                           \
    uint32_t i;                                                              \
                                                                             \
    for (i = env->vstart; i < vl; i++) {                                     \
        if (!vm && !vext_elem_mask(v0, i)) {                                 \
            continue;                                                        \
        }                                                                    \
        if (i == vl - 1) {                                                   \
            *((ETYPE *)vd + H(i)) = s1;                                      \
        } else {                                                             \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));              \
        }                                                                    \
    }                                                                        \
    env->vstart = 0;                                                         \
    /* set tail elements to 1s */                                            \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);                 \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                   \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);           \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
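
/*
 * Example: with vl = 4, vslide1up.vx produces vd = {x[rs1], vs2[0], vs2[1],
 * vs2[2]}, while vslide1down.vx produces vd = {vs2[1], vs2[2], vs2[3],
 * x[rs1]}.
 */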

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);         \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)              \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);       \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
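
/*
 * Example: vrgather.vv with index vector vs1 = {3, 3, 0, 9} and vlmax = 4
 * yields vd = {vs2[3], vs2[3], vs2[0], 0}; out-of-range indices read as zero.
 */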

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
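
/*
 * Example: vcompress.vm with mask vs1 = 1,0,1,1 and vs2 = {a, b, c, d}
 * packs the enabled elements to the front: vd = {a, c, d, ...}.
 */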

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
                  CPURISCVState *env, uint32_t desc)             \
{                                                                \
    uint32_t vl = env->vl;                                       \
    uint32_t vm = vext_vm(desc);                                 \
    uint32_t esz = sizeof(ETYPE);                                \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc);                               \
    uint32_t i;                                                  \
                                                                 \
    for (i = env->vstart; i < vl; i++) {                         \
        if (!vm && !vext_elem_mask(v0, i)) {                     \
            continue;                                            \
        }                                                        \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
    }                                                            \
    env->vstart = 0;                                             \
    /* set tail elements to 1s */                                \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);     \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
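
/*
 * Example: vzext.vf2 widens each source byte by zero extension, so 0xff
 * becomes 0x00ff, while vsext.vf2 sign-extends it to 0xffff.
 */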