/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

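/*
 * For example, on a big-endian host H1(0) == 7: the byte holding element 0
 * of a SEW=8 vector lives at offset 7 of the first 64-bit chunk, while on
 * a little-endian host the H macros are the identity.
 */
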
static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}

static inline uint32_t vext_vma(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VMA);
}

static inline uint32_t vext_vta_all_1s(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

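/*
 * Worked example (illustrative numbers): with VLEN = 128 bits,
 * vlenb = simd_maxsz(desc) = 16.  For SEW = 32 (log2_esz = 2) and
 * LMUL = 2 (lmul = 1), scale = 1 - 2 = -1, so VLMAX = 16 >> 1 = 8
 * elements; with LMUL = 1/2 (lmul = -1), scale = -3 and VLMAX = 2.
 */
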
/*
 * Get number of total elements, including prestart, body and tail elements.
 * Note that when LMUL < 1, the tail includes the elements past VLMAX that
 * are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

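/*
 * For example (illustrative numbers): vlenb = 16, esz = 4, SEW = 32
 * (sew = 4 bytes) and LMUL = 1/2 give emul = 2 - 2 - 1 = -1, clamped
 * to 0, so the total is 16 / 4 = 4 elements: the tail runs to the end
 * of the whole register even though VLMAX is only 2.
 */
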
static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check.  In user mode, there is no watchpoint support for now.
 *
 * It will trigger an exception if there is no mapping in TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

/* set agnostic elements to 1s */
static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                              uint32_t tot)
{
    if (is_agnostic == 0) {
        /* policy undisturbed */
        return;
    }
    if (tot - cnt == 0) {
        return;
    }
    memset(base + cnt, -1, tot - cnt);
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

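/*
 * In other words, the mask bit for element i is bit (i % 64) of the
 * (i / 64)-th host-endian 64-bit word of v0, e.g. element 70 is bit 6
 * of word 1.
 */
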
/* elements operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

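/*
 * As an illustration, GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) above
 * expands to roughly:
 *
 *   static void lde_b(CPURISCVState *env, abi_ptr addr,
 *                     uint32_t idx, void *vd, uintptr_t retaddr)
 *   {
 *       int8_t *cur = ((int8_t *)vd + H1(idx));
 *       *cur = cpu_ldsb_data_ra(env, addr, retaddr);
 *   }
 *
 * i.e. one sign-extending byte load into element idx of vd.
 */
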
static void vext_set_tail_elems_1s(CPURISCVState *env, target_ulong vl,
                                   void *vd, uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3;
    uint32_t vta = vext_vta(desc);
    uint32_t registers_used;
    int k;

    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }

    if (nf * max_elems % total_elems != 0) {
        registers_used = ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}

/*
 * stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
}

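/*
 * For a strided segment access, field k of element i therefore comes from
 * base + stride * i + k * esz; e.g. vlse32.v with stride 8 loads element i
 * from base + 8 * i, skipping 4 bytes between consecutive elements.
 */
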
#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 * unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, evl, vd, desc, nf, esz, max_elems);
}

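/*
 * With unit stride the fields of a segment are interleaved in memory, so
 * field k of element i sits at base + (i * nf + k) * esz; for a plain
 * (non-segment) vle32.v, nf = 1 and element i is simply at base + 4 * i.
 */
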
/*
 * masked unit-stride load and store operations are a special case of the
 * strided operation, with stride = NF * sizeof(MTYPE)
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 * unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC());
}

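/*
 * vlm.v/vsm.v transfer one mask bit per element, so they move
 * ceil(vl/8) bytes; e.g. vl = 17 gives evl = (17 + 7) >> 3 = 3 bytes.
 */
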
/*
 * index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
}

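/*
 * The index vector vs2 supplies a byte offset per element, zero-extended
 * from the EEW of the index; e.g. for vluxei16 element i is accessed at
 * base + (uint16_t)vs2[i] + k * esz.
 */
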
#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());      \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC());                                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
}

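/*
 * In short: element 0 is allowed to fault normally, while a fault on any
 * later element only truncates vl to the index of the first element that
 * would fault, which is why the probe loop above records vl = i instead
 * of raising an exception.
 */
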
#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

/*
 * load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3;
    uint32_t max_elems = vlenb >> log2_esz;

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store rest of elements of current segment pointed by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd,
                      ra);
        }
        k++;
    }

    /* load/store elements for rest of segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}

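/*
 * Whole-register transfers ignore vl and vtype: the nf field is reused by
 * the translation code to encode the register count (1, 2, 4 or 8), so
 * e.g. vl2re32.v always moves exactly 2 * VLEN bits regardless of the
 * current configuration.
 */
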
#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)

/*
 * Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

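/*
 * The five types in each OP_*/WOP_*/NOP_* list are (TD, T1, T2, TX1, TX2):
 * the destination element type, the two source element types, and the
 * types the sources are converted to before OP is applied (wider for the
 * widening WOP_* lists, narrower sources for the NOP_* lists).  The letters
 * give the signedness, e.g. WOP_SSU_B is a widening op producing int16_t
 * from a signed and an unsigned byte.
 */
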
/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

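/*
 * RVVCALL first expands the operand-type list, so
 * RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) becomes
 * OPIVV2(vadd_vv_b, int8_t, int8_t, int8_t, int8_t, int8_t,
 *        H1, H1, H1, DO_ADD), which in turn defines a per-element
 * do_vadd_vv_b() computing vd[i] = vs2[i] + vs1[i] on int8_t.
 */
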
static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivv2_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1,  \
                  void *vs2, CPURISCVState *env,  \
                  uint32_t desc)                  \
{                                                 \
    do_vext_vv(vd, v0, vs1, vs2, env, desc,       \
               do_##NAME, ESZ);                   \
}

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operand type.
 * (TX1)(T1)s1 expands the operand type of widen or narrow operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
}

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivx2_fn fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, s1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ)                          \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,  \
                  void *vs2, CPURISCVState *env,        \
                  uint32_t desc)                        \
{                                                       \
    do_vext_vx(vd, v0, s1, vs2, env, desc,              \
               do_##NAME, ESZ);                         \
}

GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t i;                                                          \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
                              (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

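/*
 * DO_MADC/DO_MSBC produce the carry-out/borrow-out of the unsigned
 * addition/subtraction by checking for wraparound, e.g. for uint8_t
 * operands 200 + 100 wraps to 44 < 200, so the carry-out is 1.
 */
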
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic;
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vl = env->vl;                                      \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;            \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                \
    uint32_t i;                                                 \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
        ETYPE carry = !vm && vext_elem_mask(v0, i);             \
        vext_set_elem_mask(vd, i,                               \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
    }                                                           \
    env->vstart = 0;                                            \
    /*
     * mask destination register is always tail-agnostic;
     * set tail elements to 1s
     */                                                         \
    if (vta_all_1s) {                                           \
        for (; i < total_elems; i++) {                          \
            vext_set_elem_mask(vd, i, 1);                       \
        }                                                       \
    }                                                           \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)       \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                    \
                  void *vs2, CPURISCVState *env, uint32_t desc)     \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t esz = sizeof(TS1);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);    \
    uint32_t vta = vext_vta(desc);                                  \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);     \
            continue;                                               \
        }                                                           \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                            \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                            \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                  \
    }                                                               \
    env->vstart = 0;                                                \
    /* set tail elements to 1s */                                   \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);        \
}

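/*
 * MASK truncates the shift amount to log2(SEW) bits as the spec requires,
 * so for 32-bit elements only the low 5 bits of the other operand are
 * used, e.g. a shift amount of 33 behaves as a shift by 1.
 */
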
GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/*
 * generate the helpers for shift instructions with one vector and one scalar
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
                  void *vs2, CPURISCVState *env,            \
                  uint32_t desc)                            \
{                                                           \
    uint32_t vm = vext_vm(desc);                            \
    uint32_t vl = env->vl;                                  \
    uint32_t esz = sizeof(TD);                              \
    uint32_t total_elems =                                  \
        vext_get_total_elems(env, desc, esz);               \
    uint32_t vta = vext_vta(desc);                          \
    uint32_t vma = vext_vma(desc);                          \
    uint32_t i;                                             \
                                                            \
    for (i = env->vstart; i < vl; i++) {                    \
        if (!vm && !vext_elem_mask(v0, i)) {                \
            /* set masked-off elements to 1s */             \
            vext_set_elems_1s(vd, vma, i * esz,             \
                              (i + 1) * esz);               \
            continue;                                       \
        }                                                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
    }                                                       \
    env->vstart = 0;                                        \
    /* set tail elements to 1s */                           \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

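/*
 * The narrowing shifts reuse the same shift macros with a source type
 * (TS2) twice as wide as the destination, so the shift-amount mask is
 * that of the wider 2*SEW source (0xf/0x1f/0x3f) and the result is
 * truncated when stored into the narrower destination element.
 */
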
/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic;
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

1443#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1444void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1445 CPURISCVState *env, uint32_t desc) \
1446{ \
1366fc79
LZ
1447 uint32_t vm = vext_vm(desc); \
1448 uint32_t vl = env->vl; \
86247c51 1449 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
38581e5c 1450 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
6e11d7ea 1451 uint32_t vma = vext_vma(desc); \
1366fc79
LZ
1452 uint32_t i; \
1453 \
f714361e 1454 for (i = env->vstart; i < vl; i++) { \
1366fc79 1455 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1456 if (!vm && !vext_elem_mask(v0, i)) { \
6e11d7ea
YTC
1457 /* set masked-off elements to 1s */ \
1458 if (vma) { \
1459 vext_set_elem_mask(vd, i, 1); \
1460 } \
1366fc79
LZ
1461 continue; \
1462 } \
f9298de5 1463 vext_set_elem_mask(vd, i, \
1366fc79
LZ
1464 DO_OP(s2, (ETYPE)(target_long)s1)); \
1465 } \
f714361e 1466 env->vstart = 0; \
3b57254d
WL
1467 /*
1468 * mask destination register is always tail-agnostic
1469 * set tail elements to 1s
1470 */ \
38581e5c 1471 if (vta_all_1s) { \
1472 for (; i < total_elems; i++) { \
1473 vext_set_elem_mask(vd, i, 1); \
1474 } \
1475 } \
1476}
1477
1478GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1479GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1480GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1481GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1482
1483GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1484GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1485GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1486GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1487
1488GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1489GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1490GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1491GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1492
1493GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1494GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1495GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1496GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1497
1498GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1499GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1500GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1501GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1502
1503GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1504GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1505GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1506GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1507
1508GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1509GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1510GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1511GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1512
1513GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1514GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1515GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1516GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
558fa779
LZ
1517
1518/* Vector Integer Min/Max Instructions */
1519RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1520RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1521RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1522RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1523RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1524RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1525RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1526RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1527RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1528RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1529RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1530RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1531RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1532RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1533RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1534RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
f1eed927 1535GEN_VEXT_VV(vminu_vv_b, 1)
1536GEN_VEXT_VV(vminu_vv_h, 2)
1537GEN_VEXT_VV(vminu_vv_w, 4)
1538GEN_VEXT_VV(vminu_vv_d, 8)
1539GEN_VEXT_VV(vmin_vv_b, 1)
1540GEN_VEXT_VV(vmin_vv_h, 2)
1541GEN_VEXT_VV(vmin_vv_w, 4)
1542GEN_VEXT_VV(vmin_vv_d, 8)
1543GEN_VEXT_VV(vmaxu_vv_b, 1)
1544GEN_VEXT_VV(vmaxu_vv_h, 2)
1545GEN_VEXT_VV(vmaxu_vv_w, 4)
1546GEN_VEXT_VV(vmaxu_vv_d, 8)
1547GEN_VEXT_VV(vmax_vv_b, 1)
1548GEN_VEXT_VV(vmax_vv_h, 2)
1549GEN_VEXT_VV(vmax_vv_w, 4)
1550GEN_VEXT_VV(vmax_vv_d, 8)
558fa779
LZ
1551
1552RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1553RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1554RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1555RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1556RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1557RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1558RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1559RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1560RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1561RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1562RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1563RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1564RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1565RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1566RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1567RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
5c19fc15 1568GEN_VEXT_VX(vminu_vx_b, 1)
1569GEN_VEXT_VX(vminu_vx_h, 2)
1570GEN_VEXT_VX(vminu_vx_w, 4)
1571GEN_VEXT_VX(vminu_vx_d, 8)
1572GEN_VEXT_VX(vmin_vx_b, 1)
1573GEN_VEXT_VX(vmin_vx_h, 2)
1574GEN_VEXT_VX(vmin_vx_w, 4)
1575GEN_VEXT_VX(vmin_vx_d, 8)
1576GEN_VEXT_VX(vmaxu_vx_b, 1)
1577GEN_VEXT_VX(vmaxu_vx_h, 2)
1578GEN_VEXT_VX(vmaxu_vx_w, 4)
1579GEN_VEXT_VX(vmaxu_vx_d, 8)
1580GEN_VEXT_VX(vmax_vx_b, 1)
1581GEN_VEXT_VX(vmax_vx_h, 2)
1582GEN_VEXT_VX(vmax_vx_w, 4)
1583GEN_VEXT_VX(vmax_vx_d, 8)
958b85f3
LZ
1584
1585/* Vector Single-Width Integer Multiply Instructions */
1586#define DO_MUL(N, M) (N * M)
1587RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1588RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1589RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1590RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
f1eed927 1591GEN_VEXT_VV(vmul_vv_b, 1)
1592GEN_VEXT_VV(vmul_vv_h, 2)
1593GEN_VEXT_VV(vmul_vv_w, 4)
1594GEN_VEXT_VV(vmul_vv_d, 8)
958b85f3
LZ
1595
1596static int8_t do_mulh_b(int8_t s2, int8_t s1)
1597{
1598 return (int16_t)s2 * (int16_t)s1 >> 8;
1599}
1600
1601static int16_t do_mulh_h(int16_t s2, int16_t s1)
1602{
1603 return (int32_t)s2 * (int32_t)s1 >> 16;
1604}
1605
1606static int32_t do_mulh_w(int32_t s2, int32_t s1)
1607{
1608 return (int64_t)s2 * (int64_t)s1 >> 32;
1609}
1610
1611static int64_t do_mulh_d(int64_t s2, int64_t s1)
1612{
1613 uint64_t hi_64, lo_64;
1614
1615 muls64(&lo_64, &hi_64, s1, s2);
1616 return hi_64;
1617}
1618
1619static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1620{
1621 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1622}
1623
1624static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1625{
1626 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1627}
1628
1629static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1630{
1631 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1632}
1633
1634static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1635{
1636 uint64_t hi_64, lo_64;
1637
1638 mulu64(&lo_64, &hi_64, s2, s1);
1639 return hi_64;
1640}
1641
1642static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1643{
1644 return (int16_t)s2 * (uint16_t)s1 >> 8;
1645}
1646
1647static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1648{
1649 return (int32_t)s2 * (uint32_t)s1 >> 16;
1650}
1651
1652static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1653{
1654 return (int64_t)s2 * (uint64_t)s1 >> 32;
1655}
1656
1657/*
1658 * Let A = signed operand,
1659 * B = unsigned operand
1660 * P = mulu64(A, B), unsigned product
1661 *
1662 * LET X = 2 ** 64 - A, 2's complement of A
1663 * SP = signed product
1664 * THEN
1665 * IF A < 0
1666 * SP = -X * B
1667 * = -(2 ** 64 - A) * B
1668 * = A * B - 2 ** 64 * B
1669 * = P - 2 ** 64 * B
1670 * ELSE
1671 * SP = P
1672 * THEN
1673 * HI_P -= (A < 0 ? B : 0)
1674 */
1675
1676static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1677{
1678 uint64_t hi_64, lo_64;
1679
1680 mulu64(&lo_64, &hi_64, s2, s1);
1681
1682 hi_64 -= s2 < 0 ? s1 : 0;
1683 return hi_64;
1684}
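/*
 * The same correction is easy to verify at a narrower width, where the full
 * signed product can also be computed directly.  Standalone sketch at SEW=8
 * (illustrative only, not QEMU code; function names are made up):
 */
#include <stdint.h>
#include <assert.h>

static int8_t sketch_mulhsu8_wide(int8_t a, uint8_t b)
{
    return ((int16_t)a * (uint16_t)b) >> 8;    /* widen, take the high byte */
}

static int8_t sketch_mulhsu8_corrected(int8_t a, uint8_t b)
{
    uint16_t p = (uint16_t)(uint8_t)a * b;     /* unsigned product of the bit patterns */
    uint8_t hi = p >> 8;

    return hi - (a < 0 ? b : 0);               /* HI_P -= (A < 0 ? B : 0) */
}

static void sketch_mulhsu_identity(void)
{
    assert(sketch_mulhsu8_wide(-1, 255) == sketch_mulhsu8_corrected(-1, 255));
    assert(sketch_mulhsu8_wide(-128, 200) == sketch_mulhsu8_corrected(-128, 200));
    assert(sketch_mulhsu8_corrected(-128, 200) == -100);
}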
1685
1686RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1687RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1688RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1689RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1690RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1691RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1692RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1693RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1694RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1695RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1696RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1697RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
f1eed927 1698GEN_VEXT_VV(vmulh_vv_b, 1)
1699GEN_VEXT_VV(vmulh_vv_h, 2)
1700GEN_VEXT_VV(vmulh_vv_w, 4)
1701GEN_VEXT_VV(vmulh_vv_d, 8)
1702GEN_VEXT_VV(vmulhu_vv_b, 1)
1703GEN_VEXT_VV(vmulhu_vv_h, 2)
1704GEN_VEXT_VV(vmulhu_vv_w, 4)
1705GEN_VEXT_VV(vmulhu_vv_d, 8)
1706GEN_VEXT_VV(vmulhsu_vv_b, 1)
1707GEN_VEXT_VV(vmulhsu_vv_h, 2)
1708GEN_VEXT_VV(vmulhsu_vv_w, 4)
1709GEN_VEXT_VV(vmulhsu_vv_d, 8)
958b85f3
LZ
1710
1711RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1712RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1713RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1714RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1715RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1716RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1717RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1718RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1719RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1720RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1721RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1722RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1723RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1724RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1725RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1726RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
5c19fc15 1727GEN_VEXT_VX(vmul_vx_b, 1)
1728GEN_VEXT_VX(vmul_vx_h, 2)
1729GEN_VEXT_VX(vmul_vx_w, 4)
1730GEN_VEXT_VX(vmul_vx_d, 8)
1731GEN_VEXT_VX(vmulh_vx_b, 1)
1732GEN_VEXT_VX(vmulh_vx_h, 2)
1733GEN_VEXT_VX(vmulh_vx_w, 4)
1734GEN_VEXT_VX(vmulh_vx_d, 8)
1735GEN_VEXT_VX(vmulhu_vx_b, 1)
1736GEN_VEXT_VX(vmulhu_vx_h, 2)
1737GEN_VEXT_VX(vmulhu_vx_w, 4)
1738GEN_VEXT_VX(vmulhu_vx_d, 8)
1739GEN_VEXT_VX(vmulhsu_vx_b, 1)
1740GEN_VEXT_VX(vmulhsu_vx_h, 2)
1741GEN_VEXT_VX(vmulhsu_vx_w, 4)
1742GEN_VEXT_VX(vmulhsu_vx_d, 8)
85e6658c
LZ
1743
1744/* Vector Integer Divide Instructions */
1745#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1746#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
c45eff30 1747#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \
85e6658c 1748 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
c45eff30 1749#define DO_REM(N, M) (unlikely(M == 0) ? N : \
1750 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
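/*
 * The macros above encode the RVV divide corner cases: division by zero
 * yields all ones for the quotient and the unchanged dividend for the
 * remainder, and the signed overflow case (most negative value divided by
 * -1) yields the dividend and a zero remainder.  A standalone sketch at
 * SEW=8 (illustrative only; function names are made up):
 */
#include <stdint.h>
#include <assert.h>

static int8_t sketch_div8(int8_t n, int8_t m)
{
    if (m == 0) {
        return -1;                      /* x / 0 == all ones */
    }
    if (n == INT8_MIN && m == -1) {
        return INT8_MIN;                /* overflow: quotient is INT8_MIN */
    }
    return n / m;
}

static int8_t sketch_rem8(int8_t n, int8_t m)
{
    if (m == 0) {
        return n;                       /* x % 0 == x */
    }
    if (n == INT8_MIN && m == -1) {
        return 0;                       /* overflow: remainder is 0 */
    }
    return n % m;
}

static void sketch_div_corner_cases(void)
{
    assert(sketch_div8(7, 0) == -1);
    assert(sketch_rem8(7, 0) == 7);
    assert(sketch_div8(INT8_MIN, -1) == INT8_MIN);
    assert(sketch_rem8(INT8_MIN, -1) == 0);
}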
1751
1752RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1753RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1754RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1755RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1756RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1757RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1758RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1759RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1760RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1761RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1762RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1763RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1764RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1765RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1766RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1767RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
f1eed927 1768GEN_VEXT_VV(vdivu_vv_b, 1)
1769GEN_VEXT_VV(vdivu_vv_h, 2)
1770GEN_VEXT_VV(vdivu_vv_w, 4)
1771GEN_VEXT_VV(vdivu_vv_d, 8)
1772GEN_VEXT_VV(vdiv_vv_b, 1)
1773GEN_VEXT_VV(vdiv_vv_h, 2)
1774GEN_VEXT_VV(vdiv_vv_w, 4)
1775GEN_VEXT_VV(vdiv_vv_d, 8)
1776GEN_VEXT_VV(vremu_vv_b, 1)
1777GEN_VEXT_VV(vremu_vv_h, 2)
1778GEN_VEXT_VV(vremu_vv_w, 4)
1779GEN_VEXT_VV(vremu_vv_d, 8)
1780GEN_VEXT_VV(vrem_vv_b, 1)
1781GEN_VEXT_VV(vrem_vv_h, 2)
1782GEN_VEXT_VV(vrem_vv_w, 4)
1783GEN_VEXT_VV(vrem_vv_d, 8)
85e6658c
LZ
1784
1785RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1786RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1787RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1788RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1789RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1790RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1791RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1792RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1793RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1794RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1795RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1796RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1797RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1798RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1799RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1800RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
5c19fc15 1801GEN_VEXT_VX(vdivu_vx_b, 1)
1802GEN_VEXT_VX(vdivu_vx_h, 2)
1803GEN_VEXT_VX(vdivu_vx_w, 4)
1804GEN_VEXT_VX(vdivu_vx_d, 8)
1805GEN_VEXT_VX(vdiv_vx_b, 1)
1806GEN_VEXT_VX(vdiv_vx_h, 2)
1807GEN_VEXT_VX(vdiv_vx_w, 4)
1808GEN_VEXT_VX(vdiv_vx_d, 8)
1809GEN_VEXT_VX(vremu_vx_b, 1)
1810GEN_VEXT_VX(vremu_vx_h, 2)
1811GEN_VEXT_VX(vremu_vx_w, 4)
1812GEN_VEXT_VX(vremu_vx_d, 8)
1813GEN_VEXT_VX(vrem_vx_b, 1)
1814GEN_VEXT_VX(vrem_vx_h, 2)
1815GEN_VEXT_VX(vrem_vx_w, 4)
1816GEN_VEXT_VX(vrem_vx_d, 8)
97b1cba3
LZ
1817
1818/* Vector Widening Integer Multiply Instructions */
1819RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1820RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1821RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1822RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1823RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1824RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1825RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1826RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1827RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
f1eed927 1828GEN_VEXT_VV(vwmul_vv_b, 2)
1829GEN_VEXT_VV(vwmul_vv_h, 4)
1830GEN_VEXT_VV(vwmul_vv_w, 8)
1831GEN_VEXT_VV(vwmulu_vv_b, 2)
1832GEN_VEXT_VV(vwmulu_vv_h, 4)
1833GEN_VEXT_VV(vwmulu_vv_w, 8)
1834GEN_VEXT_VV(vwmulsu_vv_b, 2)
1835GEN_VEXT_VV(vwmulsu_vv_h, 4)
1836GEN_VEXT_VV(vwmulsu_vv_w, 8)
97b1cba3
LZ
1837
1838RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1839RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1840RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1841RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1842RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1843RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1844RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1845RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1846RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
5c19fc15 1847GEN_VEXT_VX(vwmul_vx_b, 2)
1848GEN_VEXT_VX(vwmul_vx_h, 4)
1849GEN_VEXT_VX(vwmul_vx_w, 8)
1850GEN_VEXT_VX(vwmulu_vx_b, 2)
1851GEN_VEXT_VX(vwmulu_vx_h, 4)
1852GEN_VEXT_VX(vwmulu_vx_w, 8)
1853GEN_VEXT_VX(vwmulsu_vx_b, 2)
1854GEN_VEXT_VX(vwmulsu_vx_h, 4)
1855GEN_VEXT_VX(vwmulsu_vx_w, 8)
54df813a
LZ
1856
1857/* Vector Single-Width Integer Multiply-Add Instructions */
c45eff30 1858#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
54df813a
LZ
1859static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1860{ \
1861 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1862 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1863 TD d = *((TD *)vd + HD(i)); \
1864 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1865}
1866
1867#define DO_MACC(N, M, D) (M * N + D)
1868#define DO_NMSAC(N, M, D) (-(M * N) + D)
1869#define DO_MADD(N, M, D) (M * D + N)
1870#define DO_NMSUB(N, M, D) (-(M * D) + N)
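/*
 * The four macros differ only in which operand is multiplied and which is
 * accumulated: vmacc/vnmsac keep vd as the addend (vd = +/-(vs1 * vs2) + vd),
 * while vmadd/vnmsub use vd as a multiplicand (vd = +/-(vs1 * vd) + vs2).
 * One-lane check using the macros just defined (illustrative only; note the
 * do_##NAME wrappers pass the arguments as (s2, s1, d)):
 */
#include <assert.h>

static void sketch_macc_vs_madd(void)
{
    int s1 = 3, s2 = 5, d = 7;

    assert(DO_MACC(s2, s1, d) == 22);    /* vmacc:  3 * 5 + 7    */
    assert(DO_NMSAC(s2, s1, d) == -8);   /* vnmsac: -(3 * 5) + 7 */
    assert(DO_MADD(s2, s1, d) == 26);    /* vmadd:  3 * 7 + 5    */
    assert(DO_NMSUB(s2, s1, d) == -16);  /* vnmsub: -(3 * 7) + 5 */
}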
1871RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1872RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1873RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1874RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1875RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1876RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1877RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1878RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1879RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1880RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1881RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1882RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1883RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1884RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1885RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1886RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
f1eed927 1887GEN_VEXT_VV(vmacc_vv_b, 1)
1888GEN_VEXT_VV(vmacc_vv_h, 2)
1889GEN_VEXT_VV(vmacc_vv_w, 4)
1890GEN_VEXT_VV(vmacc_vv_d, 8)
1891GEN_VEXT_VV(vnmsac_vv_b, 1)
1892GEN_VEXT_VV(vnmsac_vv_h, 2)
1893GEN_VEXT_VV(vnmsac_vv_w, 4)
1894GEN_VEXT_VV(vnmsac_vv_d, 8)
1895GEN_VEXT_VV(vmadd_vv_b, 1)
1896GEN_VEXT_VV(vmadd_vv_h, 2)
1897GEN_VEXT_VV(vmadd_vv_w, 4)
1898GEN_VEXT_VV(vmadd_vv_d, 8)
1899GEN_VEXT_VV(vnmsub_vv_b, 1)
1900GEN_VEXT_VV(vnmsub_vv_h, 2)
1901GEN_VEXT_VV(vnmsub_vv_w, 4)
1902GEN_VEXT_VV(vnmsub_vv_d, 8)
54df813a
LZ
1903
1904#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1905static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1906{ \
1907 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1908 TD d = *((TD *)vd + HD(i)); \
1909 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1910}
1911
1912RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1913RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1914RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1915RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1916RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1917RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1918RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1919RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1920RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1921RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1922RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1923RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1924RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1925RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1926RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1927RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
5c19fc15 1928GEN_VEXT_VX(vmacc_vx_b, 1)
1929GEN_VEXT_VX(vmacc_vx_h, 2)
1930GEN_VEXT_VX(vmacc_vx_w, 4)
1931GEN_VEXT_VX(vmacc_vx_d, 8)
1932GEN_VEXT_VX(vnmsac_vx_b, 1)
1933GEN_VEXT_VX(vnmsac_vx_h, 2)
1934GEN_VEXT_VX(vnmsac_vx_w, 4)
1935GEN_VEXT_VX(vnmsac_vx_d, 8)
1936GEN_VEXT_VX(vmadd_vx_b, 1)
1937GEN_VEXT_VX(vmadd_vx_h, 2)
1938GEN_VEXT_VX(vmadd_vx_w, 4)
1939GEN_VEXT_VX(vmadd_vx_d, 8)
1940GEN_VEXT_VX(vnmsub_vx_b, 1)
1941GEN_VEXT_VX(vnmsub_vx_h, 2)
1942GEN_VEXT_VX(vnmsub_vx_w, 4)
1943GEN_VEXT_VX(vnmsub_vx_d, 8)
2b587b33
LZ
1944
1945/* Vector Widening Integer Multiply-Add Instructions */
1946RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1947RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1948RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1949RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1950RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1951RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1952RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1953RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1954RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
f1eed927 1955GEN_VEXT_VV(vwmaccu_vv_b, 2)
1956GEN_VEXT_VV(vwmaccu_vv_h, 4)
1957GEN_VEXT_VV(vwmaccu_vv_w, 8)
1958GEN_VEXT_VV(vwmacc_vv_b, 2)
1959GEN_VEXT_VV(vwmacc_vv_h, 4)
1960GEN_VEXT_VV(vwmacc_vv_w, 8)
1961GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1962GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1963GEN_VEXT_VV(vwmaccsu_vv_w, 8)
2b587b33
LZ
1964
1965RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1966RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1967RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1968RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1969RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1970RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1971RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1972RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1973RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1974RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1975RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1976RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
5c19fc15 1977GEN_VEXT_VX(vwmaccu_vx_b, 2)
1978GEN_VEXT_VX(vwmaccu_vx_h, 4)
1979GEN_VEXT_VX(vwmaccu_vx_w, 8)
1980GEN_VEXT_VX(vwmacc_vx_b, 2)
1981GEN_VEXT_VX(vwmacc_vx_h, 4)
1982GEN_VEXT_VX(vwmacc_vx_w, 8)
1983GEN_VEXT_VX(vwmaccsu_vx_b, 2)
1984GEN_VEXT_VX(vwmaccsu_vx_h, 4)
1985GEN_VEXT_VX(vwmaccsu_vx_w, 8)
1986GEN_VEXT_VX(vwmaccus_vx_b, 2)
1987GEN_VEXT_VX(vwmaccus_vx_h, 4)
1988GEN_VEXT_VX(vwmaccus_vx_w, 8)
f020a7a1
LZ
1989
1990/* Vector Integer Merge and Move Instructions */
3479a814 1991#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
f020a7a1
LZ
1992void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1993 uint32_t desc) \
1994{ \
1995 uint32_t vl = env->vl; \
89a32de2 1996 uint32_t esz = sizeof(ETYPE); \
1997 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1998 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
1999 uint32_t i; \
2000 \
f714361e 2001 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
2002 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
2003 *((ETYPE *)vd + H(i)) = s1; \
2004 } \
f714361e 2005 env->vstart = 0; \
89a32de2 2006 /* set tail elements to 1s */ \
2007 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2008}
2009
3479a814
FC
2010GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
2011GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
2012GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
2013GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
f020a7a1 2014
3479a814 2015#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
f020a7a1
LZ
2016void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
2017 uint32_t desc) \
2018{ \
2019 uint32_t vl = env->vl; \
89a32de2 2020 uint32_t esz = sizeof(ETYPE); \
2021 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2022 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2023 uint32_t i; \
2024 \
f714361e 2025 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
2026 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
2027 } \
f714361e 2028 env->vstart = 0; \
89a32de2 2029 /* set tail elements to 1s */ \
2030 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2031}
2032
3479a814
FC
2033GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
2034GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
2035GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
2036GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
f020a7a1 2037
3479a814 2038#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
f020a7a1
LZ
2039void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2040 CPURISCVState *env, uint32_t desc) \
2041{ \
f020a7a1 2042 uint32_t vl = env->vl; \
89a32de2 2043 uint32_t esz = sizeof(ETYPE); \
2044 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2045 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2046 uint32_t i; \
2047 \
f714361e 2048 for (i = env->vstart; i < vl; i++) { \
f9298de5 2049 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
f020a7a1
LZ
2050 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
2051 } \
f714361e 2052 env->vstart = 0; \
89a32de2 2053 /* set tail elements to 1s */ \
2054 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2055}
2056
3479a814
FC
2057GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
2058GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
2059GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
2060GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
f020a7a1 2061
3479a814 2062#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
f020a7a1
LZ
2063void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2064 void *vs2, CPURISCVState *env, uint32_t desc) \
2065{ \
f020a7a1 2066 uint32_t vl = env->vl; \
89a32de2 2067 uint32_t esz = sizeof(ETYPE); \
2068 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2069 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2070 uint32_t i; \
2071 \
f714361e 2072 for (i = env->vstart; i < vl; i++) { \
f020a7a1 2073 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 2074 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
f020a7a1
LZ
2075 (ETYPE)(target_long)s1); \
2076 *((ETYPE *)vd + H(i)) = d; \
2077 } \
f714361e 2078 env->vstart = 0; \
89a32de2 2079 /* set tail elements to 1s */ \
2080 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2081}
2082
3479a814
FC
2083GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
2084GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
2085GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
2086GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
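/*
 * Per-element view of the merge helpers above: where the v0 mask bit is set
 * the scalar (or vs1) operand is taken, otherwise the vs2 element passes
 * through unchanged.  One-lane sketch (illustrative, not QEMU code):
 */
#include <stdint.h>

static int32_t sketch_merge_lane(int mask_bit, int32_t s1, int32_t vs2_elem)
{
    /* vd[i] = v0.mask[i] ? s1 : vs2[i] */
    return mask_bit ? s1 : vs2_elem;
}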
2087
2088/*
3b57254d 2089 * Vector Fixed-Point Arithmetic Instructions
2090 */
2091
2092/* Vector Single-Width Saturating Add and Subtract */
2093
2094/*
2095 * As fixed-point instructions usually have a rounding mode and saturation,
2096 * define common macros for fixed point here.
2097 */
2098typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2099 CPURISCVState *env, int vxrm);
2100
2101#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2102static inline void \
2103do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2104 CPURISCVState *env, int vxrm) \
2105{ \
2106 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2107 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2108 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2109}
2110
2111static inline void
2112vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2113 CPURISCVState *env,
f9298de5 2114 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 2115 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 2116{
f714361e 2117 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2118 if (!vm && !vext_elem_mask(v0, i)) {
2119 /* set masked-off elements to 1s */
2120 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2121 continue;
2122 }
2123 fn(vd, vs1, vs2, i, env, vxrm);
2124 }
f714361e 2125 env->vstart = 0;
eb2650e3
LZ
2126}
2127
2128static inline void
2129vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2130 CPURISCVState *env,
8a085fb2 2131 uint32_t desc,
09106eed 2132 opivv2_rm_fn *fn, uint32_t esz)
eb2650e3 2133{
eb2650e3
LZ
2134 uint32_t vm = vext_vm(desc);
2135 uint32_t vl = env->vl;
09106eed 2136 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2137 uint32_t vta = vext_vta(desc);
72e17a9f 2138 uint32_t vma = vext_vma(desc);
eb2650e3
LZ
2139
2140 switch (env->vxrm) {
2141 case 0: /* rnu */
2142 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2143 env, vl, vm, 0, fn, vma, esz);
eb2650e3
LZ
2144 break;
2145 case 1: /* rne */
2146 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2147 env, vl, vm, 1, fn, vma, esz);
eb2650e3
LZ
2148 break;
2149 case 2: /* rdn */
2150 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2151 env, vl, vm, 2, fn, vma, esz);
eb2650e3
LZ
2152 break;
2153 default: /* rod */
2154 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2155 env, vl, vm, 3, fn, vma, esz);
eb2650e3
LZ
2156 break;
2157 }
09106eed 2158 /* set tail elements to 1s */
2159 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2160}
2161
2162/* generate helpers for fixed point instructions with OPIVV format */
09106eed 2163#define GEN_VEXT_VV_RM(NAME, ESZ) \
eb2650e3
LZ
2164void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2165 CPURISCVState *env, uint32_t desc) \
2166{ \
8a085fb2 2167 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
09106eed 2168 do_##NAME, ESZ); \
eb2650e3
LZ
2169}
2170
246f8796
WL
2171static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
2172 uint8_t b)
eb2650e3
LZ
2173{
2174 uint8_t res = a + b;
2175 if (res < a) {
2176 res = UINT8_MAX;
2177 env->vxsat = 0x1;
2178 }
2179 return res;
2180}
2181
2182static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2183 uint16_t b)
2184{
2185 uint16_t res = a + b;
2186 if (res < a) {
2187 res = UINT16_MAX;
2188 env->vxsat = 0x1;
2189 }
2190 return res;
2191}
2192
2193static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2194 uint32_t b)
2195{
2196 uint32_t res = a + b;
2197 if (res < a) {
2198 res = UINT32_MAX;
2199 env->vxsat = 0x1;
2200 }
2201 return res;
2202}
2203
2204static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2205 uint64_t b)
2206{
2207 uint64_t res = a + b;
2208 if (res < a) {
2209 res = UINT64_MAX;
2210 env->vxsat = 0x1;
2211 }
2212 return res;
2213}
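/*
 * Unsigned saturating add detects overflow purely from the wrapped result:
 * the sum is smaller than an operand exactly when the carry out was lost.
 * One-byte sketch (illustrative only):
 */
#include <stdint.h>
#include <assert.h>

static void sketch_saddu8_wrap(void)
{
    uint8_t a = 200, b = 100;
    uint8_t res = a + b;        /* 300 wraps to 44 */

    assert(res < a);            /* wrap detected, so saddu8() would return */
                                /* UINT8_MAX and set env->vxsat            */
}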
2214
2215RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2216RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2217RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2218RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
09106eed 2219GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2220GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2221GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2222GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
eb2650e3
LZ
2223
2224typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2225 CPURISCVState *env, int vxrm);
2226
2227#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2228static inline void \
2229do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2230 CPURISCVState *env, int vxrm) \
2231{ \
2232 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2233 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2234}
2235
2236static inline void
2237vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2238 CPURISCVState *env,
f9298de5 2239 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 2240 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 2241{
f714361e 2242 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2243 if (!vm && !vext_elem_mask(v0, i)) {
2244 /* set masked-off elements to 1s */
2245 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2246 continue;
2247 }
2248 fn(vd, s1, vs2, i, env, vxrm);
2249 }
f714361e 2250 env->vstart = 0;
eb2650e3
LZ
2251}
2252
2253static inline void
2254vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2255 CPURISCVState *env,
8a085fb2 2256 uint32_t desc,
09106eed 2257 opivx2_rm_fn *fn, uint32_t esz)
eb2650e3 2258{
eb2650e3
LZ
2259 uint32_t vm = vext_vm(desc);
2260 uint32_t vl = env->vl;
09106eed 2261 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2262 uint32_t vta = vext_vta(desc);
72e17a9f 2263 uint32_t vma = vext_vma(desc);
eb2650e3
LZ
2264
2265 switch (env->vxrm) {
2266 case 0: /* rnu */
2267 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2268 env, vl, vm, 0, fn, vma, esz);
eb2650e3
LZ
2269 break;
2270 case 1: /* rne */
2271 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2272 env, vl, vm, 1, fn, vma, esz);
eb2650e3
LZ
2273 break;
2274 case 2: /* rdn */
2275 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2276 env, vl, vm, 2, fn, vma, esz);
eb2650e3
LZ
2277 break;
2278 default: /* rod */
2279 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2280 env, vl, vm, 3, fn, vma, esz);
eb2650e3
LZ
2281 break;
2282 }
09106eed 2283 /* set tail elements to 1s */
2284 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2285}
2286
2287/* generate helpers for fixed point instructions with OPIVX format */
09106eed 2288#define GEN_VEXT_VX_RM(NAME, ESZ) \
eb2650e3 2289void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
c45eff30
WL
2290 void *vs2, CPURISCVState *env, \
2291 uint32_t desc) \
eb2650e3 2292{ \
8a085fb2 2293 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
09106eed 2294 do_##NAME, ESZ); \
eb2650e3
LZ
2295}
2296
2297RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2298RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2299RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2300RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
09106eed 2301GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2302GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2303GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2304GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
eb2650e3
LZ
2305
2306static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2307{
2308 int8_t res = a + b;
2309 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2310 res = a > 0 ? INT8_MAX : INT8_MIN;
2311 env->vxsat = 0x1;
2312 }
2313 return res;
2314}
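/*
 * The signed check works because (res ^ a) and (res ^ b) both have the sign
 * bit set only when a and b share a sign and res disagrees with it, which is
 * exactly the signed-overflow condition.  One-byte sketch (illustrative):
 */
#include <stdint.h>
#include <assert.h>

static void sketch_sadd8_overflow_bit(void)
{
    int8_t a = 100, b = 50;
    int8_t res = a + b;                                 /* 150 wraps to -106 */

    assert(((res ^ a) & (res ^ b) & INT8_MIN) != 0);    /* overflow detected */

    b = -50;
    res = a + b;                                        /* 50, in range */
    assert(((res ^ a) & (res ^ b) & INT8_MIN) == 0);    /* no overflow */
}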
2315
246f8796
WL
2316static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
2317 int16_t b)
eb2650e3
LZ
2318{
2319 int16_t res = a + b;
2320 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2321 res = a > 0 ? INT16_MAX : INT16_MIN;
2322 env->vxsat = 0x1;
2323 }
2324 return res;
2325}
2326
246f8796
WL
2327static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
2328 int32_t b)
eb2650e3
LZ
2329{
2330 int32_t res = a + b;
2331 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2332 res = a > 0 ? INT32_MAX : INT32_MIN;
2333 env->vxsat = 0x1;
2334 }
2335 return res;
2336}
2337
246f8796
WL
2338static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
2339 int64_t b)
eb2650e3
LZ
2340{
2341 int64_t res = a + b;
2342 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2343 res = a > 0 ? INT64_MAX : INT64_MIN;
2344 env->vxsat = 0x1;
2345 }
2346 return res;
2347}
2348
2349RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2350RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2351RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2352RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
09106eed 2353GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2354GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2355GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2356GEN_VEXT_VV_RM(vsadd_vv_d, 8)
eb2650e3
LZ
2357
2358RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2359RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2360RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2361RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
09106eed 2362GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2363GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2364GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2365GEN_VEXT_VX_RM(vsadd_vx_d, 8)
eb2650e3 2366
246f8796
WL
2367static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2368 uint8_t b)
eb2650e3
LZ
2369{
2370 uint8_t res = a - b;
2371 if (res > a) {
2372 res = 0;
2373 env->vxsat = 0x1;
2374 }
2375 return res;
2376}
2377
2378static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2379 uint16_t b)
2380{
2381 uint16_t res = a - b;
2382 if (res > a) {
2383 res = 0;
2384 env->vxsat = 0x1;
2385 }
2386 return res;
2387}
2388
2389static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2390 uint32_t b)
2391{
2392 uint32_t res = a - b;
2393 if (res > a) {
2394 res = 0;
2395 env->vxsat = 0x1;
2396 }
2397 return res;
2398}
2399
2400static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2401 uint64_t b)
2402{
2403 uint64_t res = a - b;
2404 if (res > a) {
2405 res = 0;
2406 env->vxsat = 0x1;
2407 }
2408 return res;
2409}
2410
2411RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2412RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2413RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2414RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
09106eed 2415GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2416GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2417GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2418GEN_VEXT_VV_RM(vssubu_vv_d, 8)
eb2650e3
LZ
2419
2420RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2421RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2422RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2423RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
09106eed 2424GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2425GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2426GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2427GEN_VEXT_VX_RM(vssubu_vx_d, 8)
eb2650e3
LZ
2428
2429static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2430{
2431 int8_t res = a - b;
2432 if ((res ^ a) & (a ^ b) & INT8_MIN) {
65606f21 2433 res = a >= 0 ? INT8_MAX : INT8_MIN;
eb2650e3
LZ
2434 env->vxsat = 0x1;
2435 }
2436 return res;
2437}
2438
246f8796
WL
2439static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
2440 int16_t b)
eb2650e3
LZ
2441{
2442 int16_t res = a - b;
2443 if ((res ^ a) & (a ^ b) & INT16_MIN) {
65606f21 2444 res = a >= 0 ? INT16_MAX : INT16_MIN;
eb2650e3
LZ
2445 env->vxsat = 0x1;
2446 }
2447 return res;
2448}
2449
246f8796
WL
2450static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
2451 int32_t b)
eb2650e3
LZ
2452{
2453 int32_t res = a - b;
2454 if ((res ^ a) & (a ^ b) & INT32_MIN) {
65606f21 2455 res = a >= 0 ? INT32_MAX : INT32_MIN;
eb2650e3
LZ
2456 env->vxsat = 0x1;
2457 }
2458 return res;
2459}
2460
246f8796
WL
2461static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
2462 int64_t b)
eb2650e3
LZ
2463{
2464 int64_t res = a - b;
2465 if ((res ^ a) & (a ^ b) & INT64_MIN) {
65606f21 2466 res = a >= 0 ? INT64_MAX : INT64_MIN;
eb2650e3
LZ
2467 env->vxsat = 0x1;
2468 }
2469 return res;
2470}
2471
2472RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2473RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2474RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2475RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
09106eed 2476GEN_VEXT_VV_RM(vssub_vv_b, 1)
2477GEN_VEXT_VV_RM(vssub_vv_h, 2)
2478GEN_VEXT_VV_RM(vssub_vv_w, 4)
2479GEN_VEXT_VV_RM(vssub_vv_d, 8)
eb2650e3
LZ
2480
2481RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2482RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2483RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2484RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
09106eed 2485GEN_VEXT_VX_RM(vssub_vx_b, 1)
2486GEN_VEXT_VX_RM(vssub_vx_h, 2)
2487GEN_VEXT_VX_RM(vssub_vx_w, 4)
2488GEN_VEXT_VX_RM(vssub_vx_d, 8)
b7aee481
LZ
2489
2490/* Vector Single-Width Averaging Add and Subtract */
2491static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2492{
2493 uint8_t d = extract64(v, shift, 1);
2494 uint8_t d1;
2495 uint64_t D1, D2;
2496
2497 if (shift == 0 || shift > 64) {
2498 return 0;
2499 }
2500
2501 d1 = extract64(v, shift - 1, 1);
2502 D1 = extract64(v, 0, shift);
2503 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2504 return d1;
2505 } else if (vxrm == 1) { /* round-to-nearest-even */
2506 if (shift > 1) {
2507 D2 = extract64(v, 0, shift - 1);
2508 return d1 & ((D2 != 0) | d);
2509 } else {
2510 return d1 & d;
2511 }
2512 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2513 return !d & (D1 != 0);
2514 }
2515 return 0; /* round-down (truncate) */
2516}
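/*
 * Worked example of the rounding modes, using the function above (assumes
 * get_round() and <assert.h> are in scope; illustrative only).  For
 * v = 6 (0b110) and shift = 2 the exact quotient is 1.5; for v = 2 (0b010)
 * it is 0.5, which is where rnu, rne and rod differ.
 */
static void sketch_get_round_modes(void)
{
    /* v = 6, shift = 2: truncated quotient 1, discarded bits 0b10 */
    assert(get_round(0, 6, 2) == 1);    /* rnu: 1 + 1 = 2             */
    assert(get_round(1, 6, 2) == 1);    /* rne: 1.5 rounds up to 2    */
    assert(get_round(2, 6, 2) == 0);    /* rdn: truncates to 1        */
    assert(get_round(3, 6, 2) == 0);    /* rod: 1 is already odd      */

    /* v = 2, shift = 2: truncated quotient 0, discarded bits 0b10 */
    assert(get_round(0, 2, 2) == 1);    /* rnu: 0.5 rounds up to 1    */
    assert(get_round(1, 2, 2) == 0);    /* rne: tie goes to even 0    */
    assert(get_round(3, 2, 2) == 1);    /* rod: jam sets the LSB -> 1 */
}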
2517
246f8796
WL
2518static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
2519 int32_t b)
b7aee481
LZ
2520{
2521 int64_t res = (int64_t)a + b;
2522 uint8_t round = get_round(vxrm, res, 1);
2523
2524 return (res >> 1) + round;
2525}
2526
246f8796
WL
2527static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
2528 int64_t b)
b7aee481
LZ
2529{
2530 int64_t res = a + b;
2531 uint8_t round = get_round(vxrm, res, 1);
2532 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2533
2534 /* With signed overflow, bit 64 is inverse of bit 63. */
2535 return ((res >> 1) ^ over) + round;
2536}
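/*
 * Why the XOR trick works: if the 64-bit add overflowed, bit 64 of the exact
 * 65-bit sum is the complement of bit 63 of the wrapped result, and that bit
 * becomes the sign of sum/2, so flipping the top bit of res >> 1 restores
 * the exact average.  One-byte sketch (illustrative only):
 */
#include <stdint.h>
#include <assert.h>

static void sketch_aadd8_halving(void)
{
    int8_t a = 100, b = 100;                            /* exact average 100 */
    int8_t res = a + b;                                 /* wraps to -56 */
    int8_t over = (res ^ a) & (res ^ b) & INT8_MIN;

    /* Low bit of res is 0, so no rounding increment is needed here. */
    assert((int8_t)((res >> 1) ^ over) == 100);
}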
2537
2538RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2539RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2540RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2541RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
09106eed 2542GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2543GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2544GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2545GEN_VEXT_VV_RM(vaadd_vv_d, 8)
b7aee481
LZ
2546
2547RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2548RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2549RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2550RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
09106eed 2551GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2552GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2553GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2554GEN_VEXT_VX_RM(vaadd_vx_d, 8)
b7aee481 2555
8b99a110
FC
2556static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2557 uint32_t a, uint32_t b)
2558{
2559 uint64_t res = (uint64_t)a + b;
2560 uint8_t round = get_round(vxrm, res, 1);
2561
2562 return (res >> 1) + round;
2563}
2564
2565static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2566 uint64_t a, uint64_t b)
2567{
2568 uint64_t res = a + b;
2569 uint8_t round = get_round(vxrm, res, 1);
2570 uint64_t over = (uint64_t)(res < a) << 63;
2571
2572 return ((res >> 1) | over) + round;
2573}
2574
2575RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2576RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2577RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2578RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
09106eed 2579GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2580GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2581GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2582GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
8b99a110
FC
2583
2584RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2585RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2586RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2587RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
09106eed 2588GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2589GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2590GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2591GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
8b99a110 2592
246f8796
WL
2593static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2594 int32_t b)
b7aee481
LZ
2595{
2596 int64_t res = (int64_t)a - b;
2597 uint8_t round = get_round(vxrm, res, 1);
2598
2599 return (res >> 1) + round;
2600}
2601
246f8796
WL
2602static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
2603 int64_t b)
b7aee481
LZ
2604{
2605 int64_t res = (int64_t)a - b;
2606 uint8_t round = get_round(vxrm, res, 1);
2607 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2608
2609 /* With signed overflow, bit 64 is inverse of bit 63. */
2610 return ((res >> 1) ^ over) + round;
2611}
2612
2613RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2614RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2615RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2616RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
09106eed 2617GEN_VEXT_VV_RM(vasub_vv_b, 1)
2618GEN_VEXT_VV_RM(vasub_vv_h, 2)
2619GEN_VEXT_VV_RM(vasub_vv_w, 4)
2620GEN_VEXT_VV_RM(vasub_vv_d, 8)
b7aee481
LZ
2621
2622RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2623RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2624RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2625RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
09106eed 2626GEN_VEXT_VX_RM(vasub_vx_b, 1)
2627GEN_VEXT_VX_RM(vasub_vx_h, 2)
2628GEN_VEXT_VX_RM(vasub_vx_w, 4)
2629GEN_VEXT_VX_RM(vasub_vx_d, 8)
9f0ff9e5 2630
8b99a110
FC
2631static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2632 uint32_t a, uint32_t b)
2633{
2634 int64_t res = (int64_t)a - b;
2635 uint8_t round = get_round(vxrm, res, 1);
2636
2637 return (res >> 1) + round;
2638}
2639
2640static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2641 uint64_t a, uint64_t b)
2642{
2643 uint64_t res = (uint64_t)a - b;
2644 uint8_t round = get_round(vxrm, res, 1);
2645 uint64_t over = (uint64_t)(res > a) << 63;
2646
2647 return ((res >> 1) | over) + round;
2648}
2649
2650RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2651RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2652RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2653RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
09106eed 2654GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2655GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2656GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2657GEN_VEXT_VV_RM(vasubu_vv_d, 8)
8b99a110
FC
2658
2659RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2660RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2661RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2662RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
09106eed 2663GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2664GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2665GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2666GEN_VEXT_VX_RM(vasubu_vx_d, 8)
8b99a110 2667
9f0ff9e5
LZ
2668/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2669static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2670{
2671 uint8_t round;
2672 int16_t res;
2673
2674 res = (int16_t)a * (int16_t)b;
2675 round = get_round(vxrm, res, 7);
c45eff30 2676 res = (res >> 7) + round;
9f0ff9e5
LZ
2677
2678 if (res > INT8_MAX) {
2679 env->vxsat = 0x1;
2680 return INT8_MAX;
2681 } else if (res < INT8_MIN) {
2682 env->vxsat = 0x1;
2683 return INT8_MIN;
2684 } else {
2685 return res;
2686 }
2687}
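/*
 * vsmul treats the operands as signed fixed-point fractions (Q0.7 at SEW=8):
 * the product has two sign bits, so shifting right by 7 renormalizes it, and
 * only (-1.0) * (-1.0) = +1.0 falls outside the destination range.  Worked
 * example (illustrative; the rounding increment happens to be 0 here):
 */
#include <stdint.h>
#include <assert.h>

static void sketch_vsmul8_values(void)
{
    /* 0.5 * 0.5 = 0.25:  (64 * 64) >> 7 == 32 (0x20), no saturation */
    assert((((int16_t)64 * 64) >> 7) == 32);

    /* -1.0 * -1.0: (-128 * -128) >> 7 == 128, which vsmul8() clamps to
     * INT8_MAX (127) and reports via vxsat. */
    assert((((int16_t)-128 * -128) >> 7) == 128);
}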
2688
2689static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2690{
2691 uint8_t round;
2692 int32_t res;
2693
2694 res = (int32_t)a * (int32_t)b;
2695 round = get_round(vxrm, res, 15);
c45eff30 2696 res = (res >> 15) + round;
9f0ff9e5
LZ
2697
2698 if (res > INT16_MAX) {
2699 env->vxsat = 0x1;
2700 return INT16_MAX;
2701 } else if (res < INT16_MIN) {
2702 env->vxsat = 0x1;
2703 return INT16_MIN;
2704 } else {
2705 return res;
2706 }
2707}
2708
2709static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2710{
2711 uint8_t round;
2712 int64_t res;
2713
2714 res = (int64_t)a * (int64_t)b;
2715 round = get_round(vxrm, res, 31);
c45eff30 2716 res = (res >> 31) + round;
9f0ff9e5
LZ
2717
2718 if (res > INT32_MAX) {
2719 env->vxsat = 0x1;
2720 return INT32_MAX;
2721 } else if (res < INT32_MIN) {
2722 env->vxsat = 0x1;
2723 return INT32_MIN;
2724 } else {
2725 return res;
2726 }
2727}
2728
2729static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2730{
2731 uint8_t round;
2732 uint64_t hi_64, lo_64;
2733 int64_t res;
2734
2735 if (a == INT64_MIN && b == INT64_MIN) {
2736 env->vxsat = 1;
2737 return INT64_MAX;
2738 }
2739
2740 muls64(&lo_64, &hi_64, a, b);
2741 round = get_round(vxrm, lo_64, 63);
2742 /*
2743 * Cannot overflow, as there are always
2744 * 2 sign bits after multiply.
2745 */
2746 res = (hi_64 << 1) | (lo_64 >> 63);
2747 if (round) {
2748 if (res == INT64_MAX) {
2749 env->vxsat = 1;
2750 } else {
2751 res += 1;
2752 }
2753 }
2754 return res;
2755}
2756
2757RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2758RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2759RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2760RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
09106eed 2761GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2762GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2763GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2764GEN_VEXT_VV_RM(vsmul_vv_d, 8)
9f0ff9e5
LZ
2765
2766RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2767RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2768RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2769RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
09106eed 2770GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2771GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2772GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2773GEN_VEXT_VX_RM(vsmul_vx_d, 8)
0a1eaf00 2774
04a61406
LZ
2775/* Vector Single-Width Scaling Shift Instructions */
2776static inline uint8_t
2777vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2778{
2779 uint8_t round, shift = b & 0x7;
2780 uint8_t res;
2781
2782 round = get_round(vxrm, a, shift);
c45eff30 2783 res = (a >> shift) + round;
04a61406
LZ
2784 return res;
2785}
2786static inline uint16_t
2787vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2788{
2789 uint8_t round, shift = b & 0xf;
04a61406
LZ
2790
2791 round = get_round(vxrm, a, shift);
66997c42 2792 return (a >> shift) + round;
04a61406
LZ
2793}
2794static inline uint32_t
2795vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2796{
2797 uint8_t round, shift = b & 0x1f;
04a61406
LZ
2798
2799 round = get_round(vxrm, a, shift);
66997c42 2800 return (a >> shift) + round;
04a61406
LZ
2801}
2802static inline uint64_t
2803vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2804{
2805 uint8_t round, shift = b & 0x3f;
04a61406
LZ
2806
2807 round = get_round(vxrm, a, shift);
66997c42 2808 return (a >> shift) + round;
04a61406
LZ
2809}
2810RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2811RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2812RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2813RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
09106eed 2814GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2815GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2816GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2817GEN_VEXT_VV_RM(vssrl_vv_d, 8)
04a61406
LZ
2818
2819RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2820RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2821RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2822RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
09106eed 2823GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2824GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2825GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2826GEN_VEXT_VX_RM(vssrl_vx_d, 8)
04a61406
LZ
2827
2828static inline int8_t
2829vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2830{
2831 uint8_t round, shift = b & 0x7;
04a61406
LZ
2832
2833 round = get_round(vxrm, a, shift);
66997c42 2834 return (a >> shift) + round;
04a61406
LZ
2835}
2836static inline int16_t
2837vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2838{
2839 uint8_t round, shift = b & 0xf;
04a61406
LZ
2840
2841 round = get_round(vxrm, a, shift);
66997c42 2842 return (a >> shift) + round;
04a61406
LZ
2843}
2844static inline int32_t
2845vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2846{
2847 uint8_t round, shift = b & 0x1f;
04a61406
LZ
2848
2849 round = get_round(vxrm, a, shift);
66997c42 2850 return (a >> shift) + round;
04a61406
LZ
2851}
2852static inline int64_t
2853vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2854{
2855 uint8_t round, shift = b & 0x3f;
04a61406
LZ
2856
2857 round = get_round(vxrm, a, shift);
66997c42 2858 return (a >> shift) + round;
04a61406 2859}
9ff3d287 2860
04a61406
LZ
2861RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2862RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2863RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2864RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
09106eed 2865GEN_VEXT_VV_RM(vssra_vv_b, 1)
2866GEN_VEXT_VV_RM(vssra_vv_h, 2)
2867GEN_VEXT_VV_RM(vssra_vv_w, 4)
2868GEN_VEXT_VV_RM(vssra_vv_d, 8)
04a61406
LZ
2869
2870RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2871RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2872RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2873RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
09106eed 2874GEN_VEXT_VX_RM(vssra_vx_b, 1)
2875GEN_VEXT_VX_RM(vssra_vx_h, 2)
2876GEN_VEXT_VX_RM(vssra_vx_w, 4)
2877GEN_VEXT_VX_RM(vssra_vx_d, 8)
9ff3d287
LZ
2878
2879/* Vector Narrowing Fixed-Point Clip Instructions */
2880static inline int8_t
2881vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2882{
2883 uint8_t round, shift = b & 0xf;
2884 int16_t res;
2885
2886 round = get_round(vxrm, a, shift);
c45eff30 2887 res = (a >> shift) + round;
9ff3d287
LZ
2888 if (res > INT8_MAX) {
2889 env->vxsat = 0x1;
2890 return INT8_MAX;
2891 } else if (res < INT8_MIN) {
2892 env->vxsat = 0x1;
2893 return INT8_MIN;
2894 } else {
2895 return res;
2896 }
2897}
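/*
 * vnclip narrows a 2*SEW operand with a scaling right shift and then
 * saturates into SEW.  Worked example for the SEW=8 case above, with
 * vxrm = rnu (illustrative only):
 */
#include <stdint.h>
#include <assert.h>

static void sketch_vnclip8_values(void)
{
    /* 0x1234 >> 8 = 0x12, rounding bit (bit 7) is 0 -> fits in int8_t */
    assert((0x1234 >> 8) == 0x12);

    /* 0x7FFF >> 4 = 0x7FF, rounding bit is 1 -> 0x800, far above INT8_MAX,
     * so vnclip8() returns INT8_MAX (127) and sets vxsat. */
    assert(((0x7FFF >> 4) + 1) == 0x800);
}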
2898
2899static inline int16_t
2900vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2901{
2902 uint8_t round, shift = b & 0x1f;
2903 int32_t res;
2904
2905 round = get_round(vxrm, a, shift);
c45eff30 2906 res = (a >> shift) + round;
9ff3d287
LZ
2907 if (res > INT16_MAX) {
2908 env->vxsat = 0x1;
2909 return INT16_MAX;
2910 } else if (res < INT16_MIN) {
2911 env->vxsat = 0x1;
2912 return INT16_MIN;
2913 } else {
2914 return res;
2915 }
2916}
2917
2918static inline int32_t
2919vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2920{
2921 uint8_t round, shift = b & 0x3f;
2922 int64_t res;
2923
2924 round = get_round(vxrm, a, shift);
c45eff30 2925 res = (a >> shift) + round;
9ff3d287
LZ
2926 if (res > INT32_MAX) {
2927 env->vxsat = 0x1;
2928 return INT32_MAX;
2929 } else if (res < INT32_MIN) {
2930 env->vxsat = 0x1;
2931 return INT32_MIN;
2932 } else {
2933 return res;
2934 }
2935}
2936
a70b3a73
FC
2937RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2938RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2939RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
09106eed 2940GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2941GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2942GEN_VEXT_VV_RM(vnclip_wv_w, 4)
a70b3a73
FC
2943
2944RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2945RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2946RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
09106eed 2947GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2948GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2949GEN_VEXT_VX_RM(vnclip_wx_w, 4)
9ff3d287
LZ
2950
2951static inline uint8_t
2952vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2953{
2954 uint8_t round, shift = b & 0xf;
2955 uint16_t res;
2956
2957 round = get_round(vxrm, a, shift);
c45eff30 2958 res = (a >> shift) + round;
9ff3d287
LZ
2959 if (res > UINT8_MAX) {
2960 env->vxsat = 0x1;
2961 return UINT8_MAX;
2962 } else {
2963 return res;
2964 }
2965}
2966
2967static inline uint16_t
2968vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2969{
2970 uint8_t round, shift = b & 0x1f;
2971 uint32_t res;
2972
2973 round = get_round(vxrm, a, shift);
c45eff30 2974 res = (a >> shift) + round;
9ff3d287
LZ
2975 if (res > UINT16_MAX) {
2976 env->vxsat = 0x1;
2977 return UINT16_MAX;
2978 } else {
2979 return res;
2980 }
2981}
2982
2983static inline uint32_t
2984vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2985{
2986 uint8_t round, shift = b & 0x3f;
a70b3a73 2987 uint64_t res;
9ff3d287
LZ
2988
2989 round = get_round(vxrm, a, shift);
c45eff30 2990 res = (a >> shift) + round;
9ff3d287
LZ
2991 if (res > UINT32_MAX) {
2992 env->vxsat = 0x1;
2993 return UINT32_MAX;
2994 } else {
2995 return res;
2996 }
2997}
2998
a70b3a73
FC
2999RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
3000RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
3001RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
09106eed 3002GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
3003GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
3004GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
9ff3d287 3005
a70b3a73
FC
3006RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
3007RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
3008RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
09106eed 3009GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
3010GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
3011GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
3012
3013/*
3b57254d 3014 * Vector Float Point Arithmetic Instructions
3015 */
3016/* Vector Single-Width Floating-Point Add/Subtract Instructions */
3017#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3018static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3019 CPURISCVState *env) \
3020{ \
3021 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3022 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3023 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
3024}
3025
5eacf7d8 3026#define GEN_VEXT_VV_ENV(NAME, ESZ) \
ce2a0343
LZ
3027void HELPER(NAME)(void *vd, void *v0, void *vs1, \
3028 void *vs2, CPURISCVState *env, \
3029 uint32_t desc) \
3030{ \
ce2a0343
LZ
3031 uint32_t vm = vext_vm(desc); \
3032 uint32_t vl = env->vl; \
5eacf7d8 3033 uint32_t total_elems = \
3034 vext_get_total_elems(env, desc, ESZ); \
3035 uint32_t vta = vext_vta(desc); \
5b448f44 3036 uint32_t vma = vext_vma(desc); \
ce2a0343
LZ
3037 uint32_t i; \
3038 \
f714361e 3039 for (i = env->vstart; i < vl; i++) { \
f9298de5 3040 if (!vm && !vext_elem_mask(v0, i)) { \
3041 /* set masked-off elements to 1s */ \
3042 vext_set_elems_1s(vd, vma, i * ESZ, \
3043 (i + 1) * ESZ); \
ce2a0343
LZ
3044 continue; \
3045 } \
3046 do_##NAME(vd, vs1, vs2, i, env); \
3047 } \
f714361e 3048 env->vstart = 0; \
5eacf7d8 3049 /* set tail elements to 1s */ \
3050 vext_set_elems_1s(vd, vta, vl * ESZ, \
3051 total_elems * ESZ); \
ce2a0343
LZ
3052}
3053
3054RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3055RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3056RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
5eacf7d8 3057GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
3058GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
3059GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
ce2a0343
LZ
3060
3061#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3062static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3063 CPURISCVState *env) \
3064{ \
3065 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3066 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3067}
3068
5eacf7d8 3069#define GEN_VEXT_VF(NAME, ESZ) \
ce2a0343
LZ
3070void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
3071 void *vs2, CPURISCVState *env, \
3072 uint32_t desc) \
3073{ \
ce2a0343
LZ
3074 uint32_t vm = vext_vm(desc); \
3075 uint32_t vl = env->vl; \
5eacf7d8 3076 uint32_t total_elems = \
c45eff30 3077 vext_get_total_elems(env, desc, ESZ); \
5eacf7d8 3078 uint32_t vta = vext_vta(desc); \
5b448f44 3079 uint32_t vma = vext_vma(desc); \
ce2a0343
LZ
3080 uint32_t i; \
3081 \
f714361e 3082 for (i = env->vstart; i < vl; i++) { \
f9298de5 3083 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3084 /* set masked-off elements to 1s */ \
3085 vext_set_elems_1s(vd, vma, i * ESZ, \
3086 (i + 1) * ESZ); \
ce2a0343
LZ
3087 continue; \
3088 } \
3089 do_##NAME(vd, s1, vs2, i, env); \
3090 } \
f714361e 3091 env->vstart = 0; \
5eacf7d8 3092 /* set tail elements to 1s */ \
3093 vext_set_elems_1s(vd, vta, vl * ESZ, \
3094 total_elems * ESZ); \
ce2a0343
LZ
3095}
3096
3097RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3098RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3099RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
5eacf7d8 3100GEN_VEXT_VF(vfadd_vf_h, 2)
3101GEN_VEXT_VF(vfadd_vf_w, 4)
3102GEN_VEXT_VF(vfadd_vf_d, 8)
ce2a0343
LZ
3103
3104RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3105RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3106RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
5eacf7d8 3107GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
3108GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
3109GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
ce2a0343
LZ
3110RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3111RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3112RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
5eacf7d8 3113GEN_VEXT_VF(vfsub_vf_h, 2)
3114GEN_VEXT_VF(vfsub_vf_w, 4)
3115GEN_VEXT_VF(vfsub_vf_d, 8)
ce2a0343
LZ
3116
3117static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3118{
3119 return float16_sub(b, a, s);
3120}
3121
3122static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3123{
3124 return float32_sub(b, a, s);
3125}
3126
3127static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3128{
3129 return float64_sub(b, a, s);
3130}
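/*
 * The *_rsub wrappers swap their operands so that, with OPFVF2 passing
 * (s2, s1), vfrsub.vf computes f[rs1] - vs2[i] instead of vs2[i] - f[rs1].
 */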
3131
3132RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3133RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3134RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
5eacf7d8 3135GEN_VEXT_VF(vfrsub_vf_h, 2)
3136GEN_VEXT_VF(vfrsub_vf_w, 4)
3137GEN_VEXT_VF(vfrsub_vf_d, 8)
eeffab2e
LZ
3138
3139/* Vector Widening Floating-Point Add/Subtract Instructions */
3140static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3141{
3142 return float32_add(float16_to_float32(a, true, s),
c45eff30 3143 float16_to_float32(b, true, s), s);
eeffab2e
LZ
3144}
3145
3146static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3147{
3148 return float64_add(float32_to_float64(a, s),
c45eff30 3149 float32_to_float64(b, s), s);
eeffab2e
LZ
3151}
3152
3153RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3154RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
5eacf7d8 3155GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3156GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
eeffab2e
LZ
3157RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3158RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
5eacf7d8 3159GEN_VEXT_VF(vfwadd_vf_h, 4)
3160GEN_VEXT_VF(vfwadd_vf_w, 8)
eeffab2e
LZ
3161
3162static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3163{
3164 return float32_sub(float16_to_float32(a, true, s),
c45eff30 3165 float16_to_float32(b, true, s), s);
eeffab2e
LZ
3166}
3167
3168static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3169{
3170 return float64_sub(float32_to_float64(a, s),
c45eff30 3171 float32_to_float64(b, s), s);
eeffab2e
LZ
3173}
3174
3175RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3176RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
5eacf7d8 3177GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3178GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
eeffab2e
LZ
3179RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3180RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
5eacf7d8 3181GEN_VEXT_VF(vfwsub_vf_h, 4)
3182GEN_VEXT_VF(vfwsub_vf_w, 8)
eeffab2e
LZ
3183
3184static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3185{
3186 return float32_add(a, float16_to_float32(b, true, s), s);
3187}
3188
3189static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3190{
3191 return float64_add(a, float32_to_float64(b, s), s);
3192}
3193
3194RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3195RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
5eacf7d8 3196GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3197GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
eeffab2e
LZ
3198RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3199RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
5eacf7d8 3200GEN_VEXT_VF(vfwadd_wf_h, 4)
3201GEN_VEXT_VF(vfwadd_wf_w, 8)
eeffab2e
LZ
3202
3203static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3204{
3205 return float32_sub(a, float16_to_float32(b, true, s), s);
3206}
3207
3208static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3209{
3210 return float64_sub(a, float32_to_float64(b, s), s);
3211}
3212
3213RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3214RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
5eacf7d8 3215GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3216GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
eeffab2e
LZ
3217RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3218RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
5eacf7d8 3219GEN_VEXT_VF(vfwsub_wf_h, 4)
3220GEN_VEXT_VF(vfwsub_wf_w, 8)
0e0057cb
LZ
3221
3222/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3223RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3224RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3225RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
5eacf7d8 3226GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3227GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3228GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
0e0057cb
LZ
3229RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3230RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3231RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
5eacf7d8 3232GEN_VEXT_VF(vfmul_vf_h, 2)
3233GEN_VEXT_VF(vfmul_vf_w, 4)
3234GEN_VEXT_VF(vfmul_vf_d, 8)
0e0057cb
LZ
3235
3236RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3237RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3238RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
5eacf7d8 3239GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3240GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3241GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
0e0057cb
LZ
3242RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3243RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3244RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
5eacf7d8 3245GEN_VEXT_VF(vfdiv_vf_h, 2)
3246GEN_VEXT_VF(vfdiv_vf_w, 4)
3247GEN_VEXT_VF(vfdiv_vf_d, 8)
0e0057cb
LZ
3248
3249static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3250{
3251 return float16_div(b, a, s);
3252}
3253
3254static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3255{
3256 return float32_div(b, a, s);
3257}
3258
3259static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3260{
3261 return float64_div(b, a, s);
3262}
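/*
 * As with vfrsub, the operands are reversed so that vfrdiv.vf computes
 * f[rs1] / vs2[i].
 */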
3263
3264RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3265RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3266RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
5eacf7d8 3267GEN_VEXT_VF(vfrdiv_vf_h, 2)
3268GEN_VEXT_VF(vfrdiv_vf_w, 4)
3269GEN_VEXT_VF(vfrdiv_vf_d, 8)
f7c7b7cd
LZ
3270
3271/* Vector Widening Floating-Point Multiply */
3272static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3273{
3274 return float32_mul(float16_to_float32(a, true, s),
c45eff30 3275 float16_to_float32(b, true, s), s);
f7c7b7cd
LZ
3276}
3277
3278static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3279{
3280 return float64_mul(float32_to_float64(a, s),
c45eff30 3281 float32_to_float64(b, s), s);
f7c7b7cd
LZ
3283}
3284RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3285RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
5eacf7d8 3286GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3287GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
f7c7b7cd
LZ
3288RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3289RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
5eacf7d8 3290GEN_VEXT_VF(vfwmul_vf_h, 4)
3291GEN_VEXT_VF(vfwmul_vf_w, 8)
4aa5a8fe
LZ
3292
3293/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3294#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3295static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
c45eff30 3296 CPURISCVState *env) \
4aa5a8fe
LZ
3297{ \
3298 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3299 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3300 TD d = *((TD *)vd + HD(i)); \
3301 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3302}
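/*
 * The helpers below map the RVV fused multiply-add forms onto softfloat's
 * float*_muladd() negate flags:
 *   vfmacc:  vd = +(vs1 * vs2) + vd   (no flags)
 *   vfnmacc: vd = -(vs1 * vs2) - vd   (negate_product | negate_c)
 *   vfmsac:  vd = +(vs1 * vs2) - vd   (negate_c)
 *   vfnmsac: vd = -(vs1 * vs2) + vd   (negate_product)
 * The *madd/*msub variants multiply vd[i] by vs1[i] (or f[rs1]) and use
 * vs2[i] as the addend.
 */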
3303
3304static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3305{
3306 return float16_muladd(a, b, d, 0, s);
3307}
3308
3309static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3310{
3311 return float32_muladd(a, b, d, 0, s);
3312}
3313
3314static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3315{
3316 return float64_muladd(a, b, d, 0, s);
3317}
3318
3319RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3320RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3321RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
5eacf7d8 3322GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3323GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3324GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
4aa5a8fe
LZ
3325
3326#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3327static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
c45eff30 3328 CPURISCVState *env) \
4aa5a8fe
LZ
3329{ \
3330 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3331 TD d = *((TD *)vd + HD(i)); \
3332 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3333}
3334
3335RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3336RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3337RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
5eacf7d8 3338GEN_VEXT_VF(vfmacc_vf_h, 2)
3339GEN_VEXT_VF(vfmacc_vf_w, 4)
3340GEN_VEXT_VF(vfmacc_vf_d, 8)
4aa5a8fe
LZ
3341
3342static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3343{
c45eff30
WL
3344 return float16_muladd(a, b, d, float_muladd_negate_c |
3345 float_muladd_negate_product, s);
4aa5a8fe
LZ
3346}
3347
3348static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3349{
c45eff30
WL
3350 return float32_muladd(a, b, d, float_muladd_negate_c |
3351 float_muladd_negate_product, s);
4aa5a8fe
LZ
3352}
3353
3354static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3355{
c45eff30
WL
3356 return float64_muladd(a, b, d, float_muladd_negate_c |
3357 float_muladd_negate_product, s);
4aa5a8fe
LZ
3358}
3359
3360RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3361RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3362RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
5eacf7d8 3363GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3364GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3365GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
4aa5a8fe
LZ
3366RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3367RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3368RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
5eacf7d8 3369GEN_VEXT_VF(vfnmacc_vf_h, 2)
3370GEN_VEXT_VF(vfnmacc_vf_w, 4)
3371GEN_VEXT_VF(vfnmacc_vf_d, 8)
4aa5a8fe
LZ
3372
3373static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3374{
3375 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3376}
3377
3378static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3379{
3380 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3381}
3382
3383static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3384{
3385 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3386}
3387
3388RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3389RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3390RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
5eacf7d8 3391GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3392GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3393GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
4aa5a8fe
LZ
3394RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3395RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3396RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
5eacf7d8 3397GEN_VEXT_VF(vfmsac_vf_h, 2)
3398GEN_VEXT_VF(vfmsac_vf_w, 4)
3399GEN_VEXT_VF(vfmsac_vf_d, 8)
4aa5a8fe
LZ
3400
3401static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3402{
3403 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3404}
3405
3406static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3407{
3408 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3409}
3410
3411static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3412{
3413 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3414}
3415
3416RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3417RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3418RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
5eacf7d8 3419GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3420GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3421GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
4aa5a8fe
LZ
3422RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3423RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3424RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
5eacf7d8 3425GEN_VEXT_VF(vfnmsac_vf_h, 2)
3426GEN_VEXT_VF(vfnmsac_vf_w, 4)
3427GEN_VEXT_VF(vfnmsac_vf_d, 8)
4aa5a8fe
LZ
3428
3429static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3430{
3431 return float16_muladd(d, b, a, 0, s);
3432}
3433
3434static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3435{
3436 return float32_muladd(d, b, a, 0, s);
3437}
3438
3439static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3440{
3441 return float64_muladd(d, b, a, 0, s);
3442}
3443
3444RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3445RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3446RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
5eacf7d8 3447GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3448GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3449GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
4aa5a8fe
LZ
3450RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3451RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3452RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
5eacf7d8 3453GEN_VEXT_VF(vfmadd_vf_h, 2)
3454GEN_VEXT_VF(vfmadd_vf_w, 4)
3455GEN_VEXT_VF(vfmadd_vf_d, 8)
4aa5a8fe
LZ
3456
3457static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3458{
c45eff30
WL
3459 return float16_muladd(d, b, a, float_muladd_negate_c |
3460 float_muladd_negate_product, s);
4aa5a8fe
LZ
3461}
3462
3463static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3464{
c45eff30
WL
3465 return float32_muladd(d, b, a, float_muladd_negate_c |
3466 float_muladd_negate_product, s);
4aa5a8fe
LZ
3467}
3468
3469static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3470{
c45eff30
WL
3471 return float64_muladd(d, b, a, float_muladd_negate_c |
3472 float_muladd_negate_product, s);
4aa5a8fe
LZ
3473}
3474
3475RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3476RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3477RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
5eacf7d8 3478GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3479GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3480GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
4aa5a8fe
LZ
3481RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3482RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3483RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
5eacf7d8 3484GEN_VEXT_VF(vfnmadd_vf_h, 2)
3485GEN_VEXT_VF(vfnmadd_vf_w, 4)
3486GEN_VEXT_VF(vfnmadd_vf_d, 8)
4aa5a8fe
LZ
3487
3488static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3489{
3490 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3491}
3492
3493static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3494{
3495 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3496}
3497
3498static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3499{
3500 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3501}
3502
3503RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3504RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3505RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
5eacf7d8 3506GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3507GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3508GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
4aa5a8fe
LZ
3509RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3510RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3511RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
5eacf7d8 3512GEN_VEXT_VF(vfmsub_vf_h, 2)
3513GEN_VEXT_VF(vfmsub_vf_w, 4)
3514GEN_VEXT_VF(vfmsub_vf_d, 8)
4aa5a8fe
LZ
3515
3516static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3517{
3518 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3519}
3520
3521static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3522{
3523 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3524}
3525
3526static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3527{
3528 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3529}
3530
3531RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3532RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3533RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
5eacf7d8 3534GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3535GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3536GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
4aa5a8fe
LZ
3537RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3538RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3539RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
5eacf7d8 3540GEN_VEXT_VF(vfnmsub_vf_h, 2)
3541GEN_VEXT_VF(vfnmsub_vf_w, 4)
3542GEN_VEXT_VF(vfnmsub_vf_d, 8)
0dd50959
LZ
3543
3544/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3545static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3546{
3547 return float32_muladd(float16_to_float32(a, true, s),
c45eff30 3548 float16_to_float32(b, true, s), d, 0, s);
0dd50959
LZ
3549}
3550
3551static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3552{
3553 return float64_muladd(float32_to_float64(a, s),
c45eff30 3554 float32_to_float64(b, s), d, 0, s);
0dd50959
LZ
3555}
3556
3557RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3558RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
5eacf7d8 3559GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3560GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
0dd50959
LZ
3561RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3562RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
5eacf7d8 3563GEN_VEXT_VF(vfwmacc_vf_h, 4)
3564GEN_VEXT_VF(vfwmacc_vf_w, 8)
0dd50959
LZ
3565
3566static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3567{
3568 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3569 float16_to_float32(b, true, s), d,
3570 float_muladd_negate_c | float_muladd_negate_product,
3571 s);
0dd50959
LZ
3572}
3573
3574static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3575{
c45eff30
WL
3576 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
3577 d, float_muladd_negate_c |
3578 float_muladd_negate_product, s);
0dd50959
LZ
3579}
3580
3581RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3582RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
5eacf7d8 3583GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3584GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
0dd50959
LZ
3585RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3586RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
5eacf7d8 3587GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3588GEN_VEXT_VF(vfwnmacc_vf_w, 8)
0dd50959
LZ
3589
3590static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3591{
3592 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3593 float16_to_float32(b, true, s), d,
3594 float_muladd_negate_c, s);
0dd50959
LZ
3595}
3596
3597static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3598{
3599 return float64_muladd(float32_to_float64(a, s),
c45eff30
WL
3600 float32_to_float64(b, s), d,
3601 float_muladd_negate_c, s);
0dd50959
LZ
3602}
3603
3604RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3605RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
5eacf7d8 3606GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3607GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
0dd50959
LZ
3608RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3609RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
5eacf7d8 3610GEN_VEXT_VF(vfwmsac_vf_h, 4)
3611GEN_VEXT_VF(vfwmsac_vf_w, 8)
0dd50959
LZ
3612
3613static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3614{
3615 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3616 float16_to_float32(b, true, s), d,
3617 float_muladd_negate_product, s);
0dd50959
LZ
3618}
3619
3620static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3621{
3622 return float64_muladd(float32_to_float64(a, s),
c45eff30
WL
3623 float32_to_float64(b, s), d,
3624 float_muladd_negate_product, s);
0dd50959
LZ
3625}
3626
3627RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3628RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
5eacf7d8 3629GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3630GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
0dd50959
LZ
3631RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3632RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
5eacf7d8 3633GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3634GEN_VEXT_VF(vfwnmsac_vf_w, 8)
d9e4ce72
LZ
3635
3636/* Vector Floating-Point Square-Root Instruction */
3637/* (TD, T2, TX2) */
3638#define OP_UU_H uint16_t, uint16_t, uint16_t
3639#define OP_UU_W uint32_t, uint32_t, uint32_t
3640#define OP_UU_D uint64_t, uint64_t, uint64_t
3641
c45eff30 3642#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
d9e4ce72 3643static void do_##NAME(void *vd, void *vs2, int i, \
c45eff30 3644 CPURISCVState *env) \
d9e4ce72
LZ
3645{ \
3646 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3647 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3648}
3649
5eacf7d8 3650#define GEN_VEXT_V_ENV(NAME, ESZ) \
d9e4ce72 3651void HELPER(NAME)(void *vd, void *v0, void *vs2, \
c45eff30 3652 CPURISCVState *env, uint32_t desc) \
d9e4ce72 3653{ \
d9e4ce72
LZ
3654 uint32_t vm = vext_vm(desc); \
3655 uint32_t vl = env->vl; \
5eacf7d8 3656 uint32_t total_elems = \
3657 vext_get_total_elems(env, desc, ESZ); \
3658 uint32_t vta = vext_vta(desc); \
5b448f44 3659 uint32_t vma = vext_vma(desc); \
d9e4ce72
LZ
3660 uint32_t i; \
3661 \
3662 if (vl == 0) { \
3663 return; \
3664 } \
f714361e 3665 for (i = env->vstart; i < vl; i++) { \
f9298de5 3666 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3667 /* set masked-off elements to 1s */ \
3668 vext_set_elems_1s(vd, vma, i * ESZ, \
3669 (i + 1) * ESZ); \
d9e4ce72
LZ
3670 continue; \
3671 } \
3672 do_##NAME(vd, vs2, i, env); \
3673 } \
f714361e 3674 env->vstart = 0; \
5eacf7d8 3675 vext_set_elems_1s(vd, vta, vl * ESZ, \
3676 total_elems * ESZ); \
d9e4ce72
LZ
3677}
3678
3679RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3680RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3681RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
5eacf7d8 3682GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3683GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3684GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
230b53dd 3685
e848a1e5
FC
3686/*
3687 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3688 *
3689 * Adapted from riscv-v-spec recip.c:
3690 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3691 */
3692static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3693{
3694 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3695 uint64_t exp = extract64(f, frac_size, exp_size);
3696 uint64_t frac = extract64(f, 0, frac_size);
3697
3698 const uint8_t lookup_table[] = {
3699 52, 51, 50, 48, 47, 46, 44, 43,
3700 42, 41, 40, 39, 38, 36, 35, 34,
3701 33, 32, 31, 30, 30, 29, 28, 27,
3702 26, 25, 24, 23, 23, 22, 21, 20,
3703 19, 19, 18, 17, 16, 16, 15, 14,
3704 14, 13, 12, 12, 11, 10, 10, 9,
3705 9, 8, 7, 7, 6, 6, 5, 4,
3706 4, 3, 3, 2, 2, 1, 1, 0,
3707 127, 125, 123, 121, 119, 118, 116, 114,
3708 113, 111, 109, 108, 106, 105, 103, 102,
3709 100, 99, 97, 96, 95, 93, 92, 91,
3710 90, 88, 87, 86, 85, 84, 83, 82,
3711 80, 79, 78, 77, 76, 75, 74, 73,
3712 72, 71, 70, 70, 69, 68, 67, 66,
3713 65, 64, 63, 63, 62, 61, 60, 59,
3714 59, 58, 57, 56, 56, 55, 54, 53
3715 };
3716 const int precision = 7;
3717
3718 if (exp == 0 && frac != 0) { /* subnormal */
3719 /* Normalize the subnormal. */
3720 while (extract64(frac, frac_size - 1, 1) == 0) {
3721 exp--;
3722 frac <<= 1;
3723 }
3724
3725 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3726 }
3727
3728 int idx = ((exp & 1) << (precision - 1)) |
c45eff30 3729 (frac >> (frac_size - precision + 1));
e848a1e5 3730 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
c45eff30 3731 (frac_size - precision);
e848a1e5
FC
3732 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3733
3734 uint64_t val = 0;
3735 val = deposit64(val, 0, frac_size, out_frac);
3736 val = deposit64(val, frac_size, exp_size, out_exp);
3737 val = deposit64(val, frac_size + exp_size, 1, sign);
3738 return val;
3739}
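/*
 * With precision = 7, idx is the exponent's LSB concatenated with the top
 * six fraction bits, and the estimate's exponent works out to
 * (3 * bias - 1 - exp) / 2, since MAKE_64BIT_MASK(0, exp_size - 1) is the
 * exponent bias and ~exp == -exp - 1 in two's complement.
 */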
3740
3741static float16 frsqrt7_h(float16 f, float_status *s)
3742{
3743 int exp_size = 5, frac_size = 10;
3744 bool sign = float16_is_neg(f);
3745
3746 /*
3747 * frsqrt7(sNaN) = canonical NaN
3748 * frsqrt7(-inf) = canonical NaN
3749 * frsqrt7(-normal) = canonical NaN
3750 * frsqrt7(-subnormal) = canonical NaN
3751 */
3752 if (float16_is_signaling_nan(f, s) ||
c45eff30
WL
3753 (float16_is_infinity(f) && sign) ||
3754 (float16_is_normal(f) && sign) ||
3755 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
e848a1e5
FC
3756 s->float_exception_flags |= float_flag_invalid;
3757 return float16_default_nan(s);
3758 }
3759
3760 /* frsqrt7(qNaN) = canonical NaN */
3761 if (float16_is_quiet_nan(f, s)) {
3762 return float16_default_nan(s);
3763 }
3764
3765 /* frsqrt7(+-0) = +-inf */
3766 if (float16_is_zero(f)) {
3767 s->float_exception_flags |= float_flag_divbyzero;
3768 return float16_set_sign(float16_infinity, sign);
3769 }
3770
3771 /* frsqrt7(+inf) = +0 */
3772 if (float16_is_infinity(f) && !sign) {
3773 return float16_set_sign(float16_zero, sign);
3774 }
3775
3776 /* +normal, +subnormal */
3777 uint64_t val = frsqrt7(f, exp_size, frac_size);
3778 return make_float16(val);
3779}
3780
3781static float32 frsqrt7_s(float32 f, float_status *s)
3782{
3783 int exp_size = 8, frac_size = 23;
3784 bool sign = float32_is_neg(f);
3785
3786 /*
3787 * frsqrt7(sNaN) = canonical NaN
3788 * frsqrt7(-inf) = canonical NaN
3789 * frsqrt7(-normal) = canonical NaN
3790 * frsqrt7(-subnormal) = canonical NaN
3791 */
3792 if (float32_is_signaling_nan(f, s) ||
c45eff30
WL
3793 (float32_is_infinity(f) && sign) ||
3794 (float32_is_normal(f) && sign) ||
3795 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
e848a1e5
FC
3796 s->float_exception_flags |= float_flag_invalid;
3797 return float32_default_nan(s);
3798 }
3799
3800 /* frsqrt7(qNaN) = canonical NaN */
3801 if (float32_is_quiet_nan(f, s)) {
3802 return float32_default_nan(s);
3803 }
3804
3805 /* frsqrt7(+-0) = +-inf */
3806 if (float32_is_zero(f)) {
3807 s->float_exception_flags |= float_flag_divbyzero;
3808 return float32_set_sign(float32_infinity, sign);
3809 }
3810
3811 /* frsqrt7(+inf) = +0 */
3812 if (float32_is_infinity(f) && !sign) {
3813 return float32_set_sign(float32_zero, sign);
3814 }
3815
3816 /* +normal, +subnormal */
3817 uint64_t val = frsqrt7(f, exp_size, frac_size);
3818 return make_float32(val);
3819}
3820
3821static float64 frsqrt7_d(float64 f, float_status *s)
3822{
3823 int exp_size = 11, frac_size = 52;
3824 bool sign = float64_is_neg(f);
3825
3826 /*
3827 * frsqrt7(sNaN) = canonical NaN
3828 * frsqrt7(-inf) = canonical NaN
3829 * frsqrt7(-normal) = canonical NaN
3830 * frsqrt7(-subnormal) = canonical NaN
3831 */
3832 if (float64_is_signaling_nan(f, s) ||
c45eff30
WL
3833 (float64_is_infinity(f) && sign) ||
3834 (float64_is_normal(f) && sign) ||
3835 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
e848a1e5
FC
3836 s->float_exception_flags |= float_flag_invalid;
3837 return float64_default_nan(s);
3838 }
3839
3840 /* frsqrt7(qNaN) = canonical NaN */
3841 if (float64_is_quiet_nan(f, s)) {
3842 return float64_default_nan(s);
3843 }
3844
3845 /* frsqrt7(+-0) = +-inf */
3846 if (float64_is_zero(f)) {
3847 s->float_exception_flags |= float_flag_divbyzero;
3848 return float64_set_sign(float64_infinity, sign);
3849 }
3850
3851 /* frsqrt7(+inf) = +0 */
3852 if (float64_is_infinity(f) && !sign) {
3853 return float64_set_sign(float64_zero, sign);
3854 }
3855
3856 /* +normal, +subnormal */
3857 uint64_t val = frsqrt7(f, exp_size, frac_size);
3858 return make_float64(val);
3859}
3860
3861RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3862RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3863RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
5eacf7d8 3864GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3865GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3866GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
e848a1e5 3867
55c35407
FC
3868/*
3869 * Vector Floating-Point Reciprocal Estimate Instruction
3870 *
3871 * Adapted from riscv-v-spec recip.c:
3872 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3873 */
3874static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3875 float_status *s)
3876{
3877 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3878 uint64_t exp = extract64(f, frac_size, exp_size);
3879 uint64_t frac = extract64(f, 0, frac_size);
3880
3881 const uint8_t lookup_table[] = {
3882 127, 125, 123, 121, 119, 117, 116, 114,
3883 112, 110, 109, 107, 105, 104, 102, 100,
3884 99, 97, 96, 94, 93, 91, 90, 88,
3885 87, 85, 84, 83, 81, 80, 79, 77,
3886 76, 75, 74, 72, 71, 70, 69, 68,
3887 66, 65, 64, 63, 62, 61, 60, 59,
3888 58, 57, 56, 55, 54, 53, 52, 51,
3889 50, 49, 48, 47, 46, 45, 44, 43,
3890 42, 41, 40, 40, 39, 38, 37, 36,
3891 35, 35, 34, 33, 32, 31, 31, 30,
3892 29, 28, 28, 27, 26, 25, 25, 24,
3893 23, 23, 22, 21, 21, 20, 19, 19,
3894 18, 17, 17, 16, 15, 15, 14, 14,
3895 13, 12, 12, 11, 11, 10, 9, 9,
3896 8, 8, 7, 7, 6, 5, 5, 4,
3897 4, 3, 3, 2, 2, 1, 1, 0
3898 };
3899 const int precision = 7;
3900
3901 if (exp == 0 && frac != 0) { /* subnormal */
3902 /* Normalize the subnormal. */
3903 while (extract64(frac, frac_size - 1, 1) == 0) {
3904 exp--;
3905 frac <<= 1;
3906 }
3907
3908 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3909
3910 if (exp != 0 && exp != UINT64_MAX) {
3911 /*
3912 * Overflow to inf or max value of same sign,
3913 * depending on sign and rounding mode.
3914 */
3915 s->float_exception_flags |= (float_flag_inexact |
3916 float_flag_overflow);
3917
3918 if ((s->float_rounding_mode == float_round_to_zero) ||
3919 ((s->float_rounding_mode == float_round_down) && !sign) ||
3920 ((s->float_rounding_mode == float_round_up) && sign)) {
3921 /* Return greatest/negative finite value. */
3922 return (sign << (exp_size + frac_size)) |
c45eff30 3923 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
55c35407
FC
3924 } else {
3925 /* Return +-inf. */
3926 return (sign << (exp_size + frac_size)) |
c45eff30 3927 MAKE_64BIT_MASK(frac_size, exp_size);
55c35407
FC
3928 }
3929 }
3930 }
3931
3932 int idx = frac >> (frac_size - precision);
3933 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
c45eff30 3934 (frac_size - precision);
55c35407
FC
3935 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3936
3937 if (out_exp == 0 || out_exp == UINT64_MAX) {
3938 /*
3939 * The result is subnormal, but don't raise the underflow exception,
3940 * because there's no additional loss of precision.
3941 */
3942 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3943 if (out_exp == UINT64_MAX) {
3944 out_frac >>= 1;
3945 out_exp = 0;
3946 }
3947 }
3948
3949 uint64_t val = 0;
3950 val = deposit64(val, 0, frac_size, out_frac);
3951 val = deposit64(val, frac_size, exp_size, out_exp);
3952 val = deposit64(val, frac_size + exp_size, 1, sign);
3953 return val;
3954}
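/*
 * Here idx is simply the top seven fraction bits and the estimate's
 * exponent is 2 * bias - 1 - exp.  An out_exp of 0 or all-ones means the
 * result is subnormal, so the fraction is shifted right with the leading
 * one made explicit instead.
 */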
3955
3956static float16 frec7_h(float16 f, float_status *s)
3957{
3958 int exp_size = 5, frac_size = 10;
3959 bool sign = float16_is_neg(f);
3960
3961 /* frec7(+-inf) = +-0 */
3962 if (float16_is_infinity(f)) {
3963 return float16_set_sign(float16_zero, sign);
3964 }
3965
3966 /* frec7(+-0) = +-inf */
3967 if (float16_is_zero(f)) {
3968 s->float_exception_flags |= float_flag_divbyzero;
3969 return float16_set_sign(float16_infinity, sign);
3970 }
3971
3972 /* frec7(sNaN) = canonical NaN */
3973 if (float16_is_signaling_nan(f, s)) {
3974 s->float_exception_flags |= float_flag_invalid;
3975 return float16_default_nan(s);
3976 }
3977
3978 /* frec7(qNaN) = canonical NaN */
3979 if (float16_is_quiet_nan(f, s)) {
3980 return float16_default_nan(s);
3981 }
3982
3983 /* +-normal, +-subnormal */
3984 uint64_t val = frec7(f, exp_size, frac_size, s);
3985 return make_float16(val);
3986}
3987
3988static float32 frec7_s(float32 f, float_status *s)
3989{
3990 int exp_size = 8, frac_size = 23;
3991 bool sign = float32_is_neg(f);
3992
3993 /* frec7(+-inf) = +-0 */
3994 if (float32_is_infinity(f)) {
3995 return float32_set_sign(float32_zero, sign);
3996 }
3997
3998 /* frec7(+-0) = +-inf */
3999 if (float32_is_zero(f)) {
4000 s->float_exception_flags |= float_flag_divbyzero;
4001 return float32_set_sign(float32_infinity, sign);
4002 }
4003
4004 /* frec7(sNaN) = canonical NaN */
4005 if (float32_is_signaling_nan(f, s)) {
4006 s->float_exception_flags |= float_flag_invalid;
4007 return float32_default_nan(s);
4008 }
4009
4010 /* frec7(qNaN) = canonical NaN */
4011 if (float32_is_quiet_nan(f, s)) {
4012 return float32_default_nan(s);
4013 }
4014
4015 /* +-normal, +-subnormal */
4016 uint64_t val = frec7(f, exp_size, frac_size, s);
4017 return make_float32(val);
4018}
4019
4020static float64 frec7_d(float64 f, float_status *s)
4021{
4022 int exp_size = 11, frac_size = 52;
4023 bool sign = float64_is_neg(f);
4024
4025 /* frec7(+-inf) = +-0 */
4026 if (float64_is_infinity(f)) {
4027 return float64_set_sign(float64_zero, sign);
4028 }
4029
4030 /* frec7(+-0) = +-inf */
4031 if (float64_is_zero(f)) {
4032 s->float_exception_flags |= float_flag_divbyzero;
4033 return float64_set_sign(float64_infinity, sign);
4034 }
4035
4036 /* frec7(sNaN) = canonical NaN */
4037 if (float64_is_signaling_nan(f, s)) {
4038 s->float_exception_flags |= float_flag_invalid;
4039 return float64_default_nan(s);
4040 }
4041
4042 /* frec7(qNaN) = canonical NaN */
4043 if (float64_is_quiet_nan(f, s)) {
4044 return float64_default_nan(s);
4045 }
4046
4047 /* +-normal, +-subnormal */
4048 uint64_t val = frec7(f, exp_size, frac_size, s);
4049 return make_float64(val);
4050}
4051
4052RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
4053RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
4054RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
5eacf7d8 4055GEN_VEXT_V_ENV(vfrec7_v_h, 2)
4056GEN_VEXT_V_ENV(vfrec7_v_w, 4)
4057GEN_VEXT_V_ENV(vfrec7_v_d, 8)
55c35407 4058
230b53dd 4059/* Vector Floating-Point MIN/MAX Instructions */
49c5611a
FC
4060RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
4061RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
4062RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
5eacf7d8 4063GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
4064GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
4065GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
49c5611a
FC
4066RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
4067RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
4068RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
5eacf7d8 4069GEN_VEXT_VF(vfmin_vf_h, 2)
4070GEN_VEXT_VF(vfmin_vf_w, 4)
4071GEN_VEXT_VF(vfmin_vf_d, 8)
230b53dd 4072
49c5611a
FC
4073RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
4074RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
4075RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
5eacf7d8 4076GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
4077GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
4078GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
49c5611a
FC
4079RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
4080RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
4081RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
5eacf7d8 4082GEN_VEXT_VF(vfmax_vf_h, 2)
4083GEN_VEXT_VF(vfmax_vf_w, 4)
4084GEN_VEXT_VF(vfmax_vf_d, 8)
1d426b81
LZ
4085
4086/* Vector Floating-Point Sign-Injection Instructions */
4087static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4088{
4089 return deposit64(b, 0, 15, a);
4090}
4091
4092static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4093{
4094 return deposit64(b, 0, 31, a);
4095}
4096
4097static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4098{
4099 return deposit64(b, 0, 63, a);
4100}
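/*
 * deposit64(b, 0, 15/31/63, a) keeps a's magnitude bits and takes the sign
 * from b; with the (s2, s1) operand order of OPFVV2/OPFVF2 this yields
 * vd[i] = {sign of vs1[i] or f[rs1], magnitude of vs2[i]}.  The fsgnjn and
 * fsgnjx variants below negate the injected sign or XOR it with vs2[i]'s
 * own sign.
 */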
4101
4102RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4103RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4104RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
5eacf7d8 4105GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
4106GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
4107GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
1d426b81
LZ
4108RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4109RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4110RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
5eacf7d8 4111GEN_VEXT_VF(vfsgnj_vf_h, 2)
4112GEN_VEXT_VF(vfsgnj_vf_w, 4)
4113GEN_VEXT_VF(vfsgnj_vf_d, 8)
1d426b81
LZ
4114
4115static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4116{
4117 return deposit64(~b, 0, 15, a);
4118}
4119
4120static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4121{
4122 return deposit64(~b, 0, 31, a);
4123}
4124
4125static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4126{
4127 return deposit64(~b, 0, 63, a);
4128}
4129
4130RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4131RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4132RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
5eacf7d8 4133GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
4134GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
4135GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
1d426b81
LZ
4136RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4137RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4138RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
5eacf7d8 4139GEN_VEXT_VF(vfsgnjn_vf_h, 2)
4140GEN_VEXT_VF(vfsgnjn_vf_w, 4)
4141GEN_VEXT_VF(vfsgnjn_vf_d, 8)
1d426b81
LZ
4142
4143static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4144{
4145 return deposit64(b ^ a, 0, 15, a);
4146}
4147
4148static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4149{
4150 return deposit64(b ^ a, 0, 31, a);
4151}
4152
4153static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4154{
4155 return deposit64(b ^ a, 0, 63, a);
4156}
4157
4158RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4159RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4160RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
5eacf7d8 4161GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4162GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4163GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
1d426b81
LZ
4164RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4165RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4166RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
5eacf7d8 4167GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4168GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4169GEN_VEXT_VF(vfsgnjx_vf_d, 8)
2a68e9e5
LZ
4170
4171/* Vector Floating-Point Compare Instructions */
4172#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4173void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4174 CPURISCVState *env, uint32_t desc) \
4175{ \
2a68e9e5
LZ
4176 uint32_t vm = vext_vm(desc); \
4177 uint32_t vl = env->vl; \
86247c51 4178 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
5eacf7d8 4179 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 4180 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
4181 uint32_t i; \
4182 \
f714361e 4183 for (i = env->vstart; i < vl; i++) { \
2a68e9e5
LZ
4184 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4185 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4186 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4187 /* set masked-off elements to 1s */ \
4188 if (vma) { \
4189 vext_set_elem_mask(vd, i, 1); \
4190 } \
2a68e9e5
LZ
4191 continue; \
4192 } \
f9298de5 4193 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4194 DO_OP(s2, s1, &env->fp_status)); \
4195 } \
f714361e 4196 env->vstart = 0; \
3b57254d
WL
4197 /*
4198 * mask destination registers are always tail-agnostic
4199 * set tail elements to 1s
4200 */ \
5eacf7d8 4201 if (vta_all_1s) { \
4202 for (; i < total_elems; i++) { \
4203 vext_set_elem_mask(vd, i, 1); \
4204 } \
4205 } \
2a68e9e5
LZ
4206}
4207
2a68e9e5
LZ
4208GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4209GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4210GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4211
4212#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4213void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4214 CPURISCVState *env, uint32_t desc) \
4215{ \
2a68e9e5
LZ
4216 uint32_t vm = vext_vm(desc); \
4217 uint32_t vl = env->vl; \
86247c51 4218 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
5eacf7d8 4219 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 4220 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
4221 uint32_t i; \
4222 \
f714361e 4223 for (i = env->vstart; i < vl; i++) { \
2a68e9e5 4224 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4225 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4226 /* set masked-off elements to 1s */ \
4227 if (vma) { \
4228 vext_set_elem_mask(vd, i, 1); \
4229 } \
2a68e9e5
LZ
4230 continue; \
4231 } \
f9298de5 4232 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4233 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4234 } \
f714361e 4235 env->vstart = 0; \
3b57254d
WL
4236 /*
4237 * mask destination registers are always tail-agnostic
4238 * set tail elements to 1s
4239 */ \
5eacf7d8 4240 if (vta_all_1s) { \
4241 for (; i < total_elems; i++) { \
4242 vext_set_elem_mask(vd, i, 1); \
4243 } \
4244 } \
2a68e9e5
LZ
4245}
4246
4247GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4248GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4249GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4250
4251static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4252{
4253 FloatRelation compare = float16_compare_quiet(a, b, s);
4254 return compare != float_relation_equal;
4255}
4256
4257static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4258{
4259 FloatRelation compare = float32_compare_quiet(a, b, s);
4260 return compare != float_relation_equal;
4261}
4262
4263static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4264{
4265 FloatRelation compare = float64_compare_quiet(a, b, s);
4266 return compare != float_relation_equal;
4267}
4268
4269GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4270GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4271GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4272GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4273GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4274GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4275
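/*
 * vmfeq/vmfne above use the quiet compare helpers (no invalid exception for
 * quiet NaN operands), while vmflt/vmfle/vmfgt/vmfge below use the
 * signaling comparisons, matching the IEEE 754 semantics required for the
 * ordered compares.
 */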
2a68e9e5
LZ
4276GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4277GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4278GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4279GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4280GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4281GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4282
2a68e9e5
LZ
4283GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4284GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4285GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4286GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4287GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4288GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4289
4290static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4291{
4292 FloatRelation compare = float16_compare(a, b, s);
4293 return compare == float_relation_greater;
4294}
4295
4296static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4297{
4298 FloatRelation compare = float32_compare(a, b, s);
4299 return compare == float_relation_greater;
4300}
4301
4302static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4303{
4304 FloatRelation compare = float64_compare(a, b, s);
4305 return compare == float_relation_greater;
4306}
4307
4308GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4309GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4310GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4311
4312static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4313{
4314 FloatRelation compare = float16_compare(a, b, s);
4315 return compare == float_relation_greater ||
4316 compare == float_relation_equal;
4317}
4318
4319static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4320{
4321 FloatRelation compare = float32_compare(a, b, s);
4322 return compare == float_relation_greater ||
4323 compare == float_relation_equal;
4324}
4325
4326static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4327{
4328 FloatRelation compare = float64_compare(a, b, s);
4329 return compare == float_relation_greater ||
4330 compare == float_relation_equal;
4331}
4332
4333GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4334GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4335GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4336
121ddbb3
LZ
4337/* Vector Floating-Point Classify Instruction */
4338#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
4339static void do_##NAME(void *vd, void *vs2, int i) \
4340{ \
4341 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
4342 *((TD *)vd + HD(i)) = OP(s2); \
4343}
4344
5eacf7d8 4345#define GEN_VEXT_V(NAME, ESZ) \
121ddbb3
LZ
4346void HELPER(NAME)(void *vd, void *v0, void *vs2, \
4347 CPURISCVState *env, uint32_t desc) \
4348{ \
121ddbb3
LZ
4349 uint32_t vm = vext_vm(desc); \
4350 uint32_t vl = env->vl; \
5eacf7d8 4351 uint32_t total_elems = \
4352 vext_get_total_elems(env, desc, ESZ); \
4353 uint32_t vta = vext_vta(desc); \
5b448f44 4354 uint32_t vma = vext_vma(desc); \
121ddbb3
LZ
4355 uint32_t i; \
4356 \
f714361e 4357 for (i = env->vstart; i < vl; i++) { \
f9298de5 4358 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4359 /* set masked-off elements to 1s */ \
4360 vext_set_elems_1s(vd, vma, i * ESZ, \
4361 (i + 1) * ESZ); \
121ddbb3
LZ
4362 continue; \
4363 } \
4364 do_##NAME(vd, vs2, i); \
4365 } \
f714361e 4366 env->vstart = 0; \
5eacf7d8 4367 /* set tail elements to 1s */ \
4368 vext_set_elems_1s(vd, vta, vl * ESZ, \
4369 total_elems * ESZ); \
121ddbb3
LZ
4370}
4371
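/*
 * fclass_h/fclass_s/fclass_d below return the standard RISC-V fclass bit
 * positions: bit 0 -inf, 1 negative normal, 2 negative subnormal, 3 -0,
 * 4 +0, 5 positive subnormal, 6 positive normal, 7 +inf, 8 sNaN, 9 qNaN.
 */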
4372target_ulong fclass_h(uint64_t frs1)
4373{
4374 float16 f = frs1;
4375 bool sign = float16_is_neg(f);
4376
4377 if (float16_is_infinity(f)) {
4378 return sign ? 1 << 0 : 1 << 7;
4379 } else if (float16_is_zero(f)) {
4380 return sign ? 1 << 3 : 1 << 4;
4381 } else if (float16_is_zero_or_denormal(f)) {
4382 return sign ? 1 << 2 : 1 << 5;
4383 } else if (float16_is_any_nan(f)) {
4384 float_status s = { }; /* for snan_bit_is_one */
4385 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4386 } else {
4387 return sign ? 1 << 1 : 1 << 6;
4388 }
4389}
4390
4391target_ulong fclass_s(uint64_t frs1)
4392{
4393 float32 f = frs1;
4394 bool sign = float32_is_neg(f);
4395
4396 if (float32_is_infinity(f)) {
4397 return sign ? 1 << 0 : 1 << 7;
4398 } else if (float32_is_zero(f)) {
4399 return sign ? 1 << 3 : 1 << 4;
4400 } else if (float32_is_zero_or_denormal(f)) {
4401 return sign ? 1 << 2 : 1 << 5;
4402 } else if (float32_is_any_nan(f)) {
4403 float_status s = { }; /* for snan_bit_is_one */
4404 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4405 } else {
4406 return sign ? 1 << 1 : 1 << 6;
4407 }
4408}
4409
4410target_ulong fclass_d(uint64_t frs1)
4411{
4412 float64 f = frs1;
4413 bool sign = float64_is_neg(f);
4414
4415 if (float64_is_infinity(f)) {
4416 return sign ? 1 << 0 : 1 << 7;
4417 } else if (float64_is_zero(f)) {
4418 return sign ? 1 << 3 : 1 << 4;
4419 } else if (float64_is_zero_or_denormal(f)) {
4420 return sign ? 1 << 2 : 1 << 5;
4421 } else if (float64_is_any_nan(f)) {
4422 float_status s = { }; /* for snan_bit_is_one */
4423 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4424 } else {
4425 return sign ? 1 << 1 : 1 << 6;
4426 }
4427}
4428
4429RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4430RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4431RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
5eacf7d8 4432GEN_VEXT_V(vfclass_v_h, 2)
4433GEN_VEXT_V(vfclass_v_w, 4)
4434GEN_VEXT_V(vfclass_v_d, 8)
64ab5846
LZ
4435
4436/* Vector Floating-Point Merge Instruction */
5eacf7d8 4437
3479a814 4438#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
64ab5846
LZ
4439void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4440 CPURISCVState *env, uint32_t desc) \
4441{ \
64ab5846
LZ
4442 uint32_t vm = vext_vm(desc); \
4443 uint32_t vl = env->vl; \
5eacf7d8 4444 uint32_t esz = sizeof(ETYPE); \
4445 uint32_t total_elems = \
4446 vext_get_total_elems(env, desc, esz); \
4447 uint32_t vta = vext_vta(desc); \
64ab5846
LZ
4448 uint32_t i; \
4449 \
f714361e 4450 for (i = env->vstart; i < vl; i++) { \
64ab5846 4451 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
c45eff30
WL
4452 *((ETYPE *)vd + H(i)) = \
4453 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
64ab5846 4454 } \
f714361e 4455 env->vstart = 0; \
5eacf7d8 4456 /* set tail elements to 1s */ \
4457 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
64ab5846
LZ
4458}
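/*
 * vfmerge.vfm is a pure data move: element i receives the scalar when it is
 * active (or when vm is set) and vs2[i] when it is masked off, so no
 * floating-point flags can be raised.
 */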
4459
3479a814
FC
4460GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4461GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4462GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
92100973
LZ
4463
4464/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4465/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4466RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4467RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4468RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
5eacf7d8 4469GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4470GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4471GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
92100973
LZ
4472
4473/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4474RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4475RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4476RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
5eacf7d8 4477GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4478GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4479GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
92100973
LZ
4480
4481/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4482RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4483RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4484RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
5eacf7d8 4485GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4486GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4487GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
92100973
LZ
4488
4489/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4490RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4491RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4492RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
5eacf7d8 4493GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4494GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4495GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4514b7b1
LZ
4496
4497/* Widening Floating-Point/Integer Type-Convert Instructions */
4498/* (TD, T2, TX2) */
3ce4c09d 4499#define WOP_UU_B uint16_t, uint8_t, uint8_t
4514b7b1
LZ
4500#define WOP_UU_H uint32_t, uint16_t, uint16_t
4501#define WOP_UU_W uint64_t, uint32_t, uint32_t
3b57254d
WL
4502/*
4503 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
4504 */
4514b7b1
LZ
4505RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4506RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
5eacf7d8 4507GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4508GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4514b7b1
LZ
4509
4510/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4511RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4512RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
5eacf7d8 4513GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4514GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4514b7b1 4515
246f8796
WL
4516/*
4517 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
4518 */
3ce4c09d 4519RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4514b7b1
LZ
4520RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4521RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
5eacf7d8 4522GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4523GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4524GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4514b7b1
LZ
4525
4526/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3ce4c09d 4527RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4514b7b1
LZ
4528RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4529RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
5eacf7d8 4530GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4531GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4532GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4514b7b1
LZ
4533
4534/*
246f8796 4535 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4514b7b1
LZ
4536 */
4537static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4538{
4539 return float16_to_float32(a, true, s);
4540}
4541
4542RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4543RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
5eacf7d8 4544GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4545GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
878d406e
LZ
4546
4547/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4548/* (TD, T2, TX2) */
ff679b58 4549#define NOP_UU_B uint8_t, uint16_t, uint32_t
878d406e
LZ
4550#define NOP_UU_H uint16_t, uint32_t, uint32_t
4551#define NOP_UU_W uint32_t, uint64_t, uint64_t
4552/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
ff679b58
FC
4553RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4554RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4555RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
5eacf7d8 4556GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4557GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4558GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
878d406e
LZ
4559
4560/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
ff679b58
FC
4561RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4562RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4563RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
5eacf7d8 4564GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4565GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4566GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
878d406e 4567
246f8796
WL
4568/*
4569 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
4570 */
ff679b58
FC
4571RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4572RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
5eacf7d8 4573GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4574GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
878d406e
LZ
4575
4576/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
ff679b58
FC
4577RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4578RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
5eacf7d8 4579GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4580GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
878d406e
LZ
4581
4582/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
4583static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4584{
4585 return float32_to_float16(a, true, s);
4586}
4587
ff679b58
FC
4588RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4589RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
5eacf7d8 4590GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4591GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
fe5c9ab1
LZ
4592
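/*
 * Editor's illustration (not part of the build): the narrowing converts go
 * the other way, reading a 2*SEW source element and producing a SEW result
 * at the same index. A minimal scalar model with host types standing in for
 * softfloat; the name is hypothetical and masking/tail handling is omitted.
 */
static inline void demo_narrow_convert(float *vd, const double *vs2, int vl)
{
    for (int i = 0; i < vl; i++) {
        vd[i] = (float)vs2[i];   /* 2*SEW element narrows to SEW element */
    }
}
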
/*
 * Vector Reduction Operations
 */
/* Vector Single-Width Integer Reduction Instructions */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t esz = sizeof(TD);                            \
    uint32_t vlenb = simd_maxsz(desc);                    \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t i;                                           \
    TD s1 = *((TD *)vs1 + HD(0));                         \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        s1 = OP(s1, (TD)s2);                              \
    }                                                     \
    *((TD *)vd + HD(0)) = s1;                             \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, esz, vlenb);               \
}

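/*
 * Editor's illustration (not part of the build): what one expansion of
 * GEN_VEXT_RED computes, modelled on plain arrays. The scalar seed comes
 * from vs1[0], every active element of vs2 is folded into it, and only
 * element 0 of vd is written. Names are hypothetical; tail handling and
 * the H*() byte-order fixups are omitted.
 */
static inline int32_t demo_vredsum_w(const int32_t *vs2, int32_t seed,
                                     const bool *mask, int vl)
{
    int32_t acc = seed;                 /* s1 = vs1[0] */

    for (int i = 0; i < vl; i++) {
        if (mask && !mask[i]) {
            continue;                   /* inactive element, skipped */
        }
        acc += vs2[i];                  /* s1 = OP(s1, s2) */
    }
    return acc;                         /* written back to vd[0] */
}
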
/* vd[0] = sum(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)

/* vd[0] = maxu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)

/* vd[0] = max(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)

/* vd[0] = minu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)

/* vd[0] = min(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)

/* vd[0] = and(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)

/* vd[0] = or(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)

/* vd[0] = xor(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)

/* Vector Widening Integer Reduction Instructions */
/* signed sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)

/* Unsigned sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)

/* Vector Single-Width Floating-Point Reduction Instructions */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)         \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t esz = sizeof(TD);                            \
    uint32_t vlenb = simd_maxsz(desc);                    \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t i;                                           \
    TD s1 = *((TD *)vs1 + HD(0));                         \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        s1 = OP(s1, (TD)s2, &env->fp_status);             \
    }                                                     \
    *((TD *)vd + HD(0)) = s1;                             \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, esz, vlenb);               \
}

/* Unordered sum */
GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Ordered sum */
GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Maximum value */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
              float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
              float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
              float64_maximum_number)

/* Minimum value */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
              float16_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
              float32_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
              float64_minimum_number)

/* Vector Widening Floating-Point Add Instructions */
static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
{
    return float32_add(a, float16_to_float32(b, true, s), s);
}

static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
{
    return float64_add(a, float32_to_float64(b, s), s);
}

/* Vector Widening Floating-Point Reduction Instructions */
/* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)

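/*
 * Editor's illustration (not part of the build): the widening FP reductions
 * keep a 2*SEW accumulator and promote each SEW element before adding, which
 * is the fwadd16/fwadd32 pattern above. A scalar model of the ordered case
 * with host floats standing in for softfloat; the name is hypothetical, and
 * masking, rounding and NaN handling are omitted.
 */
static inline double demo_vfwredosum_w(const float *vs2, double seed, int vl)
{
    double acc = seed;              /* double-width accumulator from vs1[0] */

    for (int i = 0; i < vl; i++) {
        acc += (double)vs2[i];      /* promote SEW element, then add in order */
    }
    return acc;                     /* written back to vd[0] */
}
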
/*
 * Vector Mask Operations
 */
/* Vector Mask-Register Logical Instructions */
#define GEN_VEXT_MASK_VV(NAME, OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;      \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);          \
    uint32_t i;                                           \
    int a, b;                                             \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        a = vext_elem_mask(vs1, i);                       \
        b = vext_elem_mask(vs2, i);                       \
        vext_set_elem_mask(vd, i, OP(b, a));              \
    }                                                     \
    env->vstart = 0;                                      \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                   \
    if (vta_all_1s) {                                     \
        for (; i < total_elems; i++) {                    \
            vext_set_elem_mask(vd, i, 1);                 \
        }                                                 \
    }                                                     \
}

#define DO_NAND(N, M) (!(N & M))
#define DO_ANDNOT(N, M) (N & !M)
#define DO_NOR(N, M) (!(N | M))
#define DO_ORNOT(N, M) (N | !M)
#define DO_XNOR(N, M) (!(N ^ M))

GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)

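/*
 * Editor's illustration (not part of the build): each mask-logical helper
 * combines two mask registers bit by bit over the first vl bits; for example
 * vmnand computes !(vs2[i] & vs1[i]). A byte-per-bit model with hypothetical
 * names; the tail-agnostic 1-filling is omitted.
 */
static inline void demo_vmnand(uint8_t *vd, const uint8_t *vs2,
                               const uint8_t *vs1, int vl)
{
    for (int i = 0; i < vl; i++) {
        vd[i] = !(vs2[i] & vs1[i]);     /* DO_NAND on one mask bit */
    }
}
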
/* Vector count population in mask vcpop */
target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
                             uint32_t desc)
{
    target_ulong cnt = 0;
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                cnt++;
            }
        }
    }
    env->vstart = 0;
    return cnt;
}

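/*
 * Editor's illustration (not part of the build): vcpop.m simply counts the
 * active set bits of the source mask, exactly as the loop above does. A
 * byte-per-bit model with hypothetical names.
 */
static inline long demo_vcpop(const uint8_t *vs2, const uint8_t *mask, int vl)
{
    long cnt = 0;

    for (int i = 0; i < vl; i++) {
        if ((!mask || mask[i]) && vs2[i]) {
            cnt++;
        }
    }
    return cnt;
}
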
/* vfirst find-first-set mask bit */
target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
                              uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                return i;
            }
        }
    }
    env->vstart = 0;
    return -1LL;
}

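/*
 * Editor's illustration (not part of the build): vfirst.m returns the index
 * of the first active set mask bit, or -1 when none is found before vl. A
 * byte-per-bit model with hypothetical names.
 */
static inline long demo_vfirst(const uint8_t *vs2, const uint8_t *mask, int vl)
{
    for (int i = 0; i < vl; i++) {
        if ((!mask || mask[i]) && vs2[i]) {
            return i;
        }
    }
    return -1;
}
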
enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};

static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    uint32_t vma = vext_vma(desc);
    int i;
    bool first_mask_bit = false;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            if (vma) {
                vext_set_elem_mask(vd, i, 1);
            }
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

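/*
 * Editor's illustration (not part of the build): the three forms selected by
 * the type argument of vmsetm() differ only around the first set source bit.
 * vmsbf.m sets every element strictly before it, vmsif.m also sets the
 * element at it, vmsof.m sets only that element; everything after it is
 * zeroed. A byte-per-bit model of the vmsbf.m case with hypothetical names;
 * masking and the tail/mask-agnostic policies are omitted.
 */
static inline void demo_vmsbf(uint8_t *vd, const uint8_t *vs2, int vl)
{
    bool seen = false;

    for (int i = 0; i < vl; i++) {
        if (vs2[i]) {
            seen = true;            /* first set source bit found */
        }
        vd[i] = seen ? 0 : 1;       /* 1s strictly before it, 0s from it on */
    }
}
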
/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
                  uint32_t desc)                                      \
{                                                                     \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t vl = env->vl;                                            \
    uint32_t esz = sizeof(ETYPE);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t vma = vext_vma(desc);                                    \
    uint32_t sum = 0;                                                 \
    int i;                                                            \
                                                                      \
    for (i = env->vstart; i < vl; i++) {                              \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            /* set masked-off elements to 1s */                       \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);       \
            continue;                                                 \
        }                                                             \
        *((ETYPE *)vd + H(i)) = sum;                                  \
        if (vext_elem_mask(vs2, i)) {                                 \
            sum++;                                                    \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

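/*
 * Editor's illustration (not part of the build): viota.m writes, for each
 * destination element, the number of set source mask bits strictly below it,
 * i.e. an exclusive prefix sum of the mask. Unmasked byte-per-bit model with
 * hypothetical names.
 */
static inline void demo_viota(uint32_t *vd, const uint8_t *vs2, int vl)
{
    uint32_t sum = 0;

    for (int i = 0; i < vl; i++) {
        vd[i] = sum;                /* count of set bits in vs2[0..i) */
        if (vs2[i]) {
            sum++;
        }
    }
}
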
/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    int i;                                                                \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)

/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                          \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t vl = env->vl;                                            \
    uint32_t esz = sizeof(ETYPE);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t vma = vext_vma(desc);                                    \
    target_ulong offset = s1, i_min, i;                               \
                                                                      \
    i_min = MAX(env->vstart, offset);                                 \
    for (i = i_min; i < vl; i++) {                                    \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            /* set masked-off elements to 1s */                       \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);       \
            continue;                                                 \
        }                                                             \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));      \
    }                                                                 \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));       \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t vl = env->vl;                                            \
    uint32_t esz = sizeof(ETYPE);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t vma = vext_vma(desc);                                    \
    target_ulong i_max, i;                                            \
                                                                      \
    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);   \
    for (i = env->vstart; i < i_max; ++i) {                           \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            /* set masked-off elements to 1s */                       \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);       \
            continue;                                                 \
        }                                                             \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
    }                                                                 \
                                                                      \
    for (i = i_max; i < vl; ++i) {                                    \
        if (vm || vext_elem_mask(v0, i)) {                            \
            *((ETYPE *)vd + H(i)) = 0;                                \
        }                                                             \
    }                                                                 \
                                                                      \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)

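/*
 * Editor's illustration (not part of the build): vslideup.vx copies
 * vs2[i - OFFSET] into vd[i] for i >= OFFSET, while vslidedown.vx copies
 * vs2[i + OFFSET] into vd[i] and zeroes elements that would read at or past
 * vlmax. A plain-array model of the slidedown case with hypothetical names;
 * masking and tail handling are omitted.
 */
static inline void demo_vslidedown(uint32_t *vd, const uint32_t *vs2,
                                   uint32_t offset, int vl, uint32_t vlmax)
{
    for (int i = 0; i < vl; i++) {
        uint64_t src = (uint64_t)i + offset;    /* source index, may overflow vlmax */
        vd[i] = src < vlmax ? vs2[src] : 0;
    }
}
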
#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,     \
                                 void *vs2, CPURISCVState *env,       \
                                 uint32_t desc)                       \
{                                                                     \
    typedef uint##BITWIDTH##_t ETYPE;                                 \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t vl = env->vl;                                            \
    uint32_t esz = sizeof(ETYPE);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t vma = vext_vma(desc);                                    \
    uint32_t i;                                                       \
                                                                      \
    for (i = env->vstart; i < vl; i++) {                              \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            /* set masked-off elements to 1s */                       \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);       \
            continue;                                                 \
        }                                                             \
        if (i == 0) {                                                 \
            *((ETYPE *)vd + H(i)) = s1;                               \
        } else {                                                      \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));       \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                 \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                             \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,   \
                                   void *vs2, CPURISCVState *env,     \
                                   uint32_t desc)                     \
{                                                                     \
    typedef uint##BITWIDTH##_t ETYPE;                                 \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t vl = env->vl;                                            \
    uint32_t esz = sizeof(ETYPE);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t vma = vext_vma(desc);                                    \
    uint32_t i;                                                       \
                                                                      \
    for (i = env->vstart; i < vl; i++) {                              \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            /* set masked-off elements to 1s */                       \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);       \
            continue;                                                 \
        }                                                             \
        if (i == vl - 1) {                                            \
            *((ETYPE *)vd + H(i)) = s1;                               \
        } else {                                                      \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));       \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);               \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                        \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                 \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                      \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);               \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,           \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));         \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t vl = env->vl;                                            \
    uint32_t esz = sizeof(TS2);                                       \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t vma = vext_vma(desc);                                    \
    uint64_t index;                                                   \
    uint32_t i;                                                       \
                                                                      \
    for (i = env->vstart; i < vl; i++) {                              \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            /* set masked-off elements to 1s */                       \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);       \
            continue;                                                 \
        }                                                             \
        index = *((TS1 *)vs1 + HS1(i));                               \
        if (index >= vlmax) {                                         \
            *((TS2 *)vd + HS2(i)) = 0;                                \
        } else {                                                      \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));       \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                          \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));       \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t vl = env->vl;                                            \
    uint32_t esz = sizeof(ETYPE);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t vma = vext_vma(desc);                                    \
    uint64_t index = s1;                                              \
    uint32_t i;                                                       \
                                                                      \
    for (i = env->vstart; i < vl; i++) {                              \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            /* set masked-off elements to 1s */                       \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);       \
            continue;                                                 \
        }                                                             \
        if (index >= vlmax) {                                         \
            *((ETYPE *)vd + H(i)) = 0;                                \
        } else {                                                      \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));       \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

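/*
 * Editor's illustration (not part of the build): vrgather.vv uses each
 * element of vs1 as an index into vs2, and indices at or beyond vlmax read
 * as zero. Plain-array model with hypothetical names; masking and tail
 * handling are omitted.
 */
static inline void demo_vrgather(uint32_t *vd, const uint32_t *vs1,
                                 const uint32_t *vs2, int vl, uint32_t vlmax)
{
    for (int i = 0; i < vl; i++) {
        uint64_t index = vs1[i];
        vd[i] = index >= vlmax ? 0 : vs2[index];
    }
}
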
/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                         \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,           \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    uint32_t vl = env->vl;                                            \
    uint32_t esz = sizeof(ETYPE);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t num = 0, i;                                              \
                                                                      \
    for (i = env->vstart; i < vl; i++) {                              \
        if (!vext_elem_mask(vs1, i)) {                                \
            continue;                                                 \
        }                                                             \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));             \
        num++;                                                        \
    }                                                                 \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

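/*
 * Editor's illustration (not part of the build): vcompress.vm packs the
 * elements of vs2 whose vs1 mask bit is set into the low elements of vd,
 * preserving their order. Plain-array model with hypothetical names; tail
 * handling is omitted.
 */
static inline int demo_vcompress(uint32_t *vd, const uint32_t *vs2,
                                 const uint8_t *vs1, int vl)
{
    int num = 0;

    for (int i = 0; i < vl; i++) {
        if (vs1[i]) {
            vd[num++] = vs2[i];
        }
    }
    return num;                     /* number of packed elements */
}
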
/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                 \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                      \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    uint32_t vl = env->vl;                                            \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t esz = sizeof(ETYPE);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t vma = vext_vma(desc);                                    \
    uint32_t i;                                                       \
                                                                      \
    for (i = env->vstart; i < vl; i++) {                              \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            /* set masked-off elements to 1s */                       \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);       \
            continue;                                                 \
        }                                                             \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));            \
    }                                                                 \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)