/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

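    /*
     * Fractional-LMUL legality check below: vlmul encodings with bit 2 set
     * are fractional; 100 (lmul == 4) is reserved, and for 1/8, 1/4 and 1/2
     * (lmul == 5, 6, 7) the constraint SEW <= LMUL * ELEN is checked as
     * sew <= elen >> (8 - lmul).
     */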
    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

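/*
 * For example, on a big-endian host the byte element with index 0 lives at
 * offset H1(0) = 7 inside its 64-bit chunk, whereas on a little-endian host
 * it stays at offset 0.
 */
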
static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

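/*
 * e.g. vlmul = 111 (LMUL = 1/2) sign-extends to -1, so callers such as
 * vext_max_elems() end up shifting vlenb right instead of left.
 */
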
static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}

static inline uint32_t vext_vma(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VMA);
}

static inline uint32_t vext_vta_all_1s(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

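/*
 * Worked example: VLEN = 128 bits gives vlenb = 16; with LMUL = 8 and
 * SEW = 16 (log2_esz = 1), scale = 3 - 1 = 2 and VLMAX = 16 << 2 = 64,
 * which matches VLEN * LMUL / SEW = 128 * 8 / 16.
 */
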
/*
 * Get number of total elements, including prestart, body and tail elements.
 * Note that when LMUL < 1, the tail includes the elements past VLMAX that
 * are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

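/*
 * e.g. vlenb = 16, SEW = 8 and a 32-bit EEW at LMUL = 1 give
 * emul = ctzl(4) - ctzl(1) + 0 = 2, i.e. (16 << 2) / 4 = 16 total elements
 * spread over the four registers of the EMUL = 4 register group.
 */
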
static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

/* set agnostic elements to 1s */
static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                              uint32_t tot)
{
    if (is_agnostic == 0) {
        /* policy undisturbed */
        return;
    }
    if (tot - cnt == 0) {
        return;
    }
    memset(base + cnt, -1, tot - cnt);
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

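/*
 * With MLEN = 1 the mask bit for element i therefore lives at bit (i % 64)
 * of the (i / 64)-th host 64-bit word of v0, e.g. element 70 maps to bit 6
 * of word 1.
 */
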
/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

static void vext_set_tail_elems_1s(CPURISCVState *env, target_ulong vl,
                                   void *vd, uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3;
    uint32_t vta = vext_vta(desc);
    uint32_t registers_used;
    int k;

    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }

    if (nf * max_elems % total_elems != 0) {
        registers_used = ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, evl, vd, desc, nf, esz, max_elems);
}

/*
 * A masked unit-stride load or store operation is a special case of a
 * strided operation, with stride = NF * sizeof(MTYPE).
 */

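/*
 * e.g. a masked vle16_v helper call with NF = 2 in its descriptor (the
 * vlseg2e16.v instruction) is dispatched below as a strided access with
 * stride = 2 * sizeof(int16_t) = 4 bytes.
 */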
#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 *** unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC());
}

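/*
 * e.g. vl = 17 mask bits gives evl = (17 + 7) >> 3 = 3, so three bytes of
 * the mask register are transferred by vlm_v/vsm_v above.
 */
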
/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,        \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,     \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());     \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)      \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,        \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,     \
                    STORE_FN, ctzl(sizeof(ETYPE)),              \
                    GETPC());                                   \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

/*
 *** load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3;
    uint32_t max_elems = vlenb >> log2_esz;

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store rest of elements of current segment pointed by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
        }
        k++;
    }

    /* load/store elements for rest of segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}

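/*
 * e.g. a whole-register load of nf = 4 registers interrupted with
 * vstart = max_elems + 3 resumes above in the middle of the second
 * register (k = 1, off = 3) and then finishes registers 2 and 3.
 */
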
#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)

/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivv2_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1,  \
                  void *vs2, CPURISCVState *env,  \
                  uint32_t desc)                  \
{                                                 \
    do_vext_vv(vd, v0, vs1, vs2, env, desc,       \
               do_##NAME, ESZ);                   \
}

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operator type.
 * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
 */
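/*
 * For example, with vwadd_vx_b (WOP_SSS_B) T1 is int8_t and TX1 is int16_t,
 * so (TX1)(T1)s1 first truncates the scalar to int8_t and then sign-extends
 * it to the 16-bit operand width before the addition.
 */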
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
}

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivx2_fn fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, s1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ)                          \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,  \
                  void *vs2, CPURISCVState *env,        \
                  uint32_t desc)                        \
{                                                       \
    do_vext_vx(vd, v0, s1, vs2, env, desc,              \
               do_##NAME, ESZ);                         \
}

GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t i;                                                          \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

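/*
 * DO_MADC computes the carry-out of the unsigned addition N + M + C by
 * testing for wrap-around, and DO_MSBC the borrow-out of N - M - C;
 * e.g. DO_MADC(0xff, 0x01, 0) on uint8_t operands yields 1.
 */
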
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /* mask destination register is always tail-agnostic */   \
    /* set tail elements to 1s */                             \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,           \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    uint32_t vl = env->vl;                                       \
    uint32_t vm = vext_vm(desc);                                 \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;             \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                 \
    uint32_t i;                                                  \
                                                                 \
    for (i = env->vstart; i < vl; i++) {                         \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                       \
        ETYPE carry = !vm && vext_elem_mask(v0, i);              \
        vext_set_elem_mask(vd, i,                                \
                           DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
    }                                                            \
    env->vstart = 0;                                             \
    /* mask destination register is always tail-agnostic */      \
    /* set tail elements to 1s */                                \
    if (vta_all_1s) {                                            \
        for (; i < total_elems; i++) {                           \
            vext_set_elem_mask(vd, i, 1);                        \
        }                                                        \
    }                                                            \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)    \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                 \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    uint32_t vm = vext_vm(desc);                                 \
    uint32_t vl = env->vl;                                       \
    uint32_t esz = sizeof(TS1);                                  \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc);                               \
    uint32_t vma = vext_vma(desc);                               \
    uint32_t i;                                                  \
                                                                 \
    for (i = env->vstart; i < vl; i++) {                         \
        if (!vm && !vext_elem_mask(v0, i)) {                     \
            /* set masked-off elements to 1s */                  \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);  \
            continue;                                            \
        }                                                        \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                         \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                         \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);               \
    }                                                            \
    env->vstart = 0;                                             \
    /* set tail elements to 1s */                                \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);     \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* generate the helpers for shift instructions with one vector and one scalar */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK)      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,           \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    uint32_t vm = vext_vm(desc);                                 \
    uint32_t vl = env->vl;                                       \
    uint32_t esz = sizeof(TD);                                   \
    uint32_t total_elems =                                       \
        vext_get_total_elems(env, desc, esz);                    \
    uint32_t vta = vext_vta(desc);                               \
    uint32_t vma = vext_vma(desc);                               \
    uint32_t i;                                                  \
                                                                 \
    for (i = env->vstart; i < vl; i++) {                         \
        if (!vm && !vext_elem_mask(v0, i)) {                     \
            /* set masked-off elements to 1s */                  \
            vext_set_elems_1s(vd, vma, i * esz,                  \
                              (i + 1) * esz);                    \
            continue;                                            \
        }                                                        \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                         \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);                 \
    }                                                            \
    env->vstart = 0;                                             \
    /* set tail elements to 1s */                                \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);     \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
    /* mask destination register is always tail-agnostic */   \
    /* set tail elements to 1s */                             \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;                \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                           DO_OP(s2, (ETYPE)(target_long)s1));      \
    }                                                               \
    env->vstart = 0;                                                \
    /* mask destination register is always tail-agnostic */         \
    /* set tail elements to 1s */                                   \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1549RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1550RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1551RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1552RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1553RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1554RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1555RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1556RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1557RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1558RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
5c19fc15 1559GEN_VEXT_VX(vminu_vx_b, 1)
1560GEN_VEXT_VX(vminu_vx_h, 2)
1561GEN_VEXT_VX(vminu_vx_w, 4)
1562GEN_VEXT_VX(vminu_vx_d, 8)
1563GEN_VEXT_VX(vmin_vx_b, 1)
1564GEN_VEXT_VX(vmin_vx_h, 2)
1565GEN_VEXT_VX(vmin_vx_w, 4)
1566GEN_VEXT_VX(vmin_vx_d, 8)
1567GEN_VEXT_VX(vmaxu_vx_b, 1)
1568GEN_VEXT_VX(vmaxu_vx_h, 2)
1569GEN_VEXT_VX(vmaxu_vx_w, 4)
1570GEN_VEXT_VX(vmaxu_vx_d, 8)
1571GEN_VEXT_VX(vmax_vx_b, 1)
1572GEN_VEXT_VX(vmax_vx_h, 2)
1573GEN_VEXT_VX(vmax_vx_w, 4)
1574GEN_VEXT_VX(vmax_vx_d, 8)
958b85f3
LZ
1575
1576/* Vector Single-Width Integer Multiply Instructions */
1577#define DO_MUL(N, M) (N * M)
1578RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1579RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1580RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1581RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
f1eed927 1582GEN_VEXT_VV(vmul_vv_b, 1)
1583GEN_VEXT_VV(vmul_vv_h, 2)
1584GEN_VEXT_VV(vmul_vv_w, 4)
1585GEN_VEXT_VV(vmul_vv_d, 8)
958b85f3
LZ
1586
1587static int8_t do_mulh_b(int8_t s2, int8_t s1)
1588{
1589 return (int16_t)s2 * (int16_t)s1 >> 8;
1590}
1591
1592static int16_t do_mulh_h(int16_t s2, int16_t s1)
1593{
1594 return (int32_t)s2 * (int32_t)s1 >> 16;
1595}
1596
1597static int32_t do_mulh_w(int32_t s2, int32_t s1)
1598{
1599 return (int64_t)s2 * (int64_t)s1 >> 32;
1600}
1601
1602static int64_t do_mulh_d(int64_t s2, int64_t s1)
1603{
1604 uint64_t hi_64, lo_64;
1605
1606 muls64(&lo_64, &hi_64, s1, s2);
1607 return hi_64;
1608}
1609
1610static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1611{
1612 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1613}
1614
1615static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1616{
1617 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1618}
1619
1620static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1621{
1622 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1623}
1624
1625static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1626{
1627 uint64_t hi_64, lo_64;
1628
1629 mulu64(&lo_64, &hi_64, s2, s1);
1630 return hi_64;
1631}
1632
1633static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1634{
1635 return (int16_t)s2 * (uint16_t)s1 >> 8;
1636}
1637
1638static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1639{
1640 return (int32_t)s2 * (uint32_t)s1 >> 16;
1641}
1642
1643static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1644{
1645 return (int64_t)s2 * (uint64_t)s1 >> 32;
1646}
1647
1648/*
1649 * Let A = signed operand,
1650 * B = unsigned operand,
1651 * P = mulu64(A, B), unsigned product of the raw bit patterns,
1652 * SP = the signed x unsigned product we want
1653 *
1654 * IF A >= 0
1655 * the bit pattern of A equals A, so
1656 * SP = P
1657 * ELSE
1658 * the bit pattern of A equals A + 2 ** 64, so
1659 * P = (A + 2 ** 64) * B
1660 * = A * B + 2 ** 64 * B
1661 * SP = A * B
1662 * = P - 2 ** 64 * B
1663 * THEREFORE
1664 * HI_P -= (A < 0 ? B : 0)
1665 */
1666
1667static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1668{
1669 uint64_t hi_64, lo_64;
1670
1671 mulu64(&lo_64, &hi_64, s2, s1);
1672
1673 hi_64 -= s2 < 0 ? s1 : 0;
1674 return hi_64;
1675}
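/*
 * Illustrative cross-check only (a hypothetical helper, not part of the
 * generated set): on compilers that provide __int128 the same high half
 * can be computed directly, which makes the correction above easy to
 * verify.  For example, with s2 = -1 and s1 = 3, mulu64() returns
 * hi = 2, lo = 0xfffffffffffffffd; subtracting s1 gives hi = -1, the
 * correct high half of the signed product -3.
 */
#ifdef __SIZEOF_INT128__
static inline int64_t do_mulhsu_d_ref(int64_t s2, uint64_t s1)
{
    /* The signed x unsigned product always fits in 128 bits. */
    return (int64_t)(((__int128)s2 * (__int128)s1) >> 64);
}
#endif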
1676
1677RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1678RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1679RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1680RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1681RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1682RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1683RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1684RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1685RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1686RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1687RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1688RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
f1eed927 1689GEN_VEXT_VV(vmulh_vv_b, 1)
1690GEN_VEXT_VV(vmulh_vv_h, 2)
1691GEN_VEXT_VV(vmulh_vv_w, 4)
1692GEN_VEXT_VV(vmulh_vv_d, 8)
1693GEN_VEXT_VV(vmulhu_vv_b, 1)
1694GEN_VEXT_VV(vmulhu_vv_h, 2)
1695GEN_VEXT_VV(vmulhu_vv_w, 4)
1696GEN_VEXT_VV(vmulhu_vv_d, 8)
1697GEN_VEXT_VV(vmulhsu_vv_b, 1)
1698GEN_VEXT_VV(vmulhsu_vv_h, 2)
1699GEN_VEXT_VV(vmulhsu_vv_w, 4)
1700GEN_VEXT_VV(vmulhsu_vv_d, 8)
958b85f3
LZ
1701
1702RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1703RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1704RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1705RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1706RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1707RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1708RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1709RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1710RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1711RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1712RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1713RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1714RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1715RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1716RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1717RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
5c19fc15 1718GEN_VEXT_VX(vmul_vx_b, 1)
1719GEN_VEXT_VX(vmul_vx_h, 2)
1720GEN_VEXT_VX(vmul_vx_w, 4)
1721GEN_VEXT_VX(vmul_vx_d, 8)
1722GEN_VEXT_VX(vmulh_vx_b, 1)
1723GEN_VEXT_VX(vmulh_vx_h, 2)
1724GEN_VEXT_VX(vmulh_vx_w, 4)
1725GEN_VEXT_VX(vmulh_vx_d, 8)
1726GEN_VEXT_VX(vmulhu_vx_b, 1)
1727GEN_VEXT_VX(vmulhu_vx_h, 2)
1728GEN_VEXT_VX(vmulhu_vx_w, 4)
1729GEN_VEXT_VX(vmulhu_vx_d, 8)
1730GEN_VEXT_VX(vmulhsu_vx_b, 1)
1731GEN_VEXT_VX(vmulhsu_vx_h, 2)
1732GEN_VEXT_VX(vmulhsu_vx_w, 4)
1733GEN_VEXT_VX(vmulhsu_vx_d, 8)
85e6658c
LZ
1734
1735/* Vector Integer Divide Instructions */
1736#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1737#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1738#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\
1739 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1740#define DO_REM(N, M) (unlikely(M == 0) ? N :\
1741 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
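/*
 * These macros encode the RISC-V division special cases: dividing by
 * zero returns a quotient with all bits set (-1 signed, the maximum
 * value unsigned) and leaves the dividend unchanged as the remainder;
 * signed overflow (the minimum value divided by -1) yields the minimum
 * value with a remainder of 0.  E.g. DO_DIVU(5, 0) is the all-ones
 * value and DO_REMU(7, 0) is 7.
 */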
1742
1743RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1744RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1745RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1746RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1747RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1748RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1749RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1750RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1751RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1752RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1753RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1754RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1755RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1756RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1757RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1758RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
f1eed927 1759GEN_VEXT_VV(vdivu_vv_b, 1)
1760GEN_VEXT_VV(vdivu_vv_h, 2)
1761GEN_VEXT_VV(vdivu_vv_w, 4)
1762GEN_VEXT_VV(vdivu_vv_d, 8)
1763GEN_VEXT_VV(vdiv_vv_b, 1)
1764GEN_VEXT_VV(vdiv_vv_h, 2)
1765GEN_VEXT_VV(vdiv_vv_w, 4)
1766GEN_VEXT_VV(vdiv_vv_d, 8)
1767GEN_VEXT_VV(vremu_vv_b, 1)
1768GEN_VEXT_VV(vremu_vv_h, 2)
1769GEN_VEXT_VV(vremu_vv_w, 4)
1770GEN_VEXT_VV(vremu_vv_d, 8)
1771GEN_VEXT_VV(vrem_vv_b, 1)
1772GEN_VEXT_VV(vrem_vv_h, 2)
1773GEN_VEXT_VV(vrem_vv_w, 4)
1774GEN_VEXT_VV(vrem_vv_d, 8)
85e6658c
LZ
1775
1776RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1777RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1778RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1779RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1780RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1781RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1782RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1783RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1784RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1785RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1786RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1787RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1788RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1789RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1790RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1791RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
5c19fc15 1792GEN_VEXT_VX(vdivu_vx_b, 1)
1793GEN_VEXT_VX(vdivu_vx_h, 2)
1794GEN_VEXT_VX(vdivu_vx_w, 4)
1795GEN_VEXT_VX(vdivu_vx_d, 8)
1796GEN_VEXT_VX(vdiv_vx_b, 1)
1797GEN_VEXT_VX(vdiv_vx_h, 2)
1798GEN_VEXT_VX(vdiv_vx_w, 4)
1799GEN_VEXT_VX(vdiv_vx_d, 8)
1800GEN_VEXT_VX(vremu_vx_b, 1)
1801GEN_VEXT_VX(vremu_vx_h, 2)
1802GEN_VEXT_VX(vremu_vx_w, 4)
1803GEN_VEXT_VX(vremu_vx_d, 8)
1804GEN_VEXT_VX(vrem_vx_b, 1)
1805GEN_VEXT_VX(vrem_vx_h, 2)
1806GEN_VEXT_VX(vrem_vx_w, 4)
1807GEN_VEXT_VX(vrem_vx_d, 8)
97b1cba3
LZ
1808
1809/* Vector Widening Integer Multiply Instructions */
1810RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1811RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1812RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1813RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1814RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1815RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1816RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1817RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1818RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
f1eed927 1819GEN_VEXT_VV(vwmul_vv_b, 2)
1820GEN_VEXT_VV(vwmul_vv_h, 4)
1821GEN_VEXT_VV(vwmul_vv_w, 8)
1822GEN_VEXT_VV(vwmulu_vv_b, 2)
1823GEN_VEXT_VV(vwmulu_vv_h, 4)
1824GEN_VEXT_VV(vwmulu_vv_w, 8)
1825GEN_VEXT_VV(vwmulsu_vv_b, 2)
1826GEN_VEXT_VV(vwmulsu_vv_h, 4)
1827GEN_VEXT_VV(vwmulsu_vv_w, 8)
97b1cba3
LZ
1828
1829RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1830RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1831RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1832RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1833RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1834RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1835RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1836RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1837RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
5c19fc15 1838GEN_VEXT_VX(vwmul_vx_b, 2)
1839GEN_VEXT_VX(vwmul_vx_h, 4)
1840GEN_VEXT_VX(vwmul_vx_w, 8)
1841GEN_VEXT_VX(vwmulu_vx_b, 2)
1842GEN_VEXT_VX(vwmulu_vx_h, 4)
1843GEN_VEXT_VX(vwmulu_vx_w, 8)
1844GEN_VEXT_VX(vwmulsu_vx_b, 2)
1845GEN_VEXT_VX(vwmulsu_vx_h, 4)
1846GEN_VEXT_VX(vwmulsu_vx_w, 8)
54df813a
LZ
1847
1848/* Vector Single-Width Integer Multiply-Add Instructions */
1849#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1850static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1851{ \
1852 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1853 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1854 TD d = *((TD *)vd + HD(i)); \
1855 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1856}
1857
1858#define DO_MACC(N, M, D) (M * N + D)
1859#define DO_NMSAC(N, M, D) (-(M * N) + D)
1860#define DO_MADD(N, M, D) (M * D + N)
1861#define DO_NMSUB(N, M, D) (-(M * D) + N)
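/*
 * With the OPIVV3/OPIVX3 calling convention OP(s2, s1, d), N is the vs2
 * element, M is the vs1 element (or rs1) and D is the destination
 * element, so the four macros implement:
 *   vmacc:  vd[i] =  (vs1[i] * vs2[i]) + vd[i]
 *   vnmsac: vd[i] = -(vs1[i] * vs2[i]) + vd[i]
 *   vmadd:  vd[i] =  (vs1[i] * vd[i])  + vs2[i]
 *   vnmsub: vd[i] = -(vs1[i] * vd[i])  + vs2[i]
 */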
1862RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1863RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1864RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1865RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1866RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1867RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1868RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1869RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1870RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1871RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1872RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1873RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1874RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1875RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1876RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1877RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
f1eed927 1878GEN_VEXT_VV(vmacc_vv_b, 1)
1879GEN_VEXT_VV(vmacc_vv_h, 2)
1880GEN_VEXT_VV(vmacc_vv_w, 4)
1881GEN_VEXT_VV(vmacc_vv_d, 8)
1882GEN_VEXT_VV(vnmsac_vv_b, 1)
1883GEN_VEXT_VV(vnmsac_vv_h, 2)
1884GEN_VEXT_VV(vnmsac_vv_w, 4)
1885GEN_VEXT_VV(vnmsac_vv_d, 8)
1886GEN_VEXT_VV(vmadd_vv_b, 1)
1887GEN_VEXT_VV(vmadd_vv_h, 2)
1888GEN_VEXT_VV(vmadd_vv_w, 4)
1889GEN_VEXT_VV(vmadd_vv_d, 8)
1890GEN_VEXT_VV(vnmsub_vv_b, 1)
1891GEN_VEXT_VV(vnmsub_vv_h, 2)
1892GEN_VEXT_VV(vnmsub_vv_w, 4)
1893GEN_VEXT_VV(vnmsub_vv_d, 8)
54df813a
LZ
1894
1895#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1896static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1897{ \
1898 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1899 TD d = *((TD *)vd + HD(i)); \
1900 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1901}
1902
1903RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1904RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1905RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1906RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1907RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1908RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1909RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1910RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1911RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1912RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1913RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1914RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1915RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1916RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1917RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1918RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
5c19fc15 1919GEN_VEXT_VX(vmacc_vx_b, 1)
1920GEN_VEXT_VX(vmacc_vx_h, 2)
1921GEN_VEXT_VX(vmacc_vx_w, 4)
1922GEN_VEXT_VX(vmacc_vx_d, 8)
1923GEN_VEXT_VX(vnmsac_vx_b, 1)
1924GEN_VEXT_VX(vnmsac_vx_h, 2)
1925GEN_VEXT_VX(vnmsac_vx_w, 4)
1926GEN_VEXT_VX(vnmsac_vx_d, 8)
1927GEN_VEXT_VX(vmadd_vx_b, 1)
1928GEN_VEXT_VX(vmadd_vx_h, 2)
1929GEN_VEXT_VX(vmadd_vx_w, 4)
1930GEN_VEXT_VX(vmadd_vx_d, 8)
1931GEN_VEXT_VX(vnmsub_vx_b, 1)
1932GEN_VEXT_VX(vnmsub_vx_h, 2)
1933GEN_VEXT_VX(vnmsub_vx_w, 4)
1934GEN_VEXT_VX(vnmsub_vx_d, 8)
2b587b33
LZ
1935
1936/* Vector Widening Integer Multiply-Add Instructions */
1937RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1938RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1939RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1940RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1941RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1942RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1943RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1944RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1945RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
f1eed927 1946GEN_VEXT_VV(vwmaccu_vv_b, 2)
1947GEN_VEXT_VV(vwmaccu_vv_h, 4)
1948GEN_VEXT_VV(vwmaccu_vv_w, 8)
1949GEN_VEXT_VV(vwmacc_vv_b, 2)
1950GEN_VEXT_VV(vwmacc_vv_h, 4)
1951GEN_VEXT_VV(vwmacc_vv_w, 8)
1952GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1953GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1954GEN_VEXT_VV(vwmaccsu_vv_w, 8)
2b587b33
LZ
1955
1956RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1957RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1958RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1959RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1960RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1961RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1962RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1963RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1964RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1965RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1966RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1967RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
5c19fc15 1968GEN_VEXT_VX(vwmaccu_vx_b, 2)
1969GEN_VEXT_VX(vwmaccu_vx_h, 4)
1970GEN_VEXT_VX(vwmaccu_vx_w, 8)
1971GEN_VEXT_VX(vwmacc_vx_b, 2)
1972GEN_VEXT_VX(vwmacc_vx_h, 4)
1973GEN_VEXT_VX(vwmacc_vx_w, 8)
1974GEN_VEXT_VX(vwmaccsu_vx_b, 2)
1975GEN_VEXT_VX(vwmaccsu_vx_h, 4)
1976GEN_VEXT_VX(vwmaccsu_vx_w, 8)
1977GEN_VEXT_VX(vwmaccus_vx_b, 2)
1978GEN_VEXT_VX(vwmaccus_vx_h, 4)
1979GEN_VEXT_VX(vwmaccus_vx_w, 8)
f020a7a1
LZ
1980
1981/* Vector Integer Merge and Move Instructions */
3479a814 1982#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
f020a7a1
LZ
1983void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1984 uint32_t desc) \
1985{ \
1986 uint32_t vl = env->vl; \
89a32de2 1987 uint32_t esz = sizeof(ETYPE); \
1988 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1989 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
1990 uint32_t i; \
1991 \
f714361e 1992 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
1993 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1994 *((ETYPE *)vd + H(i)) = s1; \
1995 } \
f714361e 1996 env->vstart = 0; \
89a32de2 1997 /* set tail elements to 1s */ \
1998 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
1999}
2000
3479a814
FC
2001GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
2002GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
2003GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
2004GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
f020a7a1 2005
3479a814 2006#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
f020a7a1
LZ
2007void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
2008 uint32_t desc) \
2009{ \
2010 uint32_t vl = env->vl; \
89a32de2 2011 uint32_t esz = sizeof(ETYPE); \
2012 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2013 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2014 uint32_t i; \
2015 \
f714361e 2016 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
2017 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
2018 } \
f714361e 2019 env->vstart = 0; \
89a32de2 2020 /* set tail elements to 1s */ \
2021 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2022}
2023
3479a814
FC
2024GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
2025GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
2026GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
2027GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
f020a7a1 2028
3479a814 2029#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
f020a7a1
LZ
2030void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2031 CPURISCVState *env, uint32_t desc) \
2032{ \
f020a7a1 2033 uint32_t vl = env->vl; \
89a32de2 2034 uint32_t esz = sizeof(ETYPE); \
2035 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2036 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2037 uint32_t i; \
2038 \
f714361e 2039 for (i = env->vstart; i < vl; i++) { \
f9298de5 2040 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
f020a7a1
LZ
2041 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
2042 } \
f714361e 2043 env->vstart = 0; \
89a32de2 2044 /* set tail elements to 1s */ \
2045 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2046}
2047
3479a814
FC
2048GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
2049GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
2050GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
2051GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
f020a7a1 2052
3479a814 2053#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
f020a7a1
LZ
2054void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2055 void *vs2, CPURISCVState *env, uint32_t desc) \
2056{ \
f020a7a1 2057 uint32_t vl = env->vl; \
89a32de2 2058 uint32_t esz = sizeof(ETYPE); \
2059 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2060 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2061 uint32_t i; \
2062 \
f714361e 2063 for (i = env->vstart; i < vl; i++) { \
f020a7a1 2064 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 2065 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
f020a7a1
LZ
2066 (ETYPE)(target_long)s1); \
2067 *((ETYPE *)vd + H(i)) = d; \
2068 } \
f714361e 2069 env->vstart = 0; \
89a32de2 2070 /* set tail elements to 1s */ \
2071 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2072}
2073
3479a814
FC
2074GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
2075GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
2076GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
2077GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
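/*
 * Note the asymmetry above: the vmv.v.* helpers take no mask operand
 * (those forms are the unmasked, vm = 1, encodings of vmerge, so v0 is
 * never consulted), while the vmerge helpers read v0 per element and
 * select vs1/rs1 where the mask bit is set and vs2 where it is clear.
 */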
eb2650e3
LZ
2078
2079/*
2080 *** Vector Fixed-Point Arithmetic Instructions
2081 */
2082
2083/* Vector Single-Width Saturating Add and Subtract */
2084
2085/*
2086 * Since fixed-point instructions take a rounding mode and may saturate,
2087 * define common macros for fixed-point arithmetic here.
2088 */
2089typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2090 CPURISCVState *env, int vxrm);
2091
2092#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2093static inline void \
2094do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2095 CPURISCVState *env, int vxrm) \
2096{ \
2097 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2098 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2099 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2100}
2101
2102static inline void
2103vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2104 CPURISCVState *env,
f9298de5 2105 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 2106 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 2107{
f714361e 2108 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2109 if (!vm && !vext_elem_mask(v0, i)) {
72e17a9f
YTC
2110 /* set masked-off elements to 1s */
2111 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
eb2650e3
LZ
2112 continue;
2113 }
2114 fn(vd, vs1, vs2, i, env, vxrm);
2115 }
f714361e 2116 env->vstart = 0;
eb2650e3
LZ
2117}
2118
2119static inline void
2120vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2121 CPURISCVState *env,
8a085fb2 2122 uint32_t desc,
09106eed 2123 opivv2_rm_fn *fn, uint32_t esz)
eb2650e3 2124{
eb2650e3
LZ
2125 uint32_t vm = vext_vm(desc);
2126 uint32_t vl = env->vl;
09106eed 2127 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2128 uint32_t vta = vext_vta(desc);
72e17a9f 2129 uint32_t vma = vext_vma(desc);
eb2650e3
LZ
2130
2131 switch (env->vxrm) {
2132 case 0: /* rnu */
2133 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2134 env, vl, vm, 0, fn, vma, esz);
eb2650e3
LZ
2135 break;
2136 case 1: /* rne */
2137 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2138 env, vl, vm, 1, fn, vma, esz);
eb2650e3
LZ
2139 break;
2140 case 2: /* rdn */
2141 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2142 env, vl, vm, 2, fn, vma, esz);
eb2650e3
LZ
2143 break;
2144 default: /* rod */
2145 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2146 env, vl, vm, 3, fn, vma, esz);
eb2650e3
LZ
2147 break;
2148 }
09106eed 2149 /* set tail elements to 1s */
2150 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2151}
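/*
 * The switch above expands one call per rounding mode so that vxrm
 * reaches vext_vv_rm_1() as a constant; presumably this lets the
 * compiler specialize the inlined per-element loop for each mode
 * instead of re-reading env->vxrm inside the loop.
 */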
2152
2153/* generate helpers for fixed point instructions with OPIVV format */
09106eed 2154#define GEN_VEXT_VV_RM(NAME, ESZ) \
eb2650e3
LZ
2155void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2156 CPURISCVState *env, uint32_t desc) \
2157{ \
8a085fb2 2158 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
09106eed 2159 do_##NAME, ESZ); \
eb2650e3
LZ
2160}
2161
2162static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2163{
2164 uint8_t res = a + b;
2165 if (res < a) {
2166 res = UINT8_MAX;
2167 env->vxsat = 0x1;
2168 }
2169 return res;
2170}
2171
2172static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2173 uint16_t b)
2174{
2175 uint16_t res = a + b;
2176 if (res < a) {
2177 res = UINT16_MAX;
2178 env->vxsat = 0x1;
2179 }
2180 return res;
2181}
2182
2183static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2184 uint32_t b)
2185{
2186 uint32_t res = a + b;
2187 if (res < a) {
2188 res = UINT32_MAX;
2189 env->vxsat = 0x1;
2190 }
2191 return res;
2192}
2193
2194static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2195 uint64_t b)
2196{
2197 uint64_t res = a + b;
2198 if (res < a) {
2199 res = UINT64_MAX;
2200 env->vxsat = 0x1;
2201 }
2202 return res;
2203}
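/*
 * For the unsigned saturating adds above, wrap-around is detected by
 * res < a: e.g. 200 + 100 in saddu8() wraps to 44, which is less than
 * 200, so the result is clamped to UINT8_MAX and vxsat is set.
 */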
2204
2205RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2206RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2207RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2208RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
09106eed 2209GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2210GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2211GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2212GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
eb2650e3
LZ
2213
2214typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2215 CPURISCVState *env, int vxrm);
2216
2217#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2218static inline void \
2219do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2220 CPURISCVState *env, int vxrm) \
2221{ \
2222 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2223 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2224}
2225
2226static inline void
2227vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2228 CPURISCVState *env,
f9298de5 2229 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 2230 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 2231{
f714361e 2232 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2233 if (!vm && !vext_elem_mask(v0, i)) {
72e17a9f
YTC
2234 /* set masked-off elements to 1s */
2235 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
eb2650e3
LZ
2236 continue;
2237 }
2238 fn(vd, s1, vs2, i, env, vxrm);
2239 }
f714361e 2240 env->vstart = 0;
eb2650e3
LZ
2241}
2242
2243static inline void
2244vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2245 CPURISCVState *env,
8a085fb2 2246 uint32_t desc,
09106eed 2247 opivx2_rm_fn *fn, uint32_t esz)
eb2650e3 2248{
eb2650e3
LZ
2249 uint32_t vm = vext_vm(desc);
2250 uint32_t vl = env->vl;
09106eed 2251 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2252 uint32_t vta = vext_vta(desc);
72e17a9f 2253 uint32_t vma = vext_vma(desc);
eb2650e3
LZ
2254
2255 switch (env->vxrm) {
2256 case 0: /* rnu */
2257 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2258 env, vl, vm, 0, fn, vma, esz);
eb2650e3
LZ
2259 break;
2260 case 1: /* rne */
2261 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2262 env, vl, vm, 1, fn, vma, esz);
eb2650e3
LZ
2263 break;
2264 case 2: /* rdn */
2265 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2266 env, vl, vm, 2, fn, vma, esz);
eb2650e3
LZ
2267 break;
2268 default: /* rod */
2269 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2270 env, vl, vm, 3, fn, vma, esz);
eb2650e3
LZ
2271 break;
2272 }
09106eed 2273 /* set tail elements to 1s */
2274 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2275}
2276
2277/* generate helpers for fixed point instructions with OPIVX format */
09106eed 2278#define GEN_VEXT_VX_RM(NAME, ESZ) \
eb2650e3
LZ
2279void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2280 void *vs2, CPURISCVState *env, uint32_t desc) \
2281{ \
8a085fb2 2282 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
09106eed 2283 do_##NAME, ESZ); \
eb2650e3
LZ
2284}
2285
2286RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2287RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2288RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2289RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
09106eed 2290GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2291GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2292GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2293GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
eb2650e3
LZ
2294
2295static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2296{
2297 int8_t res = a + b;
2298 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2299 res = a > 0 ? INT8_MAX : INT8_MIN;
2300 env->vxsat = 0x1;
2301 }
2302 return res;
2303}
2304
2305static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2306{
2307 int16_t res = a + b;
2308 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2309 res = a > 0 ? INT16_MAX : INT16_MIN;
2310 env->vxsat = 0x1;
2311 }
2312 return res;
2313}
2314
2315static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2316{
2317 int32_t res = a + b;
2318 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2319 res = a > 0 ? INT32_MAX : INT32_MIN;
2320 env->vxsat = 0x1;
2321 }
2322 return res;
2323}
2324
2325static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2326{
2327 int64_t res = a + b;
2328 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2329 res = a > 0 ? INT64_MAX : INT64_MIN;
2330 env->vxsat = 0x1;
2331 }
2332 return res;
2333}
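/*
 * The signed overflow test (res ^ a) & (res ^ b) & INT_MIN is non-zero
 * exactly when both operands have the same sign and the result's sign
 * differs: e.g. 100 + 100 in sadd8() wraps to -56, so the result is
 * clamped to INT8_MAX and vxsat is set.
 */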
2334
2335RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2336RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2337RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2338RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
09106eed 2339GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2340GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2341GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2342GEN_VEXT_VV_RM(vsadd_vv_d, 8)
eb2650e3
LZ
2343
2344RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2345RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2346RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2347RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
09106eed 2348GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2349GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2350GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2351GEN_VEXT_VX_RM(vsadd_vx_d, 8)
eb2650e3
LZ
2352
2353static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2354{
2355 uint8_t res = a - b;
2356 if (res > a) {
2357 res = 0;
2358 env->vxsat = 0x1;
2359 }
2360 return res;
2361}
2362
2363static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2364 uint16_t b)
2365{
2366 uint16_t res = a - b;
2367 if (res > a) {
2368 res = 0;
2369 env->vxsat = 0x1;
2370 }
2371 return res;
2372}
2373
2374static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2375 uint32_t b)
2376{
2377 uint32_t res = a - b;
2378 if (res > a) {
2379 res = 0;
2380 env->vxsat = 0x1;
2381 }
2382 return res;
2383}
2384
2385static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2386 uint64_t b)
2387{
2388 uint64_t res = a - b;
2389 if (res > a) {
2390 res = 0;
2391 env->vxsat = 0x1;
2392 }
2393 return res;
2394}
2395
2396RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2397RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2398RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2399RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
09106eed 2400GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2401GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2402GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2403GEN_VEXT_VV_RM(vssubu_vv_d, 8)
eb2650e3
LZ
2404
2405RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2406RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2407RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2408RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
09106eed 2409GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2410GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2411GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2412GEN_VEXT_VX_RM(vssubu_vx_d, 8)
eb2650e3
LZ
2413
2414static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2415{
2416 int8_t res = a - b;
2417 if ((res ^ a) & (a ^ b) & INT8_MIN) {
65606f21 2418 res = a >= 0 ? INT8_MAX : INT8_MIN;
eb2650e3
LZ
2419 env->vxsat = 0x1;
2420 }
2421 return res;
2422}
2423
2424static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2425{
2426 int16_t res = a - b;
2427 if ((res ^ a) & (a ^ b) & INT16_MIN) {
65606f21 2428 res = a >= 0 ? INT16_MAX : INT16_MIN;
eb2650e3
LZ
2429 env->vxsat = 0x1;
2430 }
2431 return res;
2432}
2433
2434static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2435{
2436 int32_t res = a - b;
2437 if ((res ^ a) & (a ^ b) & INT32_MIN) {
65606f21 2438 res = a >= 0 ? INT32_MAX : INT32_MIN;
eb2650e3
LZ
2439 env->vxsat = 0x1;
2440 }
2441 return res;
2442}
2443
2444static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2445{
2446 int64_t res = a - b;
2447 if ((res ^ a) & (a ^ b) & INT64_MIN) {
65606f21 2448 res = a >= 0 ? INT64_MAX : INT64_MIN;
eb2650e3
LZ
2449 env->vxsat = 0x1;
2450 }
2451 return res;
2452}
2453
2454RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2455RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2456RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2457RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
09106eed 2458GEN_VEXT_VV_RM(vssub_vv_b, 1)
2459GEN_VEXT_VV_RM(vssub_vv_h, 2)
2460GEN_VEXT_VV_RM(vssub_vv_w, 4)
2461GEN_VEXT_VV_RM(vssub_vv_d, 8)
eb2650e3
LZ
2462
2463RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2464RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2465RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2466RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
09106eed 2467GEN_VEXT_VX_RM(vssub_vx_b, 1)
2468GEN_VEXT_VX_RM(vssub_vx_h, 2)
2469GEN_VEXT_VX_RM(vssub_vx_w, 4)
2470GEN_VEXT_VX_RM(vssub_vx_d, 8)
b7aee481
LZ
2471
2472/* Vector Single-Width Averaging Add and Subtract */
2473static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2474{
2475 uint8_t d = extract64(v, shift, 1);
2476 uint8_t d1;
2477 uint64_t D1, D2;
2478
2479 if (shift == 0 || shift > 64) {
2480 return 0;
2481 }
2482
2483 d1 = extract64(v, shift - 1, 1);
2484 D1 = extract64(v, 0, shift);
2485 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2486 return d1;
2487 } else if (vxrm == 1) { /* round-to-nearest-even */
2488 if (shift > 1) {
2489 D2 = extract64(v, 0, shift - 1);
2490 return d1 & ((D2 != 0) | d);
2491 } else {
2492 return d1 & d;
2493 }
2494 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2495 return !d & (D1 != 0);
2496 }
2497 return 0; /* round-down (truncate) */
2498}
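/*
 * Worked examples, shifting v = 6 (0b0110) and v = 9 (0b1001) right by
 * two bits (exact quotients 1.5 and 2.25):
 *   rnu: 6 -> 2, 9 -> 2   (add the most significant discarded bit)
 *   rne: 6 -> 2, 9 -> 2   (ties are broken towards the even result)
 *   rdn: 6 -> 1, 9 -> 2   (truncate)
 *   rod: 6 -> 1, 9 -> 3   (jam any discarded bit into a clear LSB)
 */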
2499
2500static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2501{
2502 int64_t res = (int64_t)a + b;
2503 uint8_t round = get_round(vxrm, res, 1);
2504
2505 return (res >> 1) + round;
2506}
2507
2508static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2509{
2510 int64_t res = a + b;
2511 uint8_t round = get_round(vxrm, res, 1);
2512 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2513
2514 /* With signed overflow, bit 64 is inverse of bit 63. */
2515 return ((res >> 1) ^ over) + round;
2516}
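/*
 * Example of the overflow correction above: with a = b = INT64_MAX the
 * sum wraps to res = -2 and over = INT64_MIN, so (res >> 1) ^ over
 * recovers INT64_MAX, the exact average (the rounding increment is 0
 * here in every mode).
 */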
2517
2518RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2519RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2520RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2521RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
09106eed 2522GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2523GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2524GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2525GEN_VEXT_VV_RM(vaadd_vv_d, 8)
b7aee481
LZ
2526
2527RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2528RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2529RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2530RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
09106eed 2531GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2532GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2533GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2534GEN_VEXT_VX_RM(vaadd_vx_d, 8)
b7aee481 2535
8b99a110
FC
2536static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2537 uint32_t a, uint32_t b)
2538{
2539 uint64_t res = (uint64_t)a + b;
2540 uint8_t round = get_round(vxrm, res, 1);
2541
2542 return (res >> 1) + round;
2543}
2544
2545static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2546 uint64_t a, uint64_t b)
2547{
2548 uint64_t res = a + b;
2549 uint8_t round = get_round(vxrm, res, 1);
2550 uint64_t over = (uint64_t)(res < a) << 63;
2551
2552 return ((res >> 1) | over) + round;
2553}
2554
2555RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2556RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2557RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2558RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
09106eed 2559GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2560GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2561GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2562GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
8b99a110
FC
2563
2564RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2565RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2566RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2567RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
09106eed 2568GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2569GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2570GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2571GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
8b99a110 2572
b7aee481
LZ
2573static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2574{
2575 int64_t res = (int64_t)a - b;
2576 uint8_t round = get_round(vxrm, res, 1);
2577
2578 return (res >> 1) + round;
2579}
2580
2581static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2582{
2583 int64_t res = (int64_t)a - b;
2584 uint8_t round = get_round(vxrm, res, 1);
2585 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2586
2587 /* With signed overflow, bit 64 is inverse of bit 63. */
2588 return ((res >> 1) ^ over) + round;
2589}
2590
2591RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2592RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2593RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2594RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
09106eed 2595GEN_VEXT_VV_RM(vasub_vv_b, 1)
2596GEN_VEXT_VV_RM(vasub_vv_h, 2)
2597GEN_VEXT_VV_RM(vasub_vv_w, 4)
2598GEN_VEXT_VV_RM(vasub_vv_d, 8)
b7aee481
LZ
2599
2600RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2601RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2602RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2603RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
09106eed 2604GEN_VEXT_VX_RM(vasub_vx_b, 1)
2605GEN_VEXT_VX_RM(vasub_vx_h, 2)
2606GEN_VEXT_VX_RM(vasub_vx_w, 4)
2607GEN_VEXT_VX_RM(vasub_vx_d, 8)
9f0ff9e5 2608
8b99a110
FC
2609static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2610 uint32_t a, uint32_t b)
2611{
2612 int64_t res = (int64_t)a - b;
2613 uint8_t round = get_round(vxrm, res, 1);
2614
2615 return (res >> 1) + round;
2616}
2617
2618static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2619 uint64_t a, uint64_t b)
2620{
2621 uint64_t res = (uint64_t)a - b;
2622 uint8_t round = get_round(vxrm, res, 1);
2623 uint64_t over = (uint64_t)(res > a) << 63;
2624
2625 return ((res >> 1) | over) + round;
2626}
2627
2628RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2629RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2630RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2631RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
09106eed 2632GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2633GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2634GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2635GEN_VEXT_VV_RM(vasubu_vv_d, 8)
8b99a110
FC
2636
2637RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2638RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2639RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2640RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
09106eed 2641GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2642GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2643GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2644GEN_VEXT_VX_RM(vasubu_vx_d, 8)
8b99a110 2645
9f0ff9e5
LZ
2646/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2647static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2648{
2649 uint8_t round;
2650 int16_t res;
2651
2652 res = (int16_t)a * (int16_t)b;
2653 round = get_round(vxrm, res, 7);
2654 res = (res >> 7) + round;
2655
2656 if (res > INT8_MAX) {
2657 env->vxsat = 0x1;
2658 return INT8_MAX;
2659 } else if (res < INT8_MIN) {
2660 env->vxsat = 0x1;
2661 return INT8_MIN;
2662 } else {
2663 return res;
2664 }
2665}
2666
2667static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2668{
2669 uint8_t round;
2670 int32_t res;
2671
2672 res = (int32_t)a * (int32_t)b;
2673 round = get_round(vxrm, res, 15);
2674 res = (res >> 15) + round;
2675
2676 if (res > INT16_MAX) {
2677 env->vxsat = 0x1;
2678 return INT16_MAX;
2679 } else if (res < INT16_MIN) {
2680 env->vxsat = 0x1;
2681 return INT16_MIN;
2682 } else {
2683 return res;
2684 }
2685}
2686
2687static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2688{
2689 uint8_t round;
2690 int64_t res;
2691
2692 res = (int64_t)a * (int64_t)b;
2693 round = get_round(vxrm, res, 31);
2694 res = (res >> 31) + round;
2695
2696 if (res > INT32_MAX) {
2697 env->vxsat = 0x1;
2698 return INT32_MAX;
2699 } else if (res < INT32_MIN) {
2700 env->vxsat = 0x1;
2701 return INT32_MIN;
2702 } else {
2703 return res;
2704 }
2705}
2706
2707static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2708{
2709 uint8_t round;
2710 uint64_t hi_64, lo_64;
2711 int64_t res;
2712
2713 if (a == INT64_MIN && b == INT64_MIN) {
2714 env->vxsat = 1;
2715 return INT64_MAX;
2716 }
2717
2718 muls64(&lo_64, &hi_64, a, b);
2719 round = get_round(vxrm, lo_64, 63);
2720 /*
2721 * Cannot overflow, as there are always
2722 * 2 sign bits after multiply.
2723 */
2724 res = (hi_64 << 1) | (lo_64 >> 63);
2725 if (round) {
2726 if (res == INT64_MAX) {
2727 env->vxsat = 1;
2728 } else {
2729 res += 1;
2730 }
2731 }
2732 return res;
2733}
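/*
 * vsmul treats its operands as Q(SEW-1) fractions: the double-width
 * product is shifted right by SEW - 1 with rounding, so for bytes
 * 0x40 * 0x40 gives (64 * 64) >> 7 = 32, i.e. 0.5 * 0.5 = 0.25.  The
 * only unrepresentable result is (-1) * (-1) = +1, which is why the
 * narrow helpers saturate via the range check and vsmul64 special-cases
 * INT64_MIN * INT64_MIN up front.
 */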
2734
2735RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2736RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2737RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2738RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
09106eed 2739GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2740GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2741GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2742GEN_VEXT_VV_RM(vsmul_vv_d, 8)
9f0ff9e5
LZ
2743
2744RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2745RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2746RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2747RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
09106eed 2748GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2749GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2750GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2751GEN_VEXT_VX_RM(vsmul_vx_d, 8)
0a1eaf00 2752
04a61406
LZ
2753/* Vector Single-Width Scaling Shift Instructions */
2754static inline uint8_t
2755vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2756{
2757 uint8_t round, shift = b & 0x7;
2758 uint8_t res;
2759
2760 round = get_round(vxrm, a, shift);
2761 res = (a >> shift) + round;
2762 return res;
2763}
2764static inline uint16_t
2765vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2766{
2767 uint8_t round, shift = b & 0xf;
04a61406
LZ
2768
2769 round = get_round(vxrm, a, shift);
66997c42 2770 return (a >> shift) + round;
04a61406
LZ
2771}
2772static inline uint32_t
2773vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2774{
2775 uint8_t round, shift = b & 0x1f;
04a61406
LZ
2776
2777 round = get_round(vxrm, a, shift);
66997c42 2778 return (a >> shift) + round;
04a61406
LZ
2779}
2780static inline uint64_t
2781vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2782{
2783 uint8_t round, shift = b & 0x3f;
04a61406
LZ
2784
2785 round = get_round(vxrm, a, shift);
66997c42 2786 return (a >> shift) + round;
04a61406
LZ
2787}
2788RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2789RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2790RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2791RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
09106eed 2792GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2793GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2794GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2795GEN_VEXT_VV_RM(vssrl_vv_d, 8)
04a61406
LZ
2796
2797RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2798RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2799RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2800RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
09106eed 2801GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2802GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2803GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2804GEN_VEXT_VX_RM(vssrl_vx_d, 8)
04a61406
LZ
2805
2806static inline int8_t
2807vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2808{
2809 uint8_t round, shift = b & 0x7;
04a61406
LZ
2810
2811 round = get_round(vxrm, a, shift);
66997c42 2812 return (a >> shift) + round;
04a61406
LZ
2813}
2814static inline int16_t
2815vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2816{
2817 uint8_t round, shift = b & 0xf;
04a61406
LZ
2818
2819 round = get_round(vxrm, a, shift);
66997c42 2820 return (a >> shift) + round;
04a61406
LZ
2821}
2822static inline int32_t
2823vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2824{
2825 uint8_t round, shift = b & 0x1f;
04a61406
LZ
2826
2827 round = get_round(vxrm, a, shift);
66997c42 2828 return (a >> shift) + round;
04a61406
LZ
2829}
2830static inline int64_t
2831vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2832{
2833 uint8_t round, shift = b & 0x3f;
04a61406
LZ
2834
2835 round = get_round(vxrm, a, shift);
66997c42 2836 return (a >> shift) + round;
04a61406 2837}
9ff3d287 2838
04a61406
LZ
2839RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2840RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2841RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2842RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
09106eed 2843GEN_VEXT_VV_RM(vssra_vv_b, 1)
2844GEN_VEXT_VV_RM(vssra_vv_h, 2)
2845GEN_VEXT_VV_RM(vssra_vv_w, 4)
2846GEN_VEXT_VV_RM(vssra_vv_d, 8)
04a61406
LZ
2847
2848RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2849RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2850RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2851RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
09106eed 2852GEN_VEXT_VX_RM(vssra_vx_b, 1)
2853GEN_VEXT_VX_RM(vssra_vx_h, 2)
2854GEN_VEXT_VX_RM(vssra_vx_w, 4)
2855GEN_VEXT_VX_RM(vssra_vx_d, 8)
9ff3d287
LZ
2856
2857/* Vector Narrowing Fixed-Point Clip Instructions */
2858static inline int8_t
2859vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2860{
2861 uint8_t round, shift = b & 0xf;
2862 int16_t res;
2863
2864 round = get_round(vxrm, a, shift);
2865 res = (a >> shift) + round;
2866 if (res > INT8_MAX) {
2867 env->vxsat = 0x1;
2868 return INT8_MAX;
2869 } else if (res < INT8_MIN) {
2870 env->vxsat = 0x1;
2871 return INT8_MIN;
2872 } else {
2873 return res;
2874 }
2875}
2876
2877static inline int16_t
2878vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2879{
2880 uint8_t round, shift = b & 0x1f;
2881 int32_t res;
2882
2883 round = get_round(vxrm, a, shift);
2884 res = (a >> shift) + round;
2885 if (res > INT16_MAX) {
2886 env->vxsat = 0x1;
2887 return INT16_MAX;
2888 } else if (res < INT16_MIN) {
2889 env->vxsat = 0x1;
2890 return INT16_MIN;
2891 } else {
2892 return res;
2893 }
2894}
2895
2896static inline int32_t
2897vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2898{
2899 uint8_t round, shift = b & 0x3f;
2900 int64_t res;
2901
2902 round = get_round(vxrm, a, shift);
2903 res = (a >> shift) + round;
2904 if (res > INT32_MAX) {
2905 env->vxsat = 0x1;
2906 return INT32_MAX;
2907 } else if (res < INT32_MIN) {
2908 env->vxsat = 0x1;
2909 return INT32_MIN;
2910 } else {
2911 return res;
2912 }
2913}
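/*
 * The narrowing clips shift the double-width source right and then
 * saturate to the destination width: e.g. vnclip8() on a = 300 with a
 * zero shift amount leaves 300, which exceeds INT8_MAX, so the result
 * is 127 and vxsat is set.
 */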
2914
a70b3a73
FC
2915RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2916RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2917RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
09106eed 2918GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2919GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2920GEN_VEXT_VV_RM(vnclip_wv_w, 4)
a70b3a73
FC
2921
2922RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2923RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2924RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
09106eed 2925GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2926GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2927GEN_VEXT_VX_RM(vnclip_wx_w, 4)
9ff3d287
LZ
2928
2929static inline uint8_t
2930vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2931{
2932 uint8_t round, shift = b & 0xf;
2933 uint16_t res;
2934
2935 round = get_round(vxrm, a, shift);
2936 res = (a >> shift) + round;
2937 if (res > UINT8_MAX) {
2938 env->vxsat = 0x1;
2939 return UINT8_MAX;
2940 } else {
2941 return res;
2942 }
2943}
2944
2945static inline uint16_t
2946vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2947{
2948 uint8_t round, shift = b & 0x1f;
2949 uint32_t res;
2950
2951 round = get_round(vxrm, a, shift);
2952 res = (a >> shift) + round;
2953 if (res > UINT16_MAX) {
2954 env->vxsat = 0x1;
2955 return UINT16_MAX;
2956 } else {
2957 return res;
2958 }
2959}
2960
2961static inline uint32_t
2962vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2963{
2964 uint8_t round, shift = b & 0x3f;
a70b3a73 2965 uint64_t res;
9ff3d287
LZ
2966
2967 round = get_round(vxrm, a, shift);
2968 res = (a >> shift) + round;
2969 if (res > UINT32_MAX) {
2970 env->vxsat = 0x1;
2971 return UINT32_MAX;
2972 } else {
2973 return res;
2974 }
2975}
2976
a70b3a73
FC
2977RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2978RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2979RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
09106eed 2980GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
2981GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
2982GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
9ff3d287 2983
a70b3a73
FC
2984RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2985RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2986RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
09106eed 2987GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
2988GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
2989GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
ce2a0343
LZ
2990
2991/*
 2992 *** Vector Floating-Point Arithmetic Instructions
2993 */
2994/* Vector Single-Width Floating-Point Add/Subtract Instructions */
2995#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2996static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2997 CPURISCVState *env) \
2998{ \
2999 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3000 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3001 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
3002}
3003
5eacf7d8 3004#define GEN_VEXT_VV_ENV(NAME, ESZ) \
ce2a0343
LZ
3005void HELPER(NAME)(void *vd, void *v0, void *vs1, \
3006 void *vs2, CPURISCVState *env, \
3007 uint32_t desc) \
3008{ \
ce2a0343
LZ
3009 uint32_t vm = vext_vm(desc); \
3010 uint32_t vl = env->vl; \
5eacf7d8 3011 uint32_t total_elems = \
3012 vext_get_total_elems(env, desc, ESZ); \
3013 uint32_t vta = vext_vta(desc); \
5b448f44 3014 uint32_t vma = vext_vma(desc); \
ce2a0343
LZ
3015 uint32_t i; \
3016 \
f714361e 3017 for (i = env->vstart; i < vl; i++) { \
f9298de5 3018 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3019 /* set masked-off elements to 1s */ \
3020 vext_set_elems_1s(vd, vma, i * ESZ, \
3021 (i + 1) * ESZ); \
ce2a0343
LZ
3022 continue; \
3023 } \
3024 do_##NAME(vd, vs1, vs2, i, env); \
3025 } \
f714361e 3026 env->vstart = 0; \
5eacf7d8 3027 /* set tail elements to 1s */ \
3028 vext_set_elems_1s(vd, vta, vl * ESZ, \
3029 total_elems * ESZ); \
ce2a0343
LZ
3030}
3031
3032RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3033RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3034RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
5eacf7d8 3035GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
3036GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
3037GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
ce2a0343
LZ
3038
3039#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3040static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3041 CPURISCVState *env) \
3042{ \
3043 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3044 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3045}
3046
5eacf7d8 3047#define GEN_VEXT_VF(NAME, ESZ) \
ce2a0343
LZ
3048void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
3049 void *vs2, CPURISCVState *env, \
3050 uint32_t desc) \
3051{ \
ce2a0343
LZ
3052 uint32_t vm = vext_vm(desc); \
3053 uint32_t vl = env->vl; \
5eacf7d8 3054 uint32_t total_elems = \
3055 vext_get_total_elems(env, desc, ESZ); \
3056 uint32_t vta = vext_vta(desc); \
5b448f44 3057 uint32_t vma = vext_vma(desc); \
ce2a0343
LZ
3058 uint32_t i; \
3059 \
f714361e 3060 for (i = env->vstart; i < vl; i++) { \
f9298de5 3061 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3062 /* set masked-off elements to 1s */ \
3063 vext_set_elems_1s(vd, vma, i * ESZ, \
3064 (i + 1) * ESZ); \
ce2a0343
LZ
3065 continue; \
3066 } \
3067 do_##NAME(vd, s1, vs2, i, env); \
3068 } \
f714361e 3069 env->vstart = 0; \
5eacf7d8 3070 /* set tail elements to 1s */ \
3071 vext_set_elems_1s(vd, vta, vl * ESZ, \
3072 total_elems * ESZ); \
ce2a0343
LZ
3073}
3074
3075RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3076RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3077RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
5eacf7d8 3078GEN_VEXT_VF(vfadd_vf_h, 2)
3079GEN_VEXT_VF(vfadd_vf_w, 4)
3080GEN_VEXT_VF(vfadd_vf_d, 8)
ce2a0343
LZ
3081
3082RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3083RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3084RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
5eacf7d8 3085GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
3086GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
3087GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
ce2a0343
LZ
3088RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3089RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3090RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
5eacf7d8 3091GEN_VEXT_VF(vfsub_vf_h, 2)
3092GEN_VEXT_VF(vfsub_vf_w, 4)
3093GEN_VEXT_VF(vfsub_vf_d, 8)
ce2a0343
LZ
3094
3095static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3096{
3097 return float16_sub(b, a, s);
3098}
3099
3100static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3101{
3102 return float32_sub(b, a, s);
3103}
3104
3105static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3106{
3107 return float64_sub(b, a, s);
3108}
3109
3110RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3111RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3112RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
5eacf7d8 3113GEN_VEXT_VF(vfrsub_vf_h, 2)
3114GEN_VEXT_VF(vfrsub_vf_w, 4)
3115GEN_VEXT_VF(vfrsub_vf_d, 8)
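/*
 * The float*_rsub wrappers above only swap the operand order, so with the
 * OP(s2, s1) convention of OPFVF2, vfrsub.vf computes
 * vd[i] = f[rs1] - vs2[i] rather than vs2[i] - f[rs1].
 */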
eeffab2e
LZ
3116
3117/* Vector Widening Floating-Point Add/Subtract Instructions */
3118static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3119{
3120 return float32_add(float16_to_float32(a, true, s),
3121 float16_to_float32(b, true, s), s);
3122}
3123
3124static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3125{
3126 return float64_add(float32_to_float64(a, s),
3127 float32_to_float64(b, s), s);
3129}
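/*
 * The widening helpers above promote both source operands to the
 * double-width format first (the 'true' argument to float16_to_float32()
 * selects IEEE half-precision), so the add itself rounds only once, in the
 * wider format.
 */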
3130
3131RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3132RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
5eacf7d8 3133GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3134GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
eeffab2e
LZ
3135RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3136RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
5eacf7d8 3137GEN_VEXT_VF(vfwadd_vf_h, 4)
3138GEN_VEXT_VF(vfwadd_vf_w, 8)
eeffab2e
LZ
3139
3140static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3141{
3142 return float32_sub(float16_to_float32(a, true, s),
3143 float16_to_float32(b, true, s), s);
3144}
3145
3146static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3147{
3148 return float64_sub(float32_to_float64(a, s),
3149 float32_to_float64(b, s), s);
3151}
3152
3153RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3154RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
5eacf7d8 3155GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3156GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
eeffab2e
LZ
3157RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3158RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
5eacf7d8 3159GEN_VEXT_VF(vfwsub_vf_h, 4)
3160GEN_VEXT_VF(vfwsub_vf_w, 8)
eeffab2e
LZ
3161
3162static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3163{
3164 return float32_add(a, float16_to_float32(b, true, s), s);
3165}
3166
3167static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3168{
3169 return float64_add(a, float32_to_float64(b, s), s);
3170}
3171
3172RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3173RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
5eacf7d8 3174GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3175GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
eeffab2e
LZ
3176RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3177RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
5eacf7d8 3178GEN_VEXT_VF(vfwadd_wf_h, 4)
3179GEN_VEXT_VF(vfwadd_wf_w, 8)
eeffab2e
LZ
3180
3181static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3182{
3183 return float32_sub(a, float16_to_float32(b, true, s), s);
3184}
3185
3186static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3187{
3188 return float64_sub(a, float32_to_float64(b, s), s);
3189}
3190
3191RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3192RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
5eacf7d8 3193GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3194GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
eeffab2e
LZ
3195RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3196RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
5eacf7d8 3197GEN_VEXT_VF(vfwsub_wf_h, 4)
3198GEN_VEXT_VF(vfwsub_wf_w, 8)
0e0057cb
LZ
3199
3200/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3201RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3202RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3203RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
5eacf7d8 3204GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3205GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3206GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
0e0057cb
LZ
3207RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3208RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3209RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
5eacf7d8 3210GEN_VEXT_VF(vfmul_vf_h, 2)
3211GEN_VEXT_VF(vfmul_vf_w, 4)
3212GEN_VEXT_VF(vfmul_vf_d, 8)
0e0057cb
LZ
3213
3214RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3215RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3216RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
5eacf7d8 3217GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3218GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3219GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
0e0057cb
LZ
3220RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3221RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3222RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
5eacf7d8 3223GEN_VEXT_VF(vfdiv_vf_h, 2)
3224GEN_VEXT_VF(vfdiv_vf_w, 4)
3225GEN_VEXT_VF(vfdiv_vf_d, 8)
0e0057cb
LZ
3226
3227static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3228{
3229 return float16_div(b, a, s);
3230}
3231
3232static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3233{
3234 return float32_div(b, a, s);
3235}
3236
3237static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3238{
3239 return float64_div(b, a, s);
3240}
3241
3242RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3243RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3244RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
5eacf7d8 3245GEN_VEXT_VF(vfrdiv_vf_h, 2)
3246GEN_VEXT_VF(vfrdiv_vf_w, 4)
3247GEN_VEXT_VF(vfrdiv_vf_d, 8)
f7c7b7cd
LZ
3248
3249/* Vector Widening Floating-Point Multiply */
3250static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3251{
3252 return float32_mul(float16_to_float32(a, true, s),
3253 float16_to_float32(b, true, s), s);
3254}
3255
3256static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3257{
3258 return float64_mul(float32_to_float64(a, s),
3259 float32_to_float64(b, s), s);
3261}
3262RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3263RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
5eacf7d8 3264GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3265GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
f7c7b7cd
LZ
3266RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3267RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
5eacf7d8 3268GEN_VEXT_VF(vfwmul_vf_h, 4)
3269GEN_VEXT_VF(vfwmul_vf_w, 8)
4aa5a8fe
LZ
3270
3271/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3272#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3273static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3274 CPURISCVState *env) \
3275{ \
3276 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3277 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3278 TD d = *((TD *)vd + HD(i)); \
3279 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3280}
3281
3282static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3283{
3284 return float16_muladd(a, b, d, 0, s);
3285}
3286
3287static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3288{
3289 return float32_muladd(a, b, d, 0, s);
3290}
3291
3292static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3293{
3294 return float64_muladd(a, b, d, 0, s);
3295}
3296
3297RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3298RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3299RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
5eacf7d8 3300GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3301GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3302GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
4aa5a8fe
LZ
3303
3304#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3305static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3306 CPURISCVState *env) \
3307{ \
3308 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3309 TD d = *((TD *)vd + HD(i)); \
3310 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3311}
3312
3313RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3314RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3315RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
5eacf7d8 3316GEN_VEXT_VF(vfmacc_vf_h, 2)
3317GEN_VEXT_VF(vfmacc_vf_w, 4)
3318GEN_VEXT_VF(vfmacc_vf_d, 8)
4aa5a8fe
LZ
3319
3320static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3321{
3322 return float16_muladd(a, b, d,
3323 float_muladd_negate_c | float_muladd_negate_product, s);
3324}
3325
3326static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3327{
3328 return float32_muladd(a, b, d,
3329 float_muladd_negate_c | float_muladd_negate_product, s);
3330}
3331
3332static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3333{
3334 return float64_muladd(a, b, d,
3335 float_muladd_negate_c | float_muladd_negate_product, s);
3336}
3337
3338RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3339RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3340RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
5eacf7d8 3341GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3342GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3343GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
4aa5a8fe
LZ
3344RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3345RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3346RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
5eacf7d8 3347GEN_VEXT_VF(vfnmacc_vf_h, 2)
3348GEN_VEXT_VF(vfnmacc_vf_w, 4)
3349GEN_VEXT_VF(vfnmacc_vf_d, 8)
4aa5a8fe
LZ
3350
3351static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3352{
3353 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3354}
3355
3356static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3357{
3358 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3359}
3360
3361static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3362{
3363 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3364}
3365
3366RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3367RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3368RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
5eacf7d8 3369GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3370GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3371GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
4aa5a8fe
LZ
3372RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3373RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3374RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
5eacf7d8 3375GEN_VEXT_VF(vfmsac_vf_h, 2)
3376GEN_VEXT_VF(vfmsac_vf_w, 4)
3377GEN_VEXT_VF(vfmsac_vf_d, 8)
4aa5a8fe
LZ
3378
3379static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3380{
3381 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3382}
3383
3384static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3385{
3386 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3387}
3388
3389static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3390{
3391 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3392}
3393
3394RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3395RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3396RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
5eacf7d8 3397GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3398GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3399GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
4aa5a8fe
LZ
3400RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3401RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3402RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
5eacf7d8 3403GEN_VEXT_VF(vfnmsac_vf_h, 2)
3404GEN_VEXT_VF(vfnmsac_vf_w, 4)
3405GEN_VEXT_VF(vfnmsac_vf_d, 8)
4aa5a8fe
LZ
3406
3407static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3408{
3409 return float16_muladd(d, b, a, 0, s);
3410}
3411
3412static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3413{
3414 return float32_muladd(d, b, a, 0, s);
3415}
3416
3417static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3418{
3419 return float64_muladd(d, b, a, 0, s);
3420}
3421
3422RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3423RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3424RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
5eacf7d8 3425GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3426GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3427GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
4aa5a8fe
LZ
3428RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3429RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3430RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
5eacf7d8 3431GEN_VEXT_VF(vfmadd_vf_h, 2)
3432GEN_VEXT_VF(vfmadd_vf_w, 4)
3433GEN_VEXT_VF(vfmadd_vf_d, 8)
4aa5a8fe
LZ
3434
3435static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3436{
3437 return float16_muladd(d, b, a,
3438 float_muladd_negate_c | float_muladd_negate_product, s);
3439}
3440
3441static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3442{
3443 return float32_muladd(d, b, a,
3444 float_muladd_negate_c | float_muladd_negate_product, s);
3445}
3446
3447static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3448{
3449 return float64_muladd(d, b, a,
3450 float_muladd_negate_c | float_muladd_negate_product, s);
3451}
3452
3453RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3454RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3455RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
5eacf7d8 3456GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3457GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3458GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
4aa5a8fe
LZ
3459RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3460RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3461RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
5eacf7d8 3462GEN_VEXT_VF(vfnmadd_vf_h, 2)
3463GEN_VEXT_VF(vfnmadd_vf_w, 4)
3464GEN_VEXT_VF(vfnmadd_vf_d, 8)
4aa5a8fe
LZ
3465
3466static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3467{
3468 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3469}
3470
3471static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3472{
3473 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3474}
3475
3476static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3477{
3478 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3479}
3480
3481RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3482RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3483RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
5eacf7d8 3484GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3485GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3486GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
4aa5a8fe
LZ
3487RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3488RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3489RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
5eacf7d8 3490GEN_VEXT_VF(vfmsub_vf_h, 2)
3491GEN_VEXT_VF(vfmsub_vf_w, 4)
3492GEN_VEXT_VF(vfmsub_vf_d, 8)
4aa5a8fe
LZ
3493
3494static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3495{
3496 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3497}
3498
3499static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3500{
3501 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3502}
3503
3504static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3505{
3506 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3507}
3508
3509RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3510RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3511RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
5eacf7d8 3512GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3513GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3514GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
4aa5a8fe
LZ
3515RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3516RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3517RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
5eacf7d8 3518GEN_VEXT_VF(vfnmsub_vf_h, 2)
3519GEN_VEXT_VF(vfnmsub_vf_w, 4)
3520GEN_VEXT_VF(vfnmsub_vf_d, 8)
0dd50959
LZ
3521
3522/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3523static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3524{
3525 return float32_muladd(float16_to_float32(a, true, s),
3526 float16_to_float32(b, true, s), d, 0, s);
3527}
3528
3529static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3530{
3531 return float64_muladd(float32_to_float64(a, s),
3532 float32_to_float64(b, s), d, 0, s);
3533}
3534
3535RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3536RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
5eacf7d8 3537GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3538GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
0dd50959
LZ
3539RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3540RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
5eacf7d8 3541GEN_VEXT_VF(vfwmacc_vf_h, 4)
3542GEN_VEXT_VF(vfwmacc_vf_w, 8)
0dd50959
LZ
3543
3544static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3545{
3546 return float32_muladd(float16_to_float32(a, true, s),
3547 float16_to_float32(b, true, s), d,
3548 float_muladd_negate_c | float_muladd_negate_product, s);
3549}
3550
3551static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3552{
3553 return float64_muladd(float32_to_float64(a, s),
3554 float32_to_float64(b, s), d,
3555 float_muladd_negate_c | float_muladd_negate_product, s);
3556}
3557
3558RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3559RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
5eacf7d8 3560GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3561GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
0dd50959
LZ
3562RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3563RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
5eacf7d8 3564GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3565GEN_VEXT_VF(vfwnmacc_vf_w, 8)
0dd50959
LZ
3566
3567static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3568{
3569 return float32_muladd(float16_to_float32(a, true, s),
3570 float16_to_float32(b, true, s), d,
3571 float_muladd_negate_c, s);
3572}
3573
3574static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3575{
3576 return float64_muladd(float32_to_float64(a, s),
3577 float32_to_float64(b, s), d,
3578 float_muladd_negate_c, s);
3579}
3580
3581RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3582RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
5eacf7d8 3583GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3584GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
0dd50959
LZ
3585RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3586RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
5eacf7d8 3587GEN_VEXT_VF(vfwmsac_vf_h, 4)
3588GEN_VEXT_VF(vfwmsac_vf_w, 8)
0dd50959
LZ
3589
3590static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3591{
3592 return float32_muladd(float16_to_float32(a, true, s),
3593 float16_to_float32(b, true, s), d,
3594 float_muladd_negate_product, s);
3595}
3596
3597static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3598{
3599 return float64_muladd(float32_to_float64(a, s),
3600 float32_to_float64(b, s), d,
3601 float_muladd_negate_product, s);
3602}
3603
3604RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3605RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
5eacf7d8 3606GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3607GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
0dd50959
LZ
3608RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3609RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
5eacf7d8 3610GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3611GEN_VEXT_VF(vfwnmsac_vf_w, 8)
d9e4ce72
LZ
3612
3613/* Vector Floating-Point Square-Root Instruction */
3614/* (TD, T2, TX2) */
3615#define OP_UU_H uint16_t, uint16_t, uint16_t
3616#define OP_UU_W uint32_t, uint32_t, uint32_t
3617#define OP_UU_D uint64_t, uint64_t, uint64_t
3618
3619#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3620static void do_##NAME(void *vd, void *vs2, int i, \
3621 CPURISCVState *env) \
3622{ \
3623 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3624 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3625}
3626
5eacf7d8 3627#define GEN_VEXT_V_ENV(NAME, ESZ) \
d9e4ce72
LZ
3628void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3629 CPURISCVState *env, uint32_t desc) \
3630{ \
d9e4ce72
LZ
3631 uint32_t vm = vext_vm(desc); \
3632 uint32_t vl = env->vl; \
5eacf7d8 3633 uint32_t total_elems = \
3634 vext_get_total_elems(env, desc, ESZ); \
3635 uint32_t vta = vext_vta(desc); \
5b448f44 3636 uint32_t vma = vext_vma(desc); \
d9e4ce72
LZ
3637 uint32_t i; \
3638 \
3639 if (vl == 0) { \
3640 return; \
3641 } \
f714361e 3642 for (i = env->vstart; i < vl; i++) { \
f9298de5 3643 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3644 /* set masked-off elements to 1s */ \
3645 vext_set_elems_1s(vd, vma, i * ESZ, \
3646 (i + 1) * ESZ); \
d9e4ce72
LZ
3647 continue; \
3648 } \
3649 do_##NAME(vd, vs2, i, env); \
3650 } \
f714361e 3651 env->vstart = 0; \
5eacf7d8 3652 vext_set_elems_1s(vd, vta, vl * ESZ, \
3653 total_elems * ESZ); \
d9e4ce72
LZ
3654}
3655
3656RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3657RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3658RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
5eacf7d8 3659GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3660GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3661GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
230b53dd 3662
e848a1e5
FC
3663/*
3664 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3665 *
3666 * Adapted from riscv-v-spec recip.c:
3667 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3668 */
3669static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3670{
3671 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3672 uint64_t exp = extract64(f, frac_size, exp_size);
3673 uint64_t frac = extract64(f, 0, frac_size);
3674
3675 const uint8_t lookup_table[] = {
3676 52, 51, 50, 48, 47, 46, 44, 43,
3677 42, 41, 40, 39, 38, 36, 35, 34,
3678 33, 32, 31, 30, 30, 29, 28, 27,
3679 26, 25, 24, 23, 23, 22, 21, 20,
3680 19, 19, 18, 17, 16, 16, 15, 14,
3681 14, 13, 12, 12, 11, 10, 10, 9,
3682 9, 8, 7, 7, 6, 6, 5, 4,
3683 4, 3, 3, 2, 2, 1, 1, 0,
3684 127, 125, 123, 121, 119, 118, 116, 114,
3685 113, 111, 109, 108, 106, 105, 103, 102,
3686 100, 99, 97, 96, 95, 93, 92, 91,
3687 90, 88, 87, 86, 85, 84, 83, 82,
3688 80, 79, 78, 77, 76, 75, 74, 73,
3689 72, 71, 70, 70, 69, 68, 67, 66,
3690 65, 64, 63, 63, 62, 61, 60, 59,
3691 59, 58, 57, 56, 56, 55, 54, 53
3692 };
3693 const int precision = 7;
3694
3695 if (exp == 0 && frac != 0) { /* subnormal */
3696 /* Normalize the subnormal. */
3697 while (extract64(frac, frac_size - 1, 1) == 0) {
3698 exp--;
3699 frac <<= 1;
3700 }
3701
3702 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3703 }
3704
3705 int idx = ((exp & 1) << (precision - 1)) |
3706 (frac >> (frac_size - precision + 1));
3707 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3708 (frac_size - precision);
3709 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3710
3711 uint64_t val = 0;
3712 val = deposit64(val, 0, frac_size, out_frac);
3713 val = deposit64(val, frac_size, exp_size, out_exp);
3714 val = deposit64(val, frac_size + exp_size, 1, sign);
3715 return val;
3716}
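/*
 * Quick sanity check of the estimate above (illustrative): for float32 4.0
 * the biased exponent is 129 and the fraction is 0, so idx = 64,
 * out_frac = 127 << 16, and out_exp = (3 * 127 - 129 - 1) / 2 = 125
 * (~exp is -exp - 1 in two's complement).  The packed result is
 * 1.9921875 * 2^-2, roughly 0.498, i.e. a 7-bit estimate of
 * 1/sqrt(4) = 0.5.
 */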
3717
3718static float16 frsqrt7_h(float16 f, float_status *s)
3719{
3720 int exp_size = 5, frac_size = 10;
3721 bool sign = float16_is_neg(f);
3722
3723 /*
3724 * frsqrt7(sNaN) = canonical NaN
3725 * frsqrt7(-inf) = canonical NaN
3726 * frsqrt7(-normal) = canonical NaN
3727 * frsqrt7(-subnormal) = canonical NaN
3728 */
3729 if (float16_is_signaling_nan(f, s) ||
3730 (float16_is_infinity(f) && sign) ||
3731 (float16_is_normal(f) && sign) ||
3732 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3733 s->float_exception_flags |= float_flag_invalid;
3734 return float16_default_nan(s);
3735 }
3736
3737 /* frsqrt7(qNaN) = canonical NaN */
3738 if (float16_is_quiet_nan(f, s)) {
3739 return float16_default_nan(s);
3740 }
3741
3742 /* frsqrt7(+-0) = +-inf */
3743 if (float16_is_zero(f)) {
3744 s->float_exception_flags |= float_flag_divbyzero;
3745 return float16_set_sign(float16_infinity, sign);
3746 }
3747
3748 /* frsqrt7(+inf) = +0 */
3749 if (float16_is_infinity(f) && !sign) {
3750 return float16_set_sign(float16_zero, sign);
3751 }
3752
3753 /* +normal, +subnormal */
3754 uint64_t val = frsqrt7(f, exp_size, frac_size);
3755 return make_float16(val);
3756}
3757
3758static float32 frsqrt7_s(float32 f, float_status *s)
3759{
3760 int exp_size = 8, frac_size = 23;
3761 bool sign = float32_is_neg(f);
3762
3763 /*
3764 * frsqrt7(sNaN) = canonical NaN
3765 * frsqrt7(-inf) = canonical NaN
3766 * frsqrt7(-normal) = canonical NaN
3767 * frsqrt7(-subnormal) = canonical NaN
3768 */
3769 if (float32_is_signaling_nan(f, s) ||
3770 (float32_is_infinity(f) && sign) ||
3771 (float32_is_normal(f) && sign) ||
3772 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3773 s->float_exception_flags |= float_flag_invalid;
3774 return float32_default_nan(s);
3775 }
3776
3777 /* frsqrt7(qNaN) = canonical NaN */
3778 if (float32_is_quiet_nan(f, s)) {
3779 return float32_default_nan(s);
3780 }
3781
3782 /* frsqrt7(+-0) = +-inf */
3783 if (float32_is_zero(f)) {
3784 s->float_exception_flags |= float_flag_divbyzero;
3785 return float32_set_sign(float32_infinity, sign);
3786 }
3787
3788 /* frsqrt7(+inf) = +0 */
3789 if (float32_is_infinity(f) && !sign) {
3790 return float32_set_sign(float32_zero, sign);
3791 }
3792
3793 /* +normal, +subnormal */
3794 uint64_t val = frsqrt7(f, exp_size, frac_size);
3795 return make_float32(val);
3796}
3797
3798static float64 frsqrt7_d(float64 f, float_status *s)
3799{
3800 int exp_size = 11, frac_size = 52;
3801 bool sign = float64_is_neg(f);
3802
3803 /*
3804 * frsqrt7(sNaN) = canonical NaN
3805 * frsqrt7(-inf) = canonical NaN
3806 * frsqrt7(-normal) = canonical NaN
3807 * frsqrt7(-subnormal) = canonical NaN
3808 */
3809 if (float64_is_signaling_nan(f, s) ||
3810 (float64_is_infinity(f) && sign) ||
3811 (float64_is_normal(f) && sign) ||
3812 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3813 s->float_exception_flags |= float_flag_invalid;
3814 return float64_default_nan(s);
3815 }
3816
3817 /* frsqrt7(qNaN) = canonical NaN */
3818 if (float64_is_quiet_nan(f, s)) {
3819 return float64_default_nan(s);
3820 }
3821
3822 /* frsqrt7(+-0) = +-inf */
3823 if (float64_is_zero(f)) {
3824 s->float_exception_flags |= float_flag_divbyzero;
3825 return float64_set_sign(float64_infinity, sign);
3826 }
3827
3828 /* frsqrt7(+inf) = +0 */
3829 if (float64_is_infinity(f) && !sign) {
3830 return float64_set_sign(float64_zero, sign);
3831 }
3832
3833 /* +normal, +subnormal */
3834 uint64_t val = frsqrt7(f, exp_size, frac_size);
3835 return make_float64(val);
3836}
3837
3838RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3839RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3840RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
5eacf7d8 3841GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3842GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3843GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
e848a1e5 3844
55c35407
FC
3845/*
3846 * Vector Floating-Point Reciprocal Estimate Instruction
3847 *
3848 * Adapted from riscv-v-spec recip.c:
3849 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3850 */
3851static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3852 float_status *s)
3853{
3854 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3855 uint64_t exp = extract64(f, frac_size, exp_size);
3856 uint64_t frac = extract64(f, 0, frac_size);
3857
3858 const uint8_t lookup_table[] = {
3859 127, 125, 123, 121, 119, 117, 116, 114,
3860 112, 110, 109, 107, 105, 104, 102, 100,
3861 99, 97, 96, 94, 93, 91, 90, 88,
3862 87, 85, 84, 83, 81, 80, 79, 77,
3863 76, 75, 74, 72, 71, 70, 69, 68,
3864 66, 65, 64, 63, 62, 61, 60, 59,
3865 58, 57, 56, 55, 54, 53, 52, 51,
3866 50, 49, 48, 47, 46, 45, 44, 43,
3867 42, 41, 40, 40, 39, 38, 37, 36,
3868 35, 35, 34, 33, 32, 31, 31, 30,
3869 29, 28, 28, 27, 26, 25, 25, 24,
3870 23, 23, 22, 21, 21, 20, 19, 19,
3871 18, 17, 17, 16, 15, 15, 14, 14,
3872 13, 12, 12, 11, 11, 10, 9, 9,
3873 8, 8, 7, 7, 6, 5, 5, 4,
3874 4, 3, 3, 2, 2, 1, 1, 0
3875 };
3876 const int precision = 7;
3877
3878 if (exp == 0 && frac != 0) { /* subnormal */
3879 /* Normalize the subnormal. */
3880 while (extract64(frac, frac_size - 1, 1) == 0) {
3881 exp--;
3882 frac <<= 1;
3883 }
3884
3885 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3886
3887 if (exp != 0 && exp != UINT64_MAX) {
3888 /*
3889 * Overflow to inf or max value of same sign,
3890 * depending on sign and rounding mode.
3891 */
3892 s->float_exception_flags |= (float_flag_inexact |
3893 float_flag_overflow);
3894
3895 if ((s->float_rounding_mode == float_round_to_zero) ||
3896 ((s->float_rounding_mode == float_round_down) && !sign) ||
3897 ((s->float_rounding_mode == float_round_up) && sign)) {
3898 /* Return the largest-magnitude finite value of the same sign. */
3899 return (sign << (exp_size + frac_size)) |
3900 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3901 } else {
3902 /* Return +-inf. */
3903 return (sign << (exp_size + frac_size)) |
3904 MAKE_64BIT_MASK(frac_size, exp_size);
3905 }
3906 }
3907 }
3908
3909 int idx = frac >> (frac_size - precision);
3910 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3911 (frac_size - precision);
3912 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3913
3914 if (out_exp == 0 || out_exp == UINT64_MAX) {
3915 /*
3916 * The result is subnormal, but don't raise the underflow exception,
3917 * because there's no additional loss of precision.
3918 */
3919 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3920 if (out_exp == UINT64_MAX) {
3921 out_frac >>= 1;
3922 out_exp = 0;
3923 }
3924 }
3925
3926 uint64_t val = 0;
3927 val = deposit64(val, 0, frac_size, out_frac);
3928 val = deposit64(val, frac_size, exp_size, out_exp);
3929 val = deposit64(val, frac_size + exp_size, 1, sign);
3930 return val;
3931}
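/*
 * With bias = 2^(exp_size - 1) - 1, the identity above is
 * out_exp = 2 * bias + ~exp = 2 * bias - exp - 1, the biased exponent of
 * 1/x for a normalized x with significand in [1, 2).  The
 * out_exp == 0 / UINT64_MAX cases then shift the fraction right (inserting
 * the hidden bit) so the estimate is delivered as a subnormal instead of
 * wrapping.
 */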
3932
3933static float16 frec7_h(float16 f, float_status *s)
3934{
3935 int exp_size = 5, frac_size = 10;
3936 bool sign = float16_is_neg(f);
3937
3938 /* frec7(+-inf) = +-0 */
3939 if (float16_is_infinity(f)) {
3940 return float16_set_sign(float16_zero, sign);
3941 }
3942
3943 /* frec7(+-0) = +-inf */
3944 if (float16_is_zero(f)) {
3945 s->float_exception_flags |= float_flag_divbyzero;
3946 return float16_set_sign(float16_infinity, sign);
3947 }
3948
3949 /* frec7(sNaN) = canonical NaN */
3950 if (float16_is_signaling_nan(f, s)) {
3951 s->float_exception_flags |= float_flag_invalid;
3952 return float16_default_nan(s);
3953 }
3954
3955 /* frec7(qNaN) = canonical NaN */
3956 if (float16_is_quiet_nan(f, s)) {
3957 return float16_default_nan(s);
3958 }
3959
3960 /* +-normal, +-subnormal */
3961 uint64_t val = frec7(f, exp_size, frac_size, s);
3962 return make_float16(val);
3963}
3964
3965static float32 frec7_s(float32 f, float_status *s)
3966{
3967 int exp_size = 8, frac_size = 23;
3968 bool sign = float32_is_neg(f);
3969
3970 /* frec7(+-inf) = +-0 */
3971 if (float32_is_infinity(f)) {
3972 return float32_set_sign(float32_zero, sign);
3973 }
3974
3975 /* frec7(+-0) = +-inf */
3976 if (float32_is_zero(f)) {
3977 s->float_exception_flags |= float_flag_divbyzero;
3978 return float32_set_sign(float32_infinity, sign);
3979 }
3980
3981 /* frec7(sNaN) = canonical NaN */
3982 if (float32_is_signaling_nan(f, s)) {
3983 s->float_exception_flags |= float_flag_invalid;
3984 return float32_default_nan(s);
3985 }
3986
3987 /* frec7(qNaN) = canonical NaN */
3988 if (float32_is_quiet_nan(f, s)) {
3989 return float32_default_nan(s);
3990 }
3991
3992 /* +-normal, +-subnormal */
3993 uint64_t val = frec7(f, exp_size, frac_size, s);
3994 return make_float32(val);
3995}
3996
3997static float64 frec7_d(float64 f, float_status *s)
3998{
3999 int exp_size = 11, frac_size = 52;
4000 bool sign = float64_is_neg(f);
4001
4002 /* frec7(+-inf) = +-0 */
4003 if (float64_is_infinity(f)) {
4004 return float64_set_sign(float64_zero, sign);
4005 }
4006
4007 /* frec7(+-0) = +-inf */
4008 if (float64_is_zero(f)) {
4009 s->float_exception_flags |= float_flag_divbyzero;
4010 return float64_set_sign(float64_infinity, sign);
4011 }
4012
4013 /* frec7(sNaN) = canonical NaN */
4014 if (float64_is_signaling_nan(f, s)) {
4015 s->float_exception_flags |= float_flag_invalid;
4016 return float64_default_nan(s);
4017 }
4018
4019 /* frec7(qNaN) = canonical NaN */
4020 if (float64_is_quiet_nan(f, s)) {
4021 return float64_default_nan(s);
4022 }
4023
4024 /* +-normal, +-subnormal */
4025 uint64_t val = frec7(f, exp_size, frac_size, s);
4026 return make_float64(val);
4027}
4028
4029RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
4030RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
4031RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
5eacf7d8 4032GEN_VEXT_V_ENV(vfrec7_v_h, 2)
4033GEN_VEXT_V_ENV(vfrec7_v_w, 4)
4034GEN_VEXT_V_ENV(vfrec7_v_d, 8)
55c35407 4035
230b53dd 4036/* Vector Floating-Point MIN/MAX Instructions */
49c5611a
FC
4037RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
4038RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
4039RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
5eacf7d8 4040GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
4041GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
4042GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
49c5611a
FC
4043RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
4044RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
4045RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
5eacf7d8 4046GEN_VEXT_VF(vfmin_vf_h, 2)
4047GEN_VEXT_VF(vfmin_vf_w, 4)
4048GEN_VEXT_VF(vfmin_vf_d, 8)
230b53dd 4049
49c5611a
FC
4050RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
4051RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
4052RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
5eacf7d8 4053GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
4054GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
4055GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
49c5611a
FC
4056RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
4057RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
4058RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
5eacf7d8 4059GEN_VEXT_VF(vfmax_vf_h, 2)
4060GEN_VEXT_VF(vfmax_vf_w, 4)
4061GEN_VEXT_VF(vfmax_vf_d, 8)
1d426b81
LZ
4062
4063/* Vector Floating-Point Sign-Injection Instructions */
4064static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4065{
4066 return deposit64(b, 0, 15, a);
4067}
4068
4069static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4070{
4071 return deposit64(b, 0, 31, a);
4072}
4073
4074static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4075{
4076 return deposit64(b, 0, 63, a);
4077}
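/*
 * When wired up through OPFVV2/OPFVF2 (which call OP(s2, s1)), the fsgnj
 * helpers above give vd[i] the magnitude of vs2[i] and the sign of vs1[i]
 * (or f[rs1]); fsgnjn* and fsgnjx* below differ only in negating or XORing
 * the injected sign bit.
 */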
4078
4079RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4080RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4081RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
5eacf7d8 4082GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
4083GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
4084GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
1d426b81
LZ
4085RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4086RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4087RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
5eacf7d8 4088GEN_VEXT_VF(vfsgnj_vf_h, 2)
4089GEN_VEXT_VF(vfsgnj_vf_w, 4)
4090GEN_VEXT_VF(vfsgnj_vf_d, 8)
1d426b81
LZ
4091
4092static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4093{
4094 return deposit64(~b, 0, 15, a);
4095}
4096
4097static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4098{
4099 return deposit64(~b, 0, 31, a);
4100}
4101
4102static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4103{
4104 return deposit64(~b, 0, 63, a);
4105}
4106
4107RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4108RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4109RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
5eacf7d8 4110GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
4111GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
4112GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
1d426b81
LZ
4113RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4114RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4115RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
5eacf7d8 4116GEN_VEXT_VF(vfsgnjn_vf_h, 2)
4117GEN_VEXT_VF(vfsgnjn_vf_w, 4)
4118GEN_VEXT_VF(vfsgnjn_vf_d, 8)
1d426b81
LZ
4119
4120static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4121{
4122 return deposit64(b ^ a, 0, 15, a);
4123}
4124
4125static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4126{
4127 return deposit64(b ^ a, 0, 31, a);
4128}
4129
4130static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4131{
4132 return deposit64(b ^ a, 0, 63, a);
4133}
4134
4135RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4136RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4137RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
5eacf7d8 4138GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4139GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4140GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
1d426b81
LZ
4141RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4142RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4143RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
5eacf7d8 4144GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4145GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4146GEN_VEXT_VF(vfsgnjx_vf_d, 8)
2a68e9e5
LZ
4147
4148/* Vector Floating-Point Compare Instructions */
4149#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4150void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4151 CPURISCVState *env, uint32_t desc) \
4152{ \
2a68e9e5
LZ
4153 uint32_t vm = vext_vm(desc); \
4154 uint32_t vl = env->vl; \
86247c51 4155 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
5eacf7d8 4156 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 4157 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
4158 uint32_t i; \
4159 \
f714361e 4160 for (i = env->vstart; i < vl; i++) { \
2a68e9e5
LZ
4161 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4162 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4163 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4164 /* set masked-off elements to 1s */ \
4165 if (vma) { \
4166 vext_set_elem_mask(vd, i, 1); \
4167 } \
2a68e9e5
LZ
4168 continue; \
4169 } \
f9298de5 4170 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4171 DO_OP(s2, s1, &env->fp_status)); \
4172 } \
f714361e 4173 env->vstart = 0; \
5eacf7d8 4174 /* mask destination register is always tail-agnostic */ \
4175 /* set tail elements to 1s */ \
4176 if (vta_all_1s) { \
4177 for (; i < total_elems; i++) { \
4178 vext_set_elem_mask(vd, i, 1); \
4179 } \
4180 } \
2a68e9e5
LZ
4181}
4182
2a68e9e5
LZ
4183GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4184GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4185GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4186
4187#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4188void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4189 CPURISCVState *env, uint32_t desc) \
4190{ \
2a68e9e5
LZ
4191 uint32_t vm = vext_vm(desc); \
4192 uint32_t vl = env->vl; \
86247c51 4193 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
5eacf7d8 4194 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 4195 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
4196 uint32_t i; \
4197 \
f714361e 4198 for (i = env->vstart; i < vl; i++) { \
2a68e9e5 4199 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4200 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4201 /* set masked-off elements to 1s */ \
4202 if (vma) { \
4203 vext_set_elem_mask(vd, i, 1); \
4204 } \
2a68e9e5
LZ
4205 continue; \
4206 } \
f9298de5 4207 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4208 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4209 } \
f714361e 4210 env->vstart = 0; \
5eacf7d8 4211 /* mask destination register is always tail-agnostic */ \
4212 /* set tail elements to 1s */ \
4213 if (vta_all_1s) { \
4214 for (; i < total_elems; i++) { \
4215 vext_set_elem_mask(vd, i, 1); \
4216 } \
4217 } \
2a68e9e5
LZ
4218}
4219
4220GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4221GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4222GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4223
4224static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4225{
4226 FloatRelation compare = float16_compare_quiet(a, b, s);
4227 return compare != float_relation_equal;
4228}
4229
4230static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4231{
4232 FloatRelation compare = float32_compare_quiet(a, b, s);
4233 return compare != float_relation_equal;
4234}
4235
4236static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4237{
4238 FloatRelation compare = float64_compare_quiet(a, b, s);
4239 return compare != float_relation_equal;
4240}
4241
4242GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4243GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4244GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4245GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4246GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4247GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4248
2a68e9e5
LZ
4249GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4250GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4251GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4252GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4253GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4254GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4255
2a68e9e5
LZ
4256GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4257GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4258GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4259GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4260GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4261GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4262
4263static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4264{
4265 FloatRelation compare = float16_compare(a, b, s);
4266 return compare == float_relation_greater;
4267}
4268
4269static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4270{
4271 FloatRelation compare = float32_compare(a, b, s);
4272 return compare == float_relation_greater;
4273}
4274
4275static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4276{
4277 FloatRelation compare = float64_compare(a, b, s);
4278 return compare == float_relation_greater;
4279}
4280
4281GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4282GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4283GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4284
4285static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4286{
4287 FloatRelation compare = float16_compare(a, b, s);
4288 return compare == float_relation_greater ||
4289 compare == float_relation_equal;
4290}
4291
4292static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4293{
4294 FloatRelation compare = float32_compare(a, b, s);
4295 return compare == float_relation_greater ||
4296 compare == float_relation_equal;
4297}
4298
4299static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4300{
4301 FloatRelation compare = float64_compare(a, b, s);
4302 return compare == float_relation_greater ||
4303 compare == float_relation_equal;
4304}
4305
4306GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4307GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4308GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4309
121ddbb3
LZ
4310/* Vector Floating-Point Classify Instruction */
4311#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
4312static void do_##NAME(void *vd, void *vs2, int i) \
4313{ \
4314 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
4315 *((TD *)vd + HD(i)) = OP(s2); \
4316}
4317
5eacf7d8 4318#define GEN_VEXT_V(NAME, ESZ) \
121ddbb3
LZ
4319void HELPER(NAME)(void *vd, void *v0, void *vs2, \
4320 CPURISCVState *env, uint32_t desc) \
4321{ \
121ddbb3
LZ
4322 uint32_t vm = vext_vm(desc); \
4323 uint32_t vl = env->vl; \
5eacf7d8 4324 uint32_t total_elems = \
4325 vext_get_total_elems(env, desc, ESZ); \
4326 uint32_t vta = vext_vta(desc); \
5b448f44 4327 uint32_t vma = vext_vma(desc); \
121ddbb3
LZ
4328 uint32_t i; \
4329 \
f714361e 4330 for (i = env->vstart; i < vl; i++) { \
f9298de5 4331 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4332 /* set masked-off elements to 1s */ \
4333 vext_set_elems_1s(vd, vma, i * ESZ, \
4334 (i + 1) * ESZ); \
121ddbb3
LZ
4335 continue; \
4336 } \
4337 do_##NAME(vd, vs2, i); \
4338 } \
f714361e 4339 env->vstart = 0; \
5eacf7d8 4340 /* set tail elements to 1s */ \
4341 vext_set_elems_1s(vd, vta, vl * ESZ, \
4342 total_elems * ESZ); \
121ddbb3
LZ
4343}
4344
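/*
 * The fclass helpers below return the standard RISC-V classification mask:
 * bit 0 = -inf, 1 = -normal, 2 = -subnormal, 3 = -0, 4 = +0,
 * 5 = +subnormal, 6 = +normal, 7 = +inf, 8 = sNaN, 9 = qNaN.
 */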
4345target_ulong fclass_h(uint64_t frs1)
4346{
4347 float16 f = frs1;
4348 bool sign = float16_is_neg(f);
4349
4350 if (float16_is_infinity(f)) {
4351 return sign ? 1 << 0 : 1 << 7;
4352 } else if (float16_is_zero(f)) {
4353 return sign ? 1 << 3 : 1 << 4;
4354 } else if (float16_is_zero_or_denormal(f)) {
4355 return sign ? 1 << 2 : 1 << 5;
4356 } else if (float16_is_any_nan(f)) {
4357 float_status s = { }; /* for snan_bit_is_one */
4358 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4359 } else {
4360 return sign ? 1 << 1 : 1 << 6;
4361 }
4362}
4363
4364target_ulong fclass_s(uint64_t frs1)
4365{
4366 float32 f = frs1;
4367 bool sign = float32_is_neg(f);
4368
4369 if (float32_is_infinity(f)) {
4370 return sign ? 1 << 0 : 1 << 7;
4371 } else if (float32_is_zero(f)) {
4372 return sign ? 1 << 3 : 1 << 4;
4373 } else if (float32_is_zero_or_denormal(f)) {
4374 return sign ? 1 << 2 : 1 << 5;
4375 } else if (float32_is_any_nan(f)) {
4376 float_status s = { }; /* for snan_bit_is_one */
4377 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4378 } else {
4379 return sign ? 1 << 1 : 1 << 6;
4380 }
4381}
4382
4383target_ulong fclass_d(uint64_t frs1)
4384{
4385 float64 f = frs1;
4386 bool sign = float64_is_neg(f);
4387
4388 if (float64_is_infinity(f)) {
4389 return sign ? 1 << 0 : 1 << 7;
4390 } else if (float64_is_zero(f)) {
4391 return sign ? 1 << 3 : 1 << 4;
4392 } else if (float64_is_zero_or_denormal(f)) {
4393 return sign ? 1 << 2 : 1 << 5;
4394 } else if (float64_is_any_nan(f)) {
4395 float_status s = { }; /* for snan_bit_is_one */
4396 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4397 } else {
4398 return sign ? 1 << 1 : 1 << 6;
4399 }
4400}
4401
4402RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4403RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4404RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
5eacf7d8 4405GEN_VEXT_V(vfclass_v_h, 2)
4406GEN_VEXT_V(vfclass_v_w, 4)
4407GEN_VEXT_V(vfclass_v_d, 8)
64ab5846
LZ
4408
4409/* Vector Floating-Point Merge Instruction */
5eacf7d8 4410
3479a814 4411#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
64ab5846
LZ
4412void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4413 CPURISCVState *env, uint32_t desc) \
4414{ \
64ab5846
LZ
4415 uint32_t vm = vext_vm(desc); \
4416 uint32_t vl = env->vl; \
5eacf7d8 4417 uint32_t esz = sizeof(ETYPE); \
4418 uint32_t total_elems = \
4419 vext_get_total_elems(env, desc, esz); \
4420 uint32_t vta = vext_vta(desc); \
64ab5846
LZ
4421 uint32_t i; \
4422 \
f714361e 4423 for (i = env->vstart; i < vl; i++) { \
64ab5846
LZ
4424 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4425 *((ETYPE *)vd + H(i)) \
f9298de5 4426 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
64ab5846 4427 } \
f714361e 4428 env->vstart = 0; \
5eacf7d8 4429 /* set tail elements to 1s */ \
4430 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
64ab5846
LZ
4431}
4432
3479a814
FC
4433GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4434GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4435GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
92100973
LZ
4436
4437/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4438/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4439RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4440RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4441RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
5eacf7d8 4442GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4443GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4444GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
92100973
LZ
4445
4446/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4447RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4448RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4449RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
5eacf7d8 4450GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4451GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4452GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
92100973
LZ
4453
4454/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4455RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4456RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4457RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
5eacf7d8 4458GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4459GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4460GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
92100973
LZ
4461
4462/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4463RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4464RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4465RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
5eacf7d8 4466GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4467GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4468GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4514b7b1
LZ
4469
4470/* Widening Floating-Point/Integer Type-Convert Instructions */
4471/* (TD, T2, TX2) */
3ce4c09d 4472#define WOP_UU_B uint16_t, uint8_t, uint8_t
4514b7b1
LZ
4473#define WOP_UU_H uint32_t, uint16_t, uint16_t
4474#define WOP_UU_W uint64_t, uint32_t, uint32_t
4475/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
4476RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4477RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
5eacf7d8 4478GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4479GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4514b7b1
LZ
4480
4481/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4482RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4483RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
5eacf7d8 4484GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4485GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4514b7b1
LZ
4486
4487/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
3ce4c09d 4488RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4514b7b1
LZ
4489RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4490RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
5eacf7d8 4491GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4492GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4493GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4514b7b1
LZ
4494
4495/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3ce4c09d 4496RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4514b7b1
LZ
4497RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4498RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
5eacf7d8 4499GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4500GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4501GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4514b7b1
LZ
4502
4503/*
3ce4c09d 4504 * vfwcvt.f.f.v vd, vs2, vm
4514b7b1
LZ
4505 * Convert single-width float to double-width float.
4506 */
4507static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4508{
4509 return float16_to_float32(a, true, s);
4510}
4511
4512RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4513RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
5eacf7d8 4514GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4515GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
878d406e
LZ
4516
4517/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4518/* (TD, T2, TX2) */
ff679b58 4519#define NOP_UU_B uint8_t, uint16_t, uint32_t
878d406e
LZ
4520#define NOP_UU_H uint16_t, uint32_t, uint32_t
4521#define NOP_UU_W uint32_t, uint64_t, uint64_t
4522/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
ff679b58
FC
4523RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4524RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4525RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
5eacf7d8 4526GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4527GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4528GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
878d406e
LZ
4529
4530/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
ff679b58
FC
4531RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4532RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4533RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
5eacf7d8 4534GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4535GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4536GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
878d406e
LZ
4537
4538/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
ff679b58
FC
4539RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4540RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
5eacf7d8 4541GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4542GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
878d406e
LZ
4543
4544/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
ff679b58
FC
4545RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4546RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
5eacf7d8 4547GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4548GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
878d406e
LZ
4549
4550/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4551static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4552{
4553 return float32_to_float16(a, true, s);
4554}
4555
ff679b58
FC
4556RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4557RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
5eacf7d8 4558GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4559GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
fe5c9ab1
LZ
4560
4561/*
4562 *** Vector Reduction Operations
4563 */
4564/* Vector Single-Width Integer Reduction Instructions */
3479a814 4565#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
fe5c9ab1
LZ
4566void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4567 void *vs2, CPURISCVState *env, uint32_t desc) \
4568{ \
fe5c9ab1
LZ
4569 uint32_t vm = vext_vm(desc); \
4570 uint32_t vl = env->vl; \
df4f52a7 4571 uint32_t esz = sizeof(TD); \
4572 uint32_t vlenb = simd_maxsz(desc); \
4573 uint32_t vta = vext_vta(desc); \
fe5c9ab1 4574 uint32_t i; \
fe5c9ab1
LZ
4575 TD s1 = *((TD *)vs1 + HD(0)); \
4576 \
f714361e 4577 for (i = env->vstart; i < vl; i++) { \
fe5c9ab1 4578 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4579 if (!vm && !vext_elem_mask(v0, i)) { \
fe5c9ab1
LZ
4580 continue; \
4581 } \
4582 s1 = OP(s1, (TD)s2); \
4583 } \
4584 *((TD *)vd + HD(0)) = s1; \
f714361e 4585 env->vstart = 0; \
df4f52a7 4586 /* set tail elements to 1s */ \
4587 vext_set_elems_1s(vd, vta, esz, vlenb); \
fe5c9ab1
LZ
4588}
4589
4590/* vd[0] = sum(vs1[0], vs2[*]) */
3479a814
FC
4591GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4592GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4593GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4594GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
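/*
 * A minimal, QEMU-independent sketch of what the reduction helpers compute:
 * vd[0] = vs1[0] OP vs2[0] OP ... OP vs2[vl-1], with inactive elements
 * skipped.  The name model_vredsum_vs_w and the flat mask/element arrays
 * are illustrative assumptions, not part of this file's API.
 */
static int32_t model_vredsum_vs_w(int32_t seed, const int32_t *vs2,
                                  const uint8_t *mask, uint32_t vl, bool vm)
{
    int32_t acc = seed;                    /* vs1[0] seeds the accumulator */

    for (uint32_t i = 0; i < vl; i++) {
        if (!vm && !mask[i]) {             /* masked-off elements contribute nothing */
            continue;
        }
        acc += vs2[i];
    }
    return acc;                            /* written back to vd[0] */
}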
fe5c9ab1
LZ
4595
4596/* vd[0] = maxu(vs1[0], vs2[*]) */
3479a814
FC
4597GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4598GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4599GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4600GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4601
4602/* vd[0] = max(vs1[0], vs2[*]) */
3479a814
FC
4603GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4604GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4605GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4606GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4607
4608/* vd[0] = minu(vs1[0], vs2[*]) */
3479a814
FC
4609GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4610GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4611GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4612GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4613
4614/* vd[0] = min(vs1[0], vs2[*]) */
3479a814
FC
4615GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4616GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4617GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4618GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4619
4620/* vd[0] = and(vs1[0], vs2[*]) */
3479a814
FC
4621GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4622GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4623GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4624GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
fe5c9ab1
LZ
4625
4626/* vd[0] = or(vs1[0], vs2[*]) */
3479a814
FC
4627GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4628GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4629GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4630GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
fe5c9ab1
LZ
4631
4632/* vd[0] = xor(vs1[0], vs2[*]) */
3479a814
FC
4633GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4634GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4635GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4636GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
bba71820
LZ
4637
4638/* Vector Widening Integer Reduction Instructions */
4639/* Signed sum reduction into double-width accumulator */
3479a814
FC
4640GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4641GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4642GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
bba71820
LZ
4643
4644/* Unsigned sum reduction into double-width accumulator */
3479a814
FC
4645GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4646GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4647GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
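/*
 * A minimal sketch (illustrative name, flat arrays, unmasked) of the widening
 * reductions above: each SEW-wide source element is widened to 2*SEW before
 * it is accumulated, so the running sum cannot wrap at SEW bits.
 */
static uint16_t model_vwredsumu_vs_b(uint16_t seed, const uint8_t *vs2,
                                     uint32_t vl)
{
    uint16_t acc = seed;                   /* vs1[0] is already 2*SEW wide */

    for (uint32_t i = 0; i < vl; i++) {
        acc = (uint16_t)(acc + (uint16_t)vs2[i]);   /* widen, then add */
    }
    return acc;
}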
523547f1
LZ
4648
4649/* Vector Single-Width Floating-Point Reduction Instructions */
3479a814 4650#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
523547f1
LZ
4651void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4652 void *vs2, CPURISCVState *env, \
4653 uint32_t desc) \
4654{ \
523547f1
LZ
4655 uint32_t vm = vext_vm(desc); \
4656 uint32_t vl = env->vl; \
df4f52a7 4657 uint32_t esz = sizeof(TD); \
4658 uint32_t vlenb = simd_maxsz(desc); \
4659 uint32_t vta = vext_vta(desc); \
523547f1 4660 uint32_t i; \
523547f1
LZ
4661 TD s1 = *((TD *)vs1 + HD(0)); \
4662 \
f714361e 4663 for (i = env->vstart; i < vl; i++) { \
523547f1 4664 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4665 if (!vm && !vext_elem_mask(v0, i)) { \
523547f1
LZ
4666 continue; \
4667 } \
4668 s1 = OP(s1, (TD)s2, &env->fp_status); \
4669 } \
4670 *((TD *)vd + HD(0)) = s1; \
f714361e 4671 env->vstart = 0; \
df4f52a7 4672 /* set tail elements to 1s */ \
4673 vext_set_elems_1s(vd, vta, esz, vlenb); \
523547f1
LZ
4674}
4675
4676/* Unordered sum */
a3ab69f9
YL
4677GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4678GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4679GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4680
4681/* Ordered sum */
4682GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4683GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4684GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
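/*
 * A small, QEMU-independent illustration of why an ordered sum exists as a
 * separate instruction: floating-point addition is not associative, so the
 * grouping of the additions can change the result.  The values are assumed
 * example inputs and plain host floats stand in for softfloat; both helpers
 * above simply accumulate in element order.
 */
static float model_fp_sum_order_demo(void)
{
    float big = 1.0e8f, tiny = 3.0f;
    float left_to_right = (big + tiny) + tiny;   /* each tiny is absorbed: 1.0e8f */
    float tinies_first = big + (tiny + tiny);    /* the pair survives: 1.00000008e8f */

    return left_to_right - tinies_first;         /* -8.0f, not 0.0f */
}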
523547f1
LZ
4685
4686/* Maximum value */
08b60eeb
FC
4687GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4688GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4689GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
523547f1
LZ
4690
4691/* Minimum value */
08b60eeb
FC
4692GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4693GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4694GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
696b0c26 4695
5bda21c0
YL
4696/* Vector Widening Floating-Point Add Instructions */
4697static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
696b0c26 4698{
5bda21c0 4699 return float32_add(a, float16_to_float32(b, true, s), s);
696b0c26
LZ
4700}
4701
5bda21c0 4702static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
696b0c26 4703{
5bda21c0 4704 return float64_add(a, float32_to_float64(b, s), s);
696b0c26 4705}
c21f34ae 4706
5bda21c0 4707/* Vector Widening Floating-Point Reduction Instructions */
a3ab69f9
YL
4708/* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4709GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4710GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4711GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4712GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
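/*
 * A minimal sketch (plain host floats instead of softfloat, illustrative
 * name) of the promote-then-accumulate pattern used by fwadd16/fwadd32:
 * each SEW-wide element is first converted to 2*SEW and then added to the
 * 2*SEW-wide accumulator.
 */
static double model_vfwredusum_vs_w(double seed, const float *vs2, uint32_t vl)
{
    double acc = seed;                     /* vs1[0] is already double-width */

    for (uint32_t i = 0; i < vl; i++) {
        acc += (double)vs2[i];             /* promote(SEW), then accumulate */
    }
    return acc;
}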
5bda21c0 4713
c21f34ae
LZ
4714/*
4715 *** Vector Mask Operations
4716 */
4717/* Vector Mask-Register Logical Instructions */
4718#define GEN_VEXT_MASK_VV(NAME, OP) \
4719void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4720 void *vs2, CPURISCVState *env, \
4721 uint32_t desc) \
4722{ \
c21f34ae 4723 uint32_t vl = env->vl; \
86247c51 4724 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
acc6ffd4 4725 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
c21f34ae
LZ
4726 uint32_t i; \
4727 int a, b; \
4728 \
f714361e 4729 for (i = env->vstart; i < vl; i++) { \
f9298de5
FC
4730 a = vext_elem_mask(vs1, i); \
4731 b = vext_elem_mask(vs2, i); \
4732 vext_set_elem_mask(vd, i, OP(b, a)); \
c21f34ae 4733 } \
f714361e 4734 env->vstart = 0; \
acc6ffd4 4735 /* mask destination register is always tail- \
4736 * agnostic \
4737 */ \
4738 /* set tail elements to 1s */ \
4739 if (vta_all_1s) { \
4740 for (; i < total_elems; i++) { \
4741 vext_set_elem_mask(vd, i, 1); \
4742 } \
4743 } \
c21f34ae
LZ
4744}
4745
4746#define DO_NAND(N, M) (!(N & M))
4747#define DO_ANDNOT(N, M) (N & !M)
4748#define DO_NOR(N, M) (!(N | M))
4749#define DO_ORNOT(N, M) (N | !M)
4750#define DO_XNOR(N, M) (!(N ^ M))
4751
4752GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4753GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
9c0d2559 4754GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
c21f34ae
LZ
4755GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4756GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4757GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
9c0d2559 4758GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
c21f34ae 4759GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
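/*
 * A minimal, QEMU-independent model of the mask-register logicals above:
 * every destination mask bit is computed from the corresponding source bits,
 * e.g. vmnand.mm sets vd[i] = !(vs1[i] & vs2[i]).  The name and the flat
 * one-byte-per-bit arrays are illustrative assumptions.
 */
static void model_vmnand_mm(uint8_t *vd, const uint8_t *vs1,
                            const uint8_t *vs2, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        uint8_t a = vs1[i] & 1;
        uint8_t b = vs2[i] & 1;

        vd[i] = !(a & b);                  /* DO_NAND applied to single mask bits */
    }
}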
2e88f551 4760
0014aa74
FC
4761/* Vector count population in mask vcpop */
4762target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4763 uint32_t desc)
2e88f551
LZ
4764{
4765 target_ulong cnt = 0;
2e88f551
LZ
4766 uint32_t vm = vext_vm(desc);
4767 uint32_t vl = env->vl;
4768 int i;
4769
f714361e 4770 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4771 if (vm || vext_elem_mask(v0, i)) {
4772 if (vext_elem_mask(vs2, i)) {
2e88f551
LZ
4773 cnt++;
4774 }
4775 }
4776 }
f714361e 4777 env->vstart = 0;
2e88f551
LZ
4778 return cnt;
4779}
0db67e1c 4780
d71a24fc
FC
4781/* vfirst find-first-set mask bit */
4782target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4783 uint32_t desc)
0db67e1c 4784{
0db67e1c
LZ
4785 uint32_t vm = vext_vm(desc);
4786 uint32_t vl = env->vl;
4787 int i;
4788
f714361e 4789 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4790 if (vm || vext_elem_mask(v0, i)) {
4791 if (vext_elem_mask(vs2, i)) {
0db67e1c
LZ
4792 return i;
4793 }
4794 }
4795 }
f714361e 4796 env->vstart = 0;
0db67e1c
LZ
4797 return -1LL;
4798}
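/*
 * A minimal sketch (illustrative names, flat one-byte-per-bit arrays) of the
 * two scalar-producing mask queries above: vcpop.m counts the active set
 * bits, vfirst.m returns the index of the lowest-numbered one or -1 if no
 * active bit is set.
 */
static long model_vcpop_m(const uint8_t *vs2, const uint8_t *v0,
                          uint32_t vl, bool vm)
{
    long cnt = 0;

    for (uint32_t i = 0; i < vl; i++) {
        if ((vm || v0[i]) && vs2[i]) {     /* count active, set bits */
            cnt++;
        }
    }
    return cnt;
}

static long model_vfirst_m(const uint8_t *vs2, const uint8_t *v0,
                           uint32_t vl, bool vm)
{
    for (uint32_t i = 0; i < vl; i++) {
        if ((vm || v0[i]) && vs2[i]) {
            return i;                      /* first active set bit */
        }
    }
    return -1;                             /* no set bit found */
}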
81fbf7da
LZ
4799
4800enum set_mask_type {
4801 ONLY_FIRST = 1,
4802 INCLUDE_FIRST,
4803 BEFORE_FIRST,
4804};
4805
4806static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4807 uint32_t desc, enum set_mask_type type)
4808{
81fbf7da
LZ
4809 uint32_t vm = vext_vm(desc);
4810 uint32_t vl = env->vl;
86247c51 4811 uint32_t total_elems = riscv_cpu_cfg(env)->vlen;
acc6ffd4 4812 uint32_t vta_all_1s = vext_vta_all_1s(desc);
35f2d795 4813 uint32_t vma = vext_vma(desc);
81fbf7da
LZ
4814 int i;
4815 bool first_mask_bit = false;
4816
f714361e 4817 for (i = env->vstart; i < vl; i++) {
f9298de5 4818 if (!vm && !vext_elem_mask(v0, i)) {
35f2d795
YTC
4819 /* set masked-off elements to 1s */
4820 if (vma) {
4821 vext_set_elem_mask(vd, i, 1);
4822 }
81fbf7da
LZ
4823 continue;
4824 }
4825 /* write a zero to all following active elements */
4826 if (first_mask_bit) {
f9298de5 4827 vext_set_elem_mask(vd, i, 0);
81fbf7da
LZ
4828 continue;
4829 }
f9298de5 4830 if (vext_elem_mask(vs2, i)) {
81fbf7da
LZ
4831 first_mask_bit = true;
4832 if (type == BEFORE_FIRST) {
f9298de5 4833 vext_set_elem_mask(vd, i, 0);
81fbf7da 4834 } else {
f9298de5 4835 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4836 }
4837 } else {
4838 if (type == ONLY_FIRST) {
f9298de5 4839 vext_set_elem_mask(vd, i, 0);
81fbf7da 4840 } else {
f9298de5 4841 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4842 }
4843 }
4844 }
f714361e 4845 env->vstart = 0;
acc6ffd4 4846 /* mask destination register is always tail-agnostic */
4847 /* set tail elements to 1s */
4848 if (vta_all_1s) {
4849 for (; i < total_elems; i++) {
4850 vext_set_elem_mask(vd, i, 1);
4851 }
4852 }
81fbf7da
LZ
4853}
4854
4855void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4856 uint32_t desc)
4857{
4858 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4859}
4860
4861void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4862 uint32_t desc)
4863{
4864 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4865}
4866
4867void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4868 uint32_t desc)
4869{
4870 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4871}
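/*
 * A worked example (assumed input, unmasked, illustrative name) of the three
 * set-X-first operations dispatched to vmsetm() above.  For
 * vs2 = 0 0 1 0 1 ... the results are:
 *   vmsbf.m -> 1 1 0 0 0 ...   set-before-first
 *   vmsif.m -> 1 1 1 0 0 ...   set-including-first
 *   vmsof.m -> 0 0 1 0 0 ...   set-only-first
 * The sketch below models only vmsbf.m.
 */
static void model_vmsbf_m(uint8_t *vd, const uint8_t *vs2, uint32_t vl)
{
    bool seen = false;

    for (uint32_t i = 0; i < vl; i++) {
        if (vs2[i]) {
            seen = true;                   /* first set bit found */
        }
        vd[i] = seen ? 0 : 1;              /* 1s strictly before the first set bit */
    }
}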
78d90cfe
LZ
4872
4873/* Vector Iota Instruction */
3479a814 4874#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
78d90cfe
LZ
4875void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4876 uint32_t desc) \
4877{ \
78d90cfe
LZ
4878 uint32_t vm = vext_vm(desc); \
4879 uint32_t vl = env->vl; \
acc6ffd4 4880 uint32_t esz = sizeof(ETYPE); \
4881 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4882 uint32_t vta = vext_vta(desc); \
35f2d795 4883 uint32_t vma = vext_vma(desc); \
78d90cfe
LZ
4884 uint32_t sum = 0; \
4885 int i; \
4886 \
f714361e 4887 for (i = env->vstart; i < vl; i++) { \
f9298de5 4888 if (!vm && !vext_elem_mask(v0, i)) { \
35f2d795
YTC
4889 /* set masked-off elements to 1s */ \
4890 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
78d90cfe
LZ
4891 continue; \
4892 } \
4893 *((ETYPE *)vd + H(i)) = sum; \
f9298de5 4894 if (vext_elem_mask(vs2, i)) { \
78d90cfe
LZ
4895 sum++; \
4896 } \
4897 } \
f714361e 4898 env->vstart = 0; \
acc6ffd4 4899 /* set tail elements to 1s */ \
4900 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
78d90cfe
LZ
4901}
4902
3479a814
FC
4903GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4904GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4905GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4906GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
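/*
 * A minimal sketch (illustrative name, unmasked, flat arrays) of viota.m:
 * each destination element receives the number of set mask bits of vs2 at
 * positions strictly below it, i.e. an exclusive prefix sum of the mask.
 */
static void model_viota_m_w(uint32_t *vd, const uint8_t *vs2, uint32_t vl)
{
    uint32_t sum = 0;

    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = sum;                       /* count of set bits in vs2[0..i-1] */
        if (vs2[i]) {
            sum++;
        }
    }
}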
126bec3f
LZ
4907
4908/* Vector Element Index Instruction */
3479a814 4909#define GEN_VEXT_VID_V(NAME, ETYPE, H) \
126bec3f
LZ
4910void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4911{ \
126bec3f
LZ
4912 uint32_t vm = vext_vm(desc); \
4913 uint32_t vl = env->vl; \
acc6ffd4 4914 uint32_t esz = sizeof(ETYPE); \
4915 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4916 uint32_t vta = vext_vta(desc); \
35f2d795 4917 uint32_t vma = vext_vma(desc); \
126bec3f
LZ
4918 int i; \
4919 \
f714361e 4920 for (i = env->vstart; i < vl; i++) { \
f9298de5 4921 if (!vm && !vext_elem_mask(v0, i)) { \
35f2d795
YTC
4922 /* set masked-off elements to 1s */ \
4923 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
126bec3f
LZ
4924 continue; \
4925 } \
4926 *((ETYPE *)vd + H(i)) = i; \
4927 } \
f714361e 4928 env->vstart = 0; \
acc6ffd4 4929 /* set tail elements to 1s */ \
4930 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
126bec3f
LZ
4931}
4932
3479a814
FC
4933GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4934GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4935GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4936GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
ec17e036
LZ
4937
4938/*
4939 *** Vector Permutation Instructions
4940 */
4941
4942/* Vector Slide Instructions */
3479a814 4943#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
ec17e036
LZ
4944void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4945 CPURISCVState *env, uint32_t desc) \
4946{ \
ec17e036
LZ
4947 uint32_t vm = vext_vm(desc); \
4948 uint32_t vl = env->vl; \
803963f7 4949 uint32_t esz = sizeof(ETYPE); \
4950 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4951 uint32_t vta = vext_vta(desc); \
edabcd0e 4952 uint32_t vma = vext_vma(desc); \
f714361e 4953 target_ulong offset = s1, i_min, i; \
ec17e036 4954 \
f714361e
FC
4955 i_min = MAX(env->vstart, offset); \
4956 for (i = i_min; i < vl; i++) { \
f9298de5 4957 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
4958 /* set masked-off elements to 1s */ \
4959 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
ec17e036
LZ
4960 continue; \
4961 } \
4962 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
4963 } \
803963f7 4964 /* set tail elements to 1s */ \
4965 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
ec17e036
LZ
4966}
4967
4968/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
3479a814
FC
4969GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
4970GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4971GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4972GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
ec17e036 4973
3479a814 4974#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
ec17e036
LZ
4975void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4976 CPURISCVState *env, uint32_t desc) \
4977{ \
6438ed61 4978 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
ec17e036
LZ
4979 uint32_t vm = vext_vm(desc); \
4980 uint32_t vl = env->vl; \
803963f7 4981 uint32_t esz = sizeof(ETYPE); \
4982 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4983 uint32_t vta = vext_vta(desc); \
edabcd0e 4984 uint32_t vma = vext_vma(desc); \
6438ed61 4985 target_ulong i_max, i; \
ec17e036 4986 \
f714361e
FC
4987 i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
4988 for (i = env->vstart; i < i_max; ++i) { \
edabcd0e
YTC
4989 if (!vm && !vext_elem_mask(v0, i)) { \
4990 /* set masked-off elements to 1s */ \
4991 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4992 continue; \
6438ed61 4993 } \
edabcd0e 4994 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
6438ed61
FC
4995 } \
4996 \
4997 for (i = i_max; i < vl; ++i) { \
4998 if (vm || vext_elem_mask(v0, i)) { \
4999 *((ETYPE *)vd + H(i)) = 0; \
ec17e036 5000 } \
ec17e036 5001 } \
f714361e
FC
5002 \
5003 env->vstart = 0; \
803963f7 5004 /* set tail elements to 1s */ \
5005 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
ec17e036
LZ
5006}
5007
5008/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
3479a814
FC
5009GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
5010GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
5011GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
5012GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
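/*
 * A minimal sketch (illustrative names, unmasked, flat arrays) of the two
 * slide helpers above: vslideup.vx writes vd[i] = vs2[i - OFFSET] for
 * i >= OFFSET and leaves vd[0..OFFSET-1] untouched; vslidedown.vx writes
 * vd[i] = vs2[i + OFFSET] and zeroes elements whose source index would
 * fall at or beyond vlmax.
 */
static void model_vslideup_vx_w(uint32_t *vd, const uint32_t *vs2,
                                uint32_t offset, uint32_t vl)
{
    for (uint32_t i = offset; i < vl; i++) {
        vd[i] = vs2[i - offset];
    }
}

static void model_vslidedown_vx_w(uint32_t *vd, const uint32_t *vs2,
                                  uint32_t offset, uint32_t vl,
                                  uint32_t vlmax)
{
    for (uint32_t i = 0; i < vl; i++) {
        /* elements sourced from beyond the register group read as zero */
        vd[i] = (offset < vlmax && i < vlmax - offset) ? vs2[i + offset] : 0;
    }
}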
ec17e036 5013
c7b8a421 5014#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
8c89d50c 5015static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
c7b8a421 5016 void *vs2, CPURISCVState *env, uint32_t desc) \
8500d4ab 5017{ \
c7b8a421 5018 typedef uint##BITWIDTH##_t ETYPE; \
8500d4ab
FC
5019 uint32_t vm = vext_vm(desc); \
5020 uint32_t vl = env->vl; \
803963f7 5021 uint32_t esz = sizeof(ETYPE); \
5022 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5023 uint32_t vta = vext_vta(desc); \
edabcd0e 5024 uint32_t vma = vext_vma(desc); \
8500d4ab
FC
5025 uint32_t i; \
5026 \
f714361e 5027 for (i = env->vstart; i < vl; i++) { \
8500d4ab 5028 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5029 /* set masked-off elements to 1s */ \
5030 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
8500d4ab
FC
5031 continue; \
5032 } \
5033 if (i == 0) { \
5034 *((ETYPE *)vd + H(i)) = s1; \
5035 } else { \
5036 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
5037 } \
5038 } \
f714361e 5039 env->vstart = 0; \
803963f7 5040 /* set tail elements to 1s */ \
5041 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
8500d4ab
FC
5042}
5043
5044GEN_VEXT_VSLIE1UP(8, H1)
5045GEN_VEXT_VSLIE1UP(16, H2)
5046GEN_VEXT_VSLIE1UP(32, H4)
5047GEN_VEXT_VSLIE1UP(64, H8)
5048
c7b8a421 5049#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
8500d4ab
FC
5050void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5051 CPURISCVState *env, uint32_t desc) \
5052{ \
c7b8a421 5053 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
ec17e036
LZ
5054}
5055
5056/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
8500d4ab
FC
5057GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
5058GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
5059GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
5060GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
5061
c7b8a421 5062#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
8c89d50c 5063static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
c7b8a421 5064 void *vs2, CPURISCVState *env, uint32_t desc) \
8500d4ab 5065{ \
c7b8a421 5066 typedef uint##BITWIDTH##_t ETYPE; \
8500d4ab
FC
5067 uint32_t vm = vext_vm(desc); \
5068 uint32_t vl = env->vl; \
803963f7 5069 uint32_t esz = sizeof(ETYPE); \
5070 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5071 uint32_t vta = vext_vta(desc); \
edabcd0e 5072 uint32_t vma = vext_vma(desc); \
8500d4ab
FC
5073 uint32_t i; \
5074 \
f714361e 5075 for (i = env->vstart; i < vl; i++) { \
8500d4ab 5076 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5077 /* set masked-off elements to 1s */ \
5078 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
8500d4ab
FC
5079 continue; \
5080 } \
5081 if (i == vl - 1) { \
5082 *((ETYPE *)vd + H(i)) = s1; \
5083 } else { \
5084 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
5085 } \
5086 } \
f714361e 5087 env->vstart = 0; \
803963f7 5088 /* set tail elements to 1s */ \
5089 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
8500d4ab
FC
5090}
5091
5092GEN_VEXT_VSLIDE1DOWN(8, H1)
5093GEN_VEXT_VSLIDE1DOWN(16, H2)
5094GEN_VEXT_VSLIDE1DOWN(32, H4)
5095GEN_VEXT_VSLIDE1DOWN(64, H8)
5096
c7b8a421 5097#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
8500d4ab
FC
5098void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5099 CPURISCVState *env, uint32_t desc) \
5100{ \
c7b8a421 5101 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
ec17e036
LZ
5102}
5103
5104/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
8500d4ab
FC
5105GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
5106GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
5107GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
5108GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
5109
5110/* Vector Floating-Point Slide Instructions */
c7b8a421 5111#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
8500d4ab
FC
5112void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5113 CPURISCVState *env, uint32_t desc) \
5114{ \
c7b8a421 5115 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
8500d4ab
FC
5116}
5117
5118/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
5119GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
5120GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
5121GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5122
c7b8a421 5123#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
8500d4ab
FC
5124void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5125 CPURISCVState *env, uint32_t desc) \
5126{ \
c7b8a421 5127 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
8500d4ab
FC
5128}
5129
5130/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
5131GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
5132GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
5133GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
e4b83d5c
LZ
5134
5135/* Vector Register Gather Instruction */
50bfb45b 5136#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
e4b83d5c
LZ
5137void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5138 CPURISCVState *env, uint32_t desc) \
5139{ \
f714361e 5140 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
e4b83d5c
LZ
5141 uint32_t vm = vext_vm(desc); \
5142 uint32_t vl = env->vl; \
803963f7 5143 uint32_t esz = sizeof(TS2); \
5144 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5145 uint32_t vta = vext_vta(desc); \
edabcd0e 5146 uint32_t vma = vext_vma(desc); \
b11e84b8
FC
5147 uint64_t index; \
5148 uint32_t i; \
e4b83d5c 5149 \
f714361e 5150 for (i = env->vstart; i < vl; i++) { \
f9298de5 5151 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5152 /* set masked-off elements to 1s */ \
5153 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
e4b83d5c
LZ
5154 continue; \
5155 } \
50bfb45b 5156 index = *((TS1 *)vs1 + HS1(i)); \
e4b83d5c 5157 if (index >= vlmax) { \
50bfb45b 5158 *((TS2 *)vd + HS2(i)) = 0; \
e4b83d5c 5159 } else { \
50bfb45b 5160 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
e4b83d5c
LZ
5161 } \
5162 } \
f714361e 5163 env->vstart = 0; \
803963f7 5164 /* set tail elements to 1s */ \
5165 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
e4b83d5c
LZ
5166}
5167
5168/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
50bfb45b
FC
5169GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5170GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5171GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5172GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5173
5174GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5175GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5176GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5177GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
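/*
 * A minimal sketch (illustrative name, unmasked, flat arrays) of the gather
 * helpers above: vd[i] = vs2[vs1[i]], with any index at or beyond vlmax
 * yielding zero.  vrgatherei16 differs only in reading 16-bit indices.
 */
static void model_vrgather_vv_w(uint32_t *vd, const uint32_t *vs1,
                                const uint32_t *vs2, uint32_t vl,
                                uint32_t vlmax)
{
    for (uint32_t i = 0; i < vl; i++) {
        uint64_t index = vs1[i];

        vd[i] = (index >= vlmax) ? 0 : vs2[index];
    }
}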
e4b83d5c 5178
3479a814 5179#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
e4b83d5c
LZ
5180void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5181 CPURISCVState *env, uint32_t desc) \
5182{ \
5a9f8e15 5183 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
e4b83d5c
LZ
5184 uint32_t vm = vext_vm(desc); \
5185 uint32_t vl = env->vl; \
803963f7 5186 uint32_t esz = sizeof(ETYPE); \
5187 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5188 uint32_t vta = vext_vta(desc); \
edabcd0e 5189 uint32_t vma = vext_vma(desc); \
b11e84b8
FC
5190 uint64_t index = s1; \
5191 uint32_t i; \
e4b83d5c 5192 \
f714361e 5193 for (i = env->vstart; i < vl; i++) { \
f9298de5 5194 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5195 /* set masked-off elements to 1s */ \
5196 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
e4b83d5c
LZ
5197 continue; \
5198 } \
5199 if (index >= vlmax) { \
5200 *((ETYPE *)vd + H(i)) = 0; \
5201 } else { \
5202 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5203 } \
5204 } \
f714361e 5205 env->vstart = 0; \
803963f7 5206 /* set tail elements to 1s */ \
5207 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
e4b83d5c
LZ
5208}
5209
5210/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
3479a814
FC
5211GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5212GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5213GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5214GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
31bf42a2
LZ
5215
5216/* Vector Compress Instruction */
3479a814 5217#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
31bf42a2
LZ
5218void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5219 CPURISCVState *env, uint32_t desc) \
5220{ \
31bf42a2 5221 uint32_t vl = env->vl; \
803963f7 5222 uint32_t esz = sizeof(ETYPE); \
5223 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5224 uint32_t vta = vext_vta(desc); \
31bf42a2
LZ
5225 uint32_t num = 0, i; \
5226 \
f714361e 5227 for (i = env->vstart; i < vl; i++) { \
f9298de5 5228 if (!vext_elem_mask(vs1, i)) { \
31bf42a2
LZ
5229 continue; \
5230 } \
5231 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5232 num++; \
5233 } \
f714361e 5234 env->vstart = 0; \
803963f7 5235 /* set tail elements to 1s */ \
5236 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
31bf42a2
LZ
5237}
5238
5239/* Compress into vd elements of vs2 where vs1 is enabled */
3479a814
FC
5240GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5241GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5242GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5243GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
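/*
 * A minimal sketch (illustrative name, flat arrays) of vcompress.vm: the
 * elements of vs2 whose vs1 mask bit is set are packed contiguously at the
 * start of vd; the count of packed elements is returned.
 */
static uint32_t model_vcompress_vm_w(uint32_t *vd, const uint8_t *vs1,
                                     const uint32_t *vs2, uint32_t vl)
{
    uint32_t num = 0;

    for (uint32_t i = 0; i < vl; i++) {
        if (vs1[i]) {
            vd[num++] = vs2[i];            /* pack enabled elements densely */
        }
    }
    return num;
}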
cd01340e 5244
f714361e 5245/* Vector Whole Register Move */
f32d82f6
WL
5246void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5247{
f06193c4 5248 /* EEW = SEW */
f32d82f6 5249 uint32_t maxsz = simd_maxsz(desc);
f06193c4
WL
5250 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5251 uint32_t startb = env->vstart * sewb;
5252 uint32_t i = startb;
f32d82f6
WL
5253
5254 memcpy((uint8_t *)vd + H1(i),
5255 (uint8_t *)vs2 + H1(i),
f06193c4 5256 maxsz - startb);
f714361e 5257
f32d82f6
WL
5258 env->vstart = 0;
5259}
f714361e 5260
cd01340e
FC
5261/* Vector Integer Extension */
5262#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5263void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5264 CPURISCVState *env, uint32_t desc) \
5265{ \
5266 uint32_t vl = env->vl; \
5267 uint32_t vm = vext_vm(desc); \
803963f7 5268 uint32_t esz = sizeof(ETYPE); \
5269 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5270 uint32_t vta = vext_vta(desc); \
edabcd0e 5271 uint32_t vma = vext_vma(desc); \
cd01340e
FC
5272 uint32_t i; \
5273 \
f714361e 5274 for (i = env->vstart; i < vl; i++) { \
cd01340e 5275 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5276 /* set masked-off elements to 1s */ \
5277 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
cd01340e
FC
5278 continue; \
5279 } \
5280 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5281 } \
f714361e 5282 env->vstart = 0; \
803963f7 5283 /* set tail elements to 1s */ \
5284 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
cd01340e
FC
5285}
5286
5287GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5288GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5289GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5290GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5291GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5292GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5293
5294GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5295GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5296GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5297GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5298GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5299GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
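/*
 * A minimal sketch (illustrative name, unmasked, flat arrays) of the integer
 * extension helpers above: vzext.vf2 zero-extends each source element to
 * twice its width, vsext.vf2 sign-extends it; the vf4/vf8 variants widen by
 * 4x/8x in the same way.
 */
static void model_vsext_vf2_w(int32_t *vd, const int16_t *vs2, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = (int32_t)vs2[i];           /* sign extension via C conversion */
    }
}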