target/riscv/vector_helper.c (QEMU git blame, as of commit "target/riscv: Fix initialized value for cur_pmmask")
1/*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include "qemu/osdep.h"
5a9f8e15 20#include "qemu/host-utils.h"
e848a1e5 21#include "qemu/bitops.h"
2b7168fc 22#include "cpu.h"
751538d5 23#include "exec/memop.h"
24#include "exec/exec-all.h"
25#include "exec/helper-proto.h"
ce2a0343 26#include "fpu/softfloat.h"
27#include "tcg/tcg-gvec-desc.h"
28#include "internals.h"
29#include <math.h>
30
31target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32 target_ulong s2)
33{
34 int vlmax, vl;
35 RISCVCPU *cpu = env_archcpu(env);
d9b7609a 36 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39 int xlen = riscv_cpu_xlen(env);
40 bool vill = (s2 >> (xlen - 1)) & 0x1;
41 target_ulong reserved = s2 &
42 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
43 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
2b7168fc 44
45 if (lmul & 4) {
46 /* Fractional LMUL. */
47 if (lmul == 4 ||
48 cpu->cfg.elen >> (8 - lmul) < sew) {
49 vill = true;
50 }
51 }
52
c45eff30 53 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
2b7168fc 54 /* only set vill bit. */
55 env->vill = 1;
56 env->vtype = 0;
57 env->vl = 0;
58 env->vstart = 0;
59 return 0;
60 }
61
62 vlmax = vext_get_vlmax(cpu, s2);
63 if (s1 <= vlmax) {
64 vl = s1;
65 } else {
66 vl = vlmax;
67 }
68 env->vl = vl;
69 env->vtype = s2;
70 env->vstart = 0;
ac6bcf4d 71 env->vill = 0;
72 return vl;
73}
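/*
 * For example (per the usual VLMAX = LMUL * VLEN / SEW that
 * vext_get_vlmax() computes): with VLEN = 128, SEW = 32 (vsew = 2) and
 * LMUL = 2 (vlmul = 001), VLMAX = 2 * 128 / 32 = 8, so a request of
 * s1 = 10 is clamped to vl = 8. A fractional LMUL of 1/8 (vlmul = 101)
 * with ELEN = 64 and SEW = 16 fails the check above, since
 * elen >> (8 - lmul) = 64 >> 3 = 8 < 16, and vill is set.
 */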
74
75/*
76 * Note that vector data is stored in host-endian 64-bit chunks,
77 * so addressing units smaller than that need a host-endian fixup.
78 */
e03b5686 79#if HOST_BIG_ENDIAN
80#define H1(x) ((x) ^ 7)
81#define H1_2(x) ((x) ^ 6)
82#define H1_4(x) ((x) ^ 4)
83#define H2(x) ((x) ^ 3)
84#define H4(x) ((x) ^ 1)
85#define H8(x) ((x))
86#else
87#define H1(x) (x)
88#define H1_2(x) (x)
89#define H1_4(x) (x)
90#define H2(x) (x)
91#define H4(x) (x)
92#define H8(x) (x)
93#endif
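/*
 * For example, on a big-endian host H1(0) == 7, so byte element 0 lands in
 * the least-significant byte of the first host 64-bit chunk, matching the
 * little-endian element order the helpers assume; on little-endian hosts
 * the H* macros are identity mappings.
 */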
94
95static inline uint32_t vext_nf(uint32_t desc)
96{
97 return FIELD_EX32(simd_data(desc), VDATA, NF);
98}
99
100static inline uint32_t vext_vm(uint32_t desc)
101{
102 return FIELD_EX32(simd_data(desc), VDATA, VM);
103}
104
105/*
106 * Encode LMUL to lmul as follows:
107 * LMUL vlmul lmul
108 * 1 000 0
109 * 2 001 1
110 * 4 010 2
111 * 8 011 3
112 * - 100 -
113 * 1/8 101 -3
114 * 1/4 110 -2
115 * 1/2 111 -1
116 */
117static inline int32_t vext_lmul(uint32_t desc)
751538d5 118{
33f1beaf 119 return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
120}
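/*
 * For example, a VDATA.LMUL field of 0b111 sign-extends to -1, i.e.
 * LMUL = 1/2, while 0b010 stays 2, i.e. LMUL = 4 (see the table above).
 */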
121
f1eed927 122static inline uint32_t vext_vta(uint32_t desc)
123{
124 return FIELD_EX32(simd_data(desc), VDATA, VTA);
125}
126
127static inline uint32_t vext_vma(uint32_t desc)
128{
129 return FIELD_EX32(simd_data(desc), VDATA, VMA);
130}
131
5c19fc15 132static inline uint32_t vext_vta_all_1s(uint32_t desc)
133{
134 return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
135}
136
751538d5 137/*
5a9f8e15 138 * Get the maximum number of elements that can be operated on.
751538d5 139 *
c7b8a421 140 * log2_esz: log2 of element size in bytes.
751538d5 141 */
c7b8a421 142static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
751538d5 143{
5a9f8e15 144 /*
8a4b5257 145 * As simd_desc supports at most 2048 bytes, the maximum vlen is 1024 bits,
146 * so vlen in bytes (vlenb) is encoded as maxsz.
147 */
148 uint32_t vlenb = simd_maxsz(desc);
149
150 /* Return VLMAX */
c7b8a421 151 int scale = vext_lmul(desc) - log2_esz;
5a9f8e15 152 return scale < 0 ? vlenb >> -scale : vlenb << scale;
153}
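/*
 * Worked example: with VLEN = 128 (vlenb = 16), LMUL = 2 (lmul = 1) and
 * 16-bit elements (log2_esz = 1), scale = 1 - 1 = 0 and the result is
 * 16 elements, i.e. VLMAX = LMUL * VLEN / SEW = 2 * 128 / 16 = 16.
 */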
154
f1eed927 155/*
156 * Get the total number of elements, including prestart, body and tail elements.
157 * Note that when LMUL < 1, the tail includes the elements past VLMAX that
158 * are held in the same vector register.
159 */
160static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
161 uint32_t esz)
162{
163 uint32_t vlenb = simd_maxsz(desc);
164 uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
165 int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
166 ctzl(esz) - ctzl(sew) + vext_lmul(desc);
167 return (vlenb << emul) / esz;
168}
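/*
 * For example, with LMUL = 1/4 (vext_lmul() == -2) and esz == sew, emul
 * is clamped to 0, so the result is vlenb / esz: the whole register,
 * including the elements past VLMAX that the note above refers to.
 */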
169
170static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
171{
7b945bdc 172 return (addr & ~env->cur_pmmask) | env->cur_pmbase;
173}
174
175/*
176 * This function checks watchpoints before the real load operation.
177 *
178 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
179 * In user mode, there is no watchpoint support now.
180 *
181 * It will trigger an exception if there is no mapping in the TLB
182 * and the page table walk can't fill the TLB entry. Then the guest
183 * software can return here after processing the exception, or never return.
184 */
185static void probe_pages(CPURISCVState *env, target_ulong addr,
186 target_ulong len, uintptr_t ra,
187 MMUAccessType access_type)
188{
189 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
190 target_ulong curlen = MIN(pagelen, len);
191
d6b9d930 192 probe_access(env, adjust_addr(env, addr), curlen, access_type,
193 cpu_mmu_index(env, false), ra);
194 if (len > curlen) {
195 addr += curlen;
196 curlen = len - curlen;
d6b9d930 197 probe_access(env, adjust_addr(env, addr), curlen, access_type,
198 cpu_mmu_index(env, false), ra);
199 }
200}
201
f1eed927 202/* set agnostic elements to 1s */
203static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
204 uint32_t tot)
205{
206 if (is_agnostic == 0) {
207 /* policy undisturbed */
208 return;
209 }
210 if (tot - cnt == 0) {
c1dadb84 211 return;
f1eed927 212 }
213 memset(base + cnt, -1, tot - cnt);
214}
215
216static inline void vext_set_elem_mask(void *v0, int index,
217 uint8_t value)
3a6f8f68 218{
219 int idx = index / 64;
220 int pos = index % 64;
3a6f8f68 221 uint64_t old = ((uint64_t *)v0)[idx];
f9298de5 222 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
3a6f8f68 223}
751538d5 224
225/*
226 * Earlier designs (pre-0.9) had a varying number of bits
227 * per mask value (MLEN). In the 0.9 design, MLEN=1.
228 * (Section 4.5)
229 */
230static inline int vext_elem_mask(void *v0, int index)
751538d5 231{
232 int idx = index / 64;
233 int pos = index % 64;
234 return (((uint64_t *)v0)[idx] >> pos) & 1;
235}
236
237/* element operations for load and store */
238typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
239 uint32_t idx, void *vd, uintptr_t retaddr);
751538d5 240
79556fb6 241#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
242static void NAME(CPURISCVState *env, abi_ptr addr, \
243 uint32_t idx, void *vd, uintptr_t retaddr)\
244{ \
751538d5 245 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
79556fb6 246 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
247} \
248
249GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
250GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
251GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
252GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
253
254#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
255static void NAME(CPURISCVState *env, abi_ptr addr, \
256 uint32_t idx, void *vd, uintptr_t retaddr)\
257{ \
258 ETYPE data = *((ETYPE *)vd + H(idx)); \
259 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
260}
261
262GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
263GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
264GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
265GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
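/*
 * As a rough illustration, GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) above
 * expands to approximately:
 *
 *   static void lde_w(CPURISCVState *env, abi_ptr addr,
 *                     uint32_t idx, void *vd, uintptr_t retaddr)
 *   {
 *       int32_t *cur = ((int32_t *)vd + H4(idx));
 *       *cur = cpu_ldl_data_ra(env, addr, retaddr);
 *   }
 */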
266
267static void vext_set_tail_elems_1s(CPURISCVState *env, target_ulong vl,
268 void *vd, uint32_t desc, uint32_t nf,
269 uint32_t esz, uint32_t max_elems)
270{
bc0ec52e 271 uint32_t total_elems, vlenb, registers_used;
e130683f 272 uint32_t vta = vext_vta(desc);
273 int k;
274
275 if (vta == 0) {
276 return;
277 }
278
279 total_elems = vext_get_total_elems(env, desc, esz);
280 vlenb = riscv_cpu_cfg(env)->vlen >> 3;
281
282 for (k = 0; k < nf; ++k) {
283 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
284 (k * max_elems + max_elems) * esz);
285 }
286
287 if (nf * max_elems % total_elems != 0) {
288 registers_used = ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
289 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
290 registers_used * vlenb);
291 }
292}
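/*
 * For instance, a segment access with nf = 2, vl = 3, max_elems = 4 and
 * esz = 4 marks bytes [12, 16) of field 0 and bytes [28, 32) of field 1 as
 * tail; the registers_used path then extends the tail to the end of the
 * last register touched when the fields do not fill whole registers.
 */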
293
751538d5 294/*
3b57254d 295 * stride: access vector element from strided memory
296 */
297static void
298vext_ldst_stride(void *vd, void *v0, target_ulong base,
299 target_ulong stride, CPURISCVState *env,
300 uint32_t desc, uint32_t vm,
3479a814 301 vext_ldst_elem_fn *ldst_elem,
c7b8a421 302 uint32_t log2_esz, uintptr_t ra)
303{
304 uint32_t i, k;
305 uint32_t nf = vext_nf(desc);
c7b8a421 306 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 307 uint32_t esz = 1 << log2_esz;
265ecd4c 308 uint32_t vma = vext_vma(desc);
751538d5 309
f714361e 310 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
751538d5 311 k = 0;
751538d5 312 while (k < nf) {
313 if (!vm && !vext_elem_mask(v0, i)) {
314 /* set masked-off elements to 1s */
315 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
316 (i + k * max_elems + 1) * esz);
317 k++;
318 continue;
319 }
c7b8a421 320 target_ulong addr = base + stride * i + (k << log2_esz);
d6b9d930 321 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
322 k++;
323 }
324 }
f714361e 325 env->vstart = 0;
326
327 vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
328}
329
79556fb6 330#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
331void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
332 target_ulong stride, CPURISCVState *env, \
333 uint32_t desc) \
334{ \
335 uint32_t vm = vext_vm(desc); \
336 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
25eae048 337 ctzl(sizeof(ETYPE)), GETPC()); \
338}
339
340GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
341GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
342GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
343GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
344
345#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
346void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
347 target_ulong stride, CPURISCVState *env, \
348 uint32_t desc) \
349{ \
350 uint32_t vm = vext_vm(desc); \
351 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
25eae048 352 ctzl(sizeof(ETYPE)), GETPC()); \
353}
354
355GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
356GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
357GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
358GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
359
360/*
3b57254d 361 * unit-stride: access elements stored contiguously in memory
362 */
363
3b57254d 364/* unmasked unit-stride load and store operation */
365static void
366vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
c7b8a421 367 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
25eae048 368 uintptr_t ra)
369{
370 uint32_t i, k;
371 uint32_t nf = vext_nf(desc);
c7b8a421 372 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 373 uint32_t esz = 1 << log2_esz;
751538d5 374
751538d5 375 /* load bytes from guest memory */
5c89e9c0 376 for (i = env->vstart; i < evl; i++, env->vstart++) {
377 k = 0;
378 while (k < nf) {
c7b8a421 379 target_ulong addr = base + ((i * nf + k) << log2_esz);
d6b9d930 380 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
381 k++;
382 }
383 }
f714361e 384 env->vstart = 0;
385
386 vext_set_tail_elems_1s(env, evl, vd, desc, nf, esz, max_elems);
387}
388
389/*
390 * masked unit-stride load and store operations are a special case of
391 * strided operations, with stride = NF * sizeof(MTYPE)
392 */
393
79556fb6 394#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
395void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
396 CPURISCVState *env, uint32_t desc) \
397{ \
5a9f8e15 398 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
751538d5 399 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
25eae048 400 ctzl(sizeof(ETYPE)), GETPC()); \
401} \
402 \
403void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
404 CPURISCVState *env, uint32_t desc) \
405{ \
3479a814 406 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
25eae048 407 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
408}
409
410GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
411GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
412GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
413GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
414
415#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
416void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
417 CPURISCVState *env, uint32_t desc) \
418{ \
419 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
420 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
25eae048 421 ctzl(sizeof(ETYPE)), GETPC()); \
422} \
423 \
424void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
425 CPURISCVState *env, uint32_t desc) \
426{ \
427 vext_ldst_us(vd, base, env, desc, STORE_FN, \
25eae048 428 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
429}
430
431GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
432GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
433GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
434GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
f732560e 435
26086aea 436/*
3b57254d 437 * unit stride mask load and store, EEW = 1
438 */
439void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
440 CPURISCVState *env, uint32_t desc)
441{
442 /* evl = ceil(vl/8) */
443 uint8_t evl = (env->vl + 7) >> 3;
444 vext_ldst_us(vd, base, env, desc, lde_b,
25eae048 445 0, evl, GETPC());
446}
447
448void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
449 CPURISCVState *env, uint32_t desc)
450{
451 /* evl = ceil(vl/8) */
452 uint8_t evl = (env->vl + 7) >> 3;
453 vext_ldst_us(vd, base, env, desc, ste_b,
25eae048 454 0, evl, GETPC());
455}
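/*
 * Example: with vl = 17 the effective length is evl = (17 + 7) >> 3 = 3,
 * so vlm.v/vsm.v transfer 3 mask bytes regardless of SEW.
 */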
456
f732560e 457/*
3b57254d 458 * index: access vector element from indexed memory
459 */
460typedef target_ulong vext_get_index_addr(target_ulong base,
461 uint32_t idx, void *vs2);
462
463#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
464static target_ulong NAME(target_ulong base, \
465 uint32_t idx, void *vs2) \
466{ \
467 return (base + *((ETYPE *)vs2 + H(idx))); \
468}
469
470GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
471GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
472GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
473GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
474
475static inline void
476vext_ldst_index(void *vd, void *v0, target_ulong base,
477 void *vs2, CPURISCVState *env, uint32_t desc,
478 vext_get_index_addr get_index_addr,
479 vext_ldst_elem_fn *ldst_elem,
c7b8a421 480 uint32_t log2_esz, uintptr_t ra)
481{
482 uint32_t i, k;
483 uint32_t nf = vext_nf(desc);
484 uint32_t vm = vext_vm(desc);
c7b8a421 485 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 486 uint32_t esz = 1 << log2_esz;
265ecd4c 487 uint32_t vma = vext_vma(desc);
f732560e 488
f732560e 489 /* load bytes from guest memory */
f714361e 490 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
f714361e 491 k = 0;
f732560e 492 while (k < nf) {
493 if (!vm && !vext_elem_mask(v0, i)) {
494 /* set masked-off elements to 1s */
495 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
496 (i + k * max_elems + 1) * esz);
497 k++;
498 continue;
499 }
c7b8a421 500 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
d6b9d930 501 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
502 k++;
503 }
504 }
f714361e 505 env->vstart = 0;
506
507 vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
508}
509
08b9d0ed 510#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
f732560e
LZ
511void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
512 void *vs2, CPURISCVState *env, uint32_t desc) \
513{ \
514 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
25eae048 515 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
516}
517
518GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
519GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
520GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
521GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
522GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
523GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
524GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
525GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
526GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
527GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
528GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
529GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
530GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
531GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
532GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
533GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
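/*
 * Naming convention used above: vlxei<EEW>_<SEW>_v pairs an index element
 * width with a data element width, e.g. vlxei16_32_v fetches 32-bit data
 * elements (lde_w) at offsets taken from a vector of 16-bit indices (idx_h).
 */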
534
535#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
536void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
537 void *vs2, CPURISCVState *env, uint32_t desc) \
538{ \
539 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
5a9f8e15 540 STORE_FN, ctzl(sizeof(ETYPE)), \
25eae048 541 GETPC()); \
542}
543
544GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
545GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
546GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
547GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
548GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
549GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
550GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
551GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
552GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
553GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
554GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
555GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
556GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
557GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
558GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
559GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
560
561/*
3b57254d 562 * unit-stride fault-only-fisrt load instructions
563 */
564static inline void
565vext_ldff(void *vd, void *v0, target_ulong base,
566 CPURISCVState *env, uint32_t desc,
567 vext_ldst_elem_fn *ldst_elem,
c7b8a421 568 uint32_t log2_esz, uintptr_t ra)
569{
570 void *host;
571 uint32_t i, k, vl = 0;
572 uint32_t nf = vext_nf(desc);
573 uint32_t vm = vext_vm(desc);
c7b8a421 574 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 575 uint32_t esz = 1 << log2_esz;
265ecd4c 576 uint32_t vma = vext_vma(desc);
577 target_ulong addr, offset, remain;
578
3b57254d 579 /* probe every access */
f714361e 580 for (i = env->vstart; i < env->vl; i++) {
f9298de5 581 if (!vm && !vext_elem_mask(v0, i)) {
582 continue;
583 }
c7b8a421 584 addr = adjust_addr(env, base + i * (nf << log2_esz));
022b4ecf 585 if (i == 0) {
c7b8a421 586 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
587 } else {
588 /* if it triggers an exception, no need to check watchpoint */
c7b8a421 589 remain = nf << log2_esz;
590 while (remain > 0) {
591 offset = -(addr | TARGET_PAGE_MASK);
592 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
593 cpu_mmu_index(env, false));
594 if (host) {
595#ifdef CONFIG_USER_ONLY
01d09525 596 if (page_check_range(addr, offset, PAGE_READ) < 0) {
597 vl = i;
598 goto ProbeSuccess;
599 }
600#else
01d09525 601 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
602#endif
603 } else {
604 vl = i;
605 goto ProbeSuccess;
606 }
607 if (remain <= offset) {
608 break;
609 }
610 remain -= offset;
d6b9d930 611 addr = adjust_addr(env, addr + offset);
612 }
613 }
614 }
615ProbeSuccess:
616 /* load bytes from guest memory */
617 if (vl != 0) {
618 env->vl = vl;
619 }
f714361e 620 for (i = env->vstart; i < env->vl; i++) {
022b4ecf 621 k = 0;
022b4ecf 622 while (k < nf) {
623 if (!vm && !vext_elem_mask(v0, i)) {
624 /* set masked-off elements to 1s */
625 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
626 (i + k * max_elems + 1) * esz);
627 k++;
628 continue;
629 }
c7b8a421 630 target_ulong addr = base + ((i * nf + k) << log2_esz);
d6b9d930 631 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
632 k++;
633 }
634 }
f714361e 635 env->vstart = 0;
636
637 vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
638}
639
640#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
641void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
642 CPURISCVState *env, uint32_t desc) \
643{ \
644 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
5a9f8e15 645 ctzl(sizeof(ETYPE)), GETPC()); \
646}
647
648GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
649GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
650GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
651GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
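/*
 * In other words: a fault on element 0 is always reported, while a fault
 * that would hit some later element i only truncates the effective vector
 * length (vl = i in the probe loop above), which is the fault-only-first
 * behaviour these helpers implement.
 */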
268fcca6 652
653#define DO_SWAP(N, M) (M)
654#define DO_AND(N, M) (N & M)
655#define DO_XOR(N, M) (N ^ M)
656#define DO_OR(N, M) (N | M)
657#define DO_ADD(N, M) (N + M)
658
659/* Signed min/max */
660#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
661#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
662
663/* Unsigned min/max */
664#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
665#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
666
30206bd8 667/*
3b57254d 668 * load and store whole register instructions
669 */
670static void
671vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
c7b8a421 672 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
30206bd8 673{
f714361e 674 uint32_t i, k, off, pos;
30206bd8 675 uint32_t nf = vext_nf(desc);
86247c51 676 uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3;
c7b8a421 677 uint32_t max_elems = vlenb >> log2_esz;
30206bd8 678
679 k = env->vstart / max_elems;
680 off = env->vstart % max_elems;
30206bd8 681
682 if (off) {
683 /* load/store rest of elements of current segment pointed by vstart */
684 for (pos = off; pos < max_elems; pos++, env->vstart++) {
c7b8a421 685 target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
686 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd,
687 ra);
688 }
689 k++;
690 }
691
692 /* load/store elements for rest of segments */
693 for (; k < nf; k++) {
694 for (i = 0; i < max_elems; i++, env->vstart++) {
c7b8a421 695 target_ulong addr = base + ((i + k * max_elems) << log2_esz);
d6b9d930 696 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
697 }
698 }
699
700 env->vstart = 0;
701}
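/*
 * Resume example: if a whole-register access was interrupted with
 * env->vstart = 5 and max_elems = 4, then k = 1 and off = 1, so the first
 * loop above finishes elements 1..3 of the second register before the
 * remaining registers are processed.
 */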
702
703#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
704void HELPER(NAME)(void *vd, target_ulong base, \
705 CPURISCVState *env, uint32_t desc) \
706{ \
707 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
25eae048 708 ctzl(sizeof(ETYPE)), GETPC()); \
709}
710
711GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
712GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
713GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
714GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
715GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
716GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
717GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
718GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
719GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
720GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
721GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
722GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
723GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
724GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
725GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
726GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
727
728#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
729void HELPER(NAME)(void *vd, target_ulong base, \
730 CPURISCVState *env, uint32_t desc) \
731{ \
732 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
25eae048 733 ctzl(sizeof(ETYPE)), GETPC()); \
734}
735
736GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
737GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
738GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
739GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
740
43740e3a 741/*
3b57254d 742 * Vector Integer Arithmetic Instructions
743 */
744
745/* expand macro args before macro */
746#define RVVCALL(macro, ...) macro(__VA_ARGS__)
747
748/* (TD, T1, T2, TX1, TX2) */
749#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
750#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
751#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
752#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
753#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
754#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
755#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
756#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
757#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
758#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
759#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
760#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
761#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
762#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
763#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
764#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
765#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
766#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
767#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
768#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
769#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
770#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
771#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
772#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
773#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
774#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
775#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
776#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
777#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
778#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
779
780/* operation of two vector elements */
781typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
782
783#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
784static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
785{ \
786 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
787 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
788 *((TD *)vd + HD(i)) = OP(s2, s1); \
789}
790#define DO_SUB(N, M) (N - M)
791#define DO_RSUB(N, M) (M - N)
792
793RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
794RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
795RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
796RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
797RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
798RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
799RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
800RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
801
802static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
803 CPURISCVState *env, uint32_t desc,
f1eed927 804 opivv2_fn *fn, uint32_t esz)
43740e3a 805{
806 uint32_t vm = vext_vm(desc);
807 uint32_t vl = env->vl;
f1eed927 808 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
809 uint32_t vta = vext_vta(desc);
355d5584 810 uint32_t vma = vext_vma(desc);
811 uint32_t i;
812
f714361e 813 for (i = env->vstart; i < vl; i++) {
f9298de5 814 if (!vm && !vext_elem_mask(v0, i)) {
815 /* set masked-off elements to 1s */
816 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
817 continue;
818 }
819 fn(vd, vs1, vs2, i);
820 }
f714361e 821 env->vstart = 0;
f1eed927 822 /* set tail elements to 1s */
823 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
824}
825
826/* generate the helpers for OPIVV */
f1eed927 827#define GEN_VEXT_VV(NAME, ESZ) \
828void HELPER(NAME)(void *vd, void *v0, void *vs1, \
829 void *vs2, CPURISCVState *env, \
830 uint32_t desc) \
831{ \
8a085fb2 832 do_vext_vv(vd, v0, vs1, vs2, env, desc, \
f1eed927 833 do_##NAME, ESZ); \
834}
835
f1eed927 836GEN_VEXT_VV(vadd_vv_b, 1)
837GEN_VEXT_VV(vadd_vv_h, 2)
838GEN_VEXT_VV(vadd_vv_w, 4)
839GEN_VEXT_VV(vadd_vv_d, 8)
840GEN_VEXT_VV(vsub_vv_b, 1)
841GEN_VEXT_VV(vsub_vv_h, 2)
842GEN_VEXT_VV(vsub_vv_w, 4)
843GEN_VEXT_VV(vsub_vv_d, 8)
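/*
 * As a rough sketch, RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
 * above generates:
 *
 *   static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(i));
 *       int8_t s2 = *((int8_t *)vs2 + H1(i));
 *       *((int8_t *)vd + H1(i)) = s2 + s1;
 *   }
 *
 * which GEN_VEXT_VV(vadd_vv_b, 1) then wraps in the masked/tail-handling
 * loop of do_vext_vv() with a 1-byte element size.
 */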
844
845typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
846
847/*
848 * (T1)s1 gives the real operand type.
849 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
850 */
851#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
852static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
853{ \
854 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
855 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
856}
857
858RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
859RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
860RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
861RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
862RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
863RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
864RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
865RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
866RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
867RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
868RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
869RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
870
871static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
872 CPURISCVState *env, uint32_t desc,
5c19fc15 873 opivx2_fn fn, uint32_t esz)
43740e3a 874{
875 uint32_t vm = vext_vm(desc);
876 uint32_t vl = env->vl;
5c19fc15 877 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
878 uint32_t vta = vext_vta(desc);
bce9a636 879 uint32_t vma = vext_vma(desc);
880 uint32_t i;
881
f714361e 882 for (i = env->vstart; i < vl; i++) {
f9298de5 883 if (!vm && !vext_elem_mask(v0, i)) {
884 /* set masked-off elements to 1s */
885 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
886 continue;
887 }
888 fn(vd, s1, vs2, i);
889 }
f714361e 890 env->vstart = 0;
5c19fc15 891 /* set tail elements to 1s */
892 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
893}
894
895/* generate the helpers for OPIVX */
5c19fc15 896#define GEN_VEXT_VX(NAME, ESZ) \
897void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
898 void *vs2, CPURISCVState *env, \
899 uint32_t desc) \
900{ \
8a085fb2 901 do_vext_vx(vd, v0, s1, vs2, env, desc, \
5c19fc15 902 do_##NAME, ESZ); \
903}
904
5c19fc15 905GEN_VEXT_VX(vadd_vx_b, 1)
906GEN_VEXT_VX(vadd_vx_h, 2)
907GEN_VEXT_VX(vadd_vx_w, 4)
908GEN_VEXT_VX(vadd_vx_d, 8)
909GEN_VEXT_VX(vsub_vx_b, 1)
910GEN_VEXT_VX(vsub_vx_h, 2)
911GEN_VEXT_VX(vsub_vx_w, 4)
912GEN_VEXT_VX(vsub_vx_d, 8)
913GEN_VEXT_VX(vrsub_vx_b, 1)
914GEN_VEXT_VX(vrsub_vx_h, 2)
915GEN_VEXT_VX(vrsub_vx_w, 4)
916GEN_VEXT_VX(vrsub_vx_d, 8)
917
918void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
919{
920 intptr_t oprsz = simd_oprsz(desc);
921 intptr_t i;
922
923 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
924 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
925 }
926}
927
928void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
929{
930 intptr_t oprsz = simd_oprsz(desc);
931 intptr_t i;
932
933 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
934 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
935 }
936}
937
938void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
939{
940 intptr_t oprsz = simd_oprsz(desc);
941 intptr_t i;
942
943 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
944 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
945 }
946}
947
948void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
949{
950 intptr_t oprsz = simd_oprsz(desc);
951 intptr_t i;
952
953 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
954 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
955 }
956}
957
958/* Vector Widening Integer Add/Subtract */
959#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
960#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
961#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
962#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
963#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
964#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
965#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
966#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
967#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
968#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
969#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
970#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
971RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
972RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
973RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
974RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
975RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
976RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
977RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
978RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
979RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
980RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
981RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
982RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
983RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
984RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
985RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
986RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
987RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
988RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
989RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
990RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
991RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
992RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
993RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
994RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
f1eed927 995GEN_VEXT_VV(vwaddu_vv_b, 2)
996GEN_VEXT_VV(vwaddu_vv_h, 4)
997GEN_VEXT_VV(vwaddu_vv_w, 8)
998GEN_VEXT_VV(vwsubu_vv_b, 2)
999GEN_VEXT_VV(vwsubu_vv_h, 4)
1000GEN_VEXT_VV(vwsubu_vv_w, 8)
1001GEN_VEXT_VV(vwadd_vv_b, 2)
1002GEN_VEXT_VV(vwadd_vv_h, 4)
1003GEN_VEXT_VV(vwadd_vv_w, 8)
1004GEN_VEXT_VV(vwsub_vv_b, 2)
1005GEN_VEXT_VV(vwsub_vv_h, 4)
1006GEN_VEXT_VV(vwsub_vv_w, 8)
1007GEN_VEXT_VV(vwaddu_wv_b, 2)
1008GEN_VEXT_VV(vwaddu_wv_h, 4)
1009GEN_VEXT_VV(vwaddu_wv_w, 8)
1010GEN_VEXT_VV(vwsubu_wv_b, 2)
1011GEN_VEXT_VV(vwsubu_wv_h, 4)
1012GEN_VEXT_VV(vwsubu_wv_w, 8)
1013GEN_VEXT_VV(vwadd_wv_b, 2)
1014GEN_VEXT_VV(vwadd_wv_h, 4)
1015GEN_VEXT_VV(vwadd_wv_w, 8)
1016GEN_VEXT_VV(vwsub_wv_b, 2)
1017GEN_VEXT_VV(vwsub_wv_h, 4)
1018GEN_VEXT_VV(vwsub_wv_w, 8)
1019
1020RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
1021RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
1022RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
1023RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
1024RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
1025RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
1026RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
1027RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
1028RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
1029RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
1030RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
1031RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
1032RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
1033RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
1034RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
1035RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
1036RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
1037RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
1038RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
1039RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
1040RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
1041RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
1042RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
1043RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
5c19fc15 1044GEN_VEXT_VX(vwaddu_vx_b, 2)
1045GEN_VEXT_VX(vwaddu_vx_h, 4)
1046GEN_VEXT_VX(vwaddu_vx_w, 8)
1047GEN_VEXT_VX(vwsubu_vx_b, 2)
1048GEN_VEXT_VX(vwsubu_vx_h, 4)
1049GEN_VEXT_VX(vwsubu_vx_w, 8)
1050GEN_VEXT_VX(vwadd_vx_b, 2)
1051GEN_VEXT_VX(vwadd_vx_h, 4)
1052GEN_VEXT_VX(vwadd_vx_w, 8)
1053GEN_VEXT_VX(vwsub_vx_b, 2)
1054GEN_VEXT_VX(vwsub_vx_h, 4)
1055GEN_VEXT_VX(vwsub_vx_w, 8)
1056GEN_VEXT_VX(vwaddu_wx_b, 2)
1057GEN_VEXT_VX(vwaddu_wx_h, 4)
1058GEN_VEXT_VX(vwaddu_wx_w, 8)
1059GEN_VEXT_VX(vwsubu_wx_b, 2)
1060GEN_VEXT_VX(vwsubu_wx_h, 4)
1061GEN_VEXT_VX(vwsubu_wx_w, 8)
1062GEN_VEXT_VX(vwadd_wx_b, 2)
1063GEN_VEXT_VX(vwadd_wx_h, 4)
1064GEN_VEXT_VX(vwadd_wx_w, 8)
1065GEN_VEXT_VX(vwsub_wx_b, 2)
1066GEN_VEXT_VX(vwsub_wx_h, 4)
1067GEN_VEXT_VX(vwsub_wx_w, 8)
1068
1069/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1070#define DO_VADC(N, M, C) (N + M + C)
1071#define DO_VSBC(N, M, C) (N - M - C)
1072
3479a814 1073#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
1074void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1075 CPURISCVState *env, uint32_t desc) \
1076{ \
3a6f8f68 1077 uint32_t vl = env->vl; \
5c19fc15 1078 uint32_t esz = sizeof(ETYPE); \
1079 uint32_t total_elems = \
1080 vext_get_total_elems(env, desc, esz); \
1081 uint32_t vta = vext_vta(desc); \
1082 uint32_t i; \
1083 \
f714361e 1084 for (i = env->vstart; i < vl; i++) { \
1085 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1086 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1087 ETYPE carry = vext_elem_mask(v0, i); \
1088 \
1089 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
1090 } \
f714361e 1091 env->vstart = 0; \
5c19fc15 1092 /* set tail elements to 1s */ \
1093 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1094}
1095
1096GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
1097GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
1098GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
1099GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
3a6f8f68 1100
1101GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
1102GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
1103GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
1104GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
3a6f8f68 1105
3479a814 1106#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
1107void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1108 CPURISCVState *env, uint32_t desc) \
1109{ \
3a6f8f68 1110 uint32_t vl = env->vl; \
5c19fc15 1111 uint32_t esz = sizeof(ETYPE); \
1112 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1113 uint32_t vta = vext_vta(desc); \
1114 uint32_t i; \
1115 \
f714361e 1116 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 1117 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1118 ETYPE carry = vext_elem_mask(v0, i); \
1119 \
1120 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1121 } \
c45eff30 1122 env->vstart = 0; \
5c19fc15 1123 /* set tail elements to 1s */ \
1124 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1125}
1126
1127GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
1128GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1129GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1130GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
3a6f8f68 1131
1132GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
1133GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1134GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1135GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1136
1137#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
1138 (__typeof(N))(N + M) < N)
1139#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
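/*
 * Example of the carry-out test: for 8-bit operands N = 200, M = 100 and
 * C = 0, (uint8_t)(N + M) wraps to 44, and 44 < 200 reports carry = 1,
 * matching the mathematical sum 300 > 255. DO_MSBC likewise reports the
 * borrow out of N - M - C.
 */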
1140
1141#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
1142void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1143 CPURISCVState *env, uint32_t desc) \
1144{ \
3a6f8f68 1145 uint32_t vl = env->vl; \
bb45485a 1146 uint32_t vm = vext_vm(desc); \
86247c51 1147 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
5c19fc15 1148 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1149 uint32_t i; \
1150 \
f714361e 1151 for (i = env->vstart; i < vl; i++) { \
1152 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1153 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1154 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 1155 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
3a6f8f68 1156 } \
f714361e 1157 env->vstart = 0; \
1158 /*
1159 * mask destination register is always tail-agnostic,
1160 * so set tail elements to 1s
1161 */ \
5c19fc15 1162 if (vta_all_1s) { \
1163 for (; i < total_elems; i++) { \
1164 vext_set_elem_mask(vd, i, 1); \
1165 } \
1166 } \
1167}
1168
1169GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1170GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1171GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1172GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1173
1174GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1175GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1176GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1177GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1178
1179#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1180void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1181 void *vs2, CPURISCVState *env, uint32_t desc) \
1182{ \
3a6f8f68 1183 uint32_t vl = env->vl; \
bb45485a 1184 uint32_t vm = vext_vm(desc); \
86247c51 1185 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
5c19fc15 1186 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1187 uint32_t i; \
1188 \
f714361e 1189 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 1190 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1191 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 1192 vext_set_elem_mask(vd, i, \
1193 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1194 } \
f714361e 1195 env->vstart = 0; \
1196 /*
1197 * mask destination register is always tail-agnostic,
1198 * so set tail elements to 1s
1199 */ \
5c19fc15 1200 if (vta_all_1s) { \
1201 for (; i < total_elems; i++) { \
1202 vext_set_elem_mask(vd, i, 1); \
1203 } \
1204 } \
1205}
1206
1207GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1208GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1209GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1210GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1211
1212GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1213GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1214GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1215GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1216
1217/* Vector Bitwise Logical Instructions */
1218RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1219RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1220RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1221RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1222RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1223RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1224RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1225RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1226RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1227RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1228RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1229RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
f1eed927 1230GEN_VEXT_VV(vand_vv_b, 1)
1231GEN_VEXT_VV(vand_vv_h, 2)
1232GEN_VEXT_VV(vand_vv_w, 4)
1233GEN_VEXT_VV(vand_vv_d, 8)
1234GEN_VEXT_VV(vor_vv_b, 1)
1235GEN_VEXT_VV(vor_vv_h, 2)
1236GEN_VEXT_VV(vor_vv_w, 4)
1237GEN_VEXT_VV(vor_vv_d, 8)
1238GEN_VEXT_VV(vxor_vv_b, 1)
1239GEN_VEXT_VV(vxor_vv_h, 2)
1240GEN_VEXT_VV(vxor_vv_w, 4)
1241GEN_VEXT_VV(vxor_vv_d, 8)
1242
1243RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1244RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1245RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1246RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1247RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1248RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1249RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1250RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1251RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1252RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1253RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1254RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
5c19fc15 1255GEN_VEXT_VX(vand_vx_b, 1)
1256GEN_VEXT_VX(vand_vx_h, 2)
1257GEN_VEXT_VX(vand_vx_w, 4)
1258GEN_VEXT_VX(vand_vx_d, 8)
1259GEN_VEXT_VX(vor_vx_b, 1)
1260GEN_VEXT_VX(vor_vx_h, 2)
1261GEN_VEXT_VX(vor_vx_w, 4)
1262GEN_VEXT_VX(vor_vx_d, 8)
1263GEN_VEXT_VX(vxor_vx_b, 1)
1264GEN_VEXT_VX(vxor_vx_h, 2)
1265GEN_VEXT_VX(vxor_vx_w, 4)
1266GEN_VEXT_VX(vxor_vx_d, 8)
1267
1268/* Vector Single-Width Bit Shift Instructions */
1269#define DO_SLL(N, M) (N << (M))
1270#define DO_SRL(N, M) (N >> (M))
1271
1272/* generate the helpers for shift instructions with two vector operands */
3479a814 1273#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
1274void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1275 void *vs2, CPURISCVState *env, uint32_t desc) \
1276{ \
1277 uint32_t vm = vext_vm(desc); \
1278 uint32_t vl = env->vl; \
7b1bff41 1279 uint32_t esz = sizeof(TS1); \
1280 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1281 uint32_t vta = vext_vta(desc); \
fd93045e 1282 uint32_t vma = vext_vma(desc); \
1283 uint32_t i; \
1284 \
f714361e 1285 for (i = env->vstart; i < vl; i++) { \
f9298de5 1286 if (!vm && !vext_elem_mask(v0, i)) { \
1287 /* set masked-off elements to 1s */ \
1288 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
1289 continue; \
1290 } \
1291 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1292 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1293 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1294 } \
f714361e 1295 env->vstart = 0; \
7b1bff41 1296 /* set tail elements to 1s */ \
1297 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1298}
1299
1300GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1301GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1302GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1303GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
3277d955 1304
1305GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1306GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1307GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1308GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
3277d955 1309
1310GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1311GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1312GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1313GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
3277d955 1314
1315/*
1316 * generate the helpers for shift instructions with one vector and one scalar
1317 */
1318#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1319void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1320 void *vs2, CPURISCVState *env, \
1321 uint32_t desc) \
1322{ \
1323 uint32_t vm = vext_vm(desc); \
1324 uint32_t vl = env->vl; \
7b1bff41 1325 uint32_t esz = sizeof(TD); \
1326 uint32_t total_elems = \
1327 vext_get_total_elems(env, desc, esz); \
1328 uint32_t vta = vext_vta(desc); \
fd93045e 1329 uint32_t vma = vext_vma(desc); \
1330 uint32_t i; \
1331 \
f714361e 1332 for (i = env->vstart; i < vl; i++) { \
3479a814 1333 if (!vm && !vext_elem_mask(v0, i)) { \
1334 /* set masked-off elements to 1s */ \
1335 vext_set_elems_1s(vd, vma, i * esz, \
1336 (i + 1) * esz); \
1337 continue; \
1338 } \
1339 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1340 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1341 } \
f714361e 1342 env->vstart = 0; \
7b1bff41 1343 /* set tail elements to 1s */ \
1344 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
1345}
1346
1347GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1348GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1349GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1350GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1351
1352GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1353GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1354GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1355GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1356
1357GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1358GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1359GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1360GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1361
1362/* Vector Narrowing Integer Right Shift Instructions */
1363GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1364GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1365GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1366GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1367GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1368GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1369GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1370GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1371GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1372GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1373GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1374GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1375
1376/* Vector Integer Comparison Instructions */
1377#define DO_MSEQ(N, M) (N == M)
1378#define DO_MSNE(N, M) (N != M)
1379#define DO_MSLT(N, M) (N < M)
1380#define DO_MSLE(N, M) (N <= M)
1381#define DO_MSGT(N, M) (N > M)
1382
1383#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1384void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1385 CPURISCVState *env, uint32_t desc) \
1386{ \
1387 uint32_t vm = vext_vm(desc); \
1388 uint32_t vl = env->vl; \
86247c51 1389 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
38581e5c 1390 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
6e11d7ea 1391 uint32_t vma = vext_vma(desc); \
1392 uint32_t i; \
1393 \
f714361e 1394 for (i = env->vstart; i < vl; i++) { \
1395 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1396 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1397 if (!vm && !vext_elem_mask(v0, i)) { \
1398 /* set masked-off elements to 1s */ \
1399 if (vma) { \
1400 vext_set_elem_mask(vd, i, 1); \
1401 } \
1402 continue; \
1403 } \
f9298de5 1404 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1366fc79 1405 } \
f714361e 1406 env->vstart = 0; \
1407 /*
1408 * mask destination register is always tail-agnostic,
1409 * so set tail elements to 1s
1410 */ \
38581e5c 1411 if (vta_all_1s) { \
1412 for (; i < total_elems; i++) { \
1413 vext_set_elem_mask(vd, i, 1); \
1414 } \
1415 } \
1416}
1417
1418GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1419GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1420GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1421GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1422
1423GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1424GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1425GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1426GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1427
1428GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1429GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1430GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1431GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1432
1433GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1434GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1435GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1436GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1437
1438GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1439GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1440GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1441GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1442
1443GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1444GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1445GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1446GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1447
1448#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1449void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1450 CPURISCVState *env, uint32_t desc) \
1451{ \
1452 uint32_t vm = vext_vm(desc); \
1453 uint32_t vl = env->vl; \
86247c51 1454 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
38581e5c 1455 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
6e11d7ea 1456 uint32_t vma = vext_vma(desc); \
1457 uint32_t i; \
1458 \
f714361e 1459 for (i = env->vstart; i < vl; i++) { \
1366fc79 1460 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1461 if (!vm && !vext_elem_mask(v0, i)) { \
1462 /* set masked-off elements to 1s */ \
1463 if (vma) { \
1464 vext_set_elem_mask(vd, i, 1); \
1465 } \
1466 continue; \
1467 } \
f9298de5 1468 vext_set_elem_mask(vd, i, \
1469 DO_OP(s2, (ETYPE)(target_long)s1)); \
1470 } \
f714361e 1471 env->vstart = 0; \
3b57254d
WL
1472 /*
1473     * mask destination register is always tail-agnostic
1474 * set tail elements to 1s
1475 */ \
38581e5c 1476 if (vta_all_1s) { \
1477 for (; i < total_elems; i++) { \
1478 vext_set_elem_mask(vd, i, 1); \
1479 } \
1480 } \
1366fc79
LZ
1481}
1482
1483GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1484GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1485GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1486GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1487
1488GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1489GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1490GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1491GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1492
1493GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1494GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1495GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1496GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1497
1498GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1499GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1500GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1501GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1502
1503GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1504GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1505GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1506GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1507
1508GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1509GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1510GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1511GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1512
1513GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1514GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1515GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1516GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1517
1518GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1519GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1520GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1521GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
558fa779
LZ
1522
1523/* Vector Integer Min/Max Instructions */
1524RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1525RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1526RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1527RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1528RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1529RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1530RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1531RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1532RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1533RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1534RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1535RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1536RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1537RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1538RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1539RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
f1eed927 1540GEN_VEXT_VV(vminu_vv_b, 1)
1541GEN_VEXT_VV(vminu_vv_h, 2)
1542GEN_VEXT_VV(vminu_vv_w, 4)
1543GEN_VEXT_VV(vminu_vv_d, 8)
1544GEN_VEXT_VV(vmin_vv_b, 1)
1545GEN_VEXT_VV(vmin_vv_h, 2)
1546GEN_VEXT_VV(vmin_vv_w, 4)
1547GEN_VEXT_VV(vmin_vv_d, 8)
1548GEN_VEXT_VV(vmaxu_vv_b, 1)
1549GEN_VEXT_VV(vmaxu_vv_h, 2)
1550GEN_VEXT_VV(vmaxu_vv_w, 4)
1551GEN_VEXT_VV(vmaxu_vv_d, 8)
1552GEN_VEXT_VV(vmax_vv_b, 1)
1553GEN_VEXT_VV(vmax_vv_h, 2)
1554GEN_VEXT_VV(vmax_vv_w, 4)
1555GEN_VEXT_VV(vmax_vv_d, 8)
558fa779
LZ
1556
1557RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1558RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1559RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1560RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1561RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1562RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1563RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1564RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1565RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1566RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1567RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1568RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1569RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1570RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1571RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1572RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
5c19fc15 1573GEN_VEXT_VX(vminu_vx_b, 1)
1574GEN_VEXT_VX(vminu_vx_h, 2)
1575GEN_VEXT_VX(vminu_vx_w, 4)
1576GEN_VEXT_VX(vminu_vx_d, 8)
1577GEN_VEXT_VX(vmin_vx_b, 1)
1578GEN_VEXT_VX(vmin_vx_h, 2)
1579GEN_VEXT_VX(vmin_vx_w, 4)
1580GEN_VEXT_VX(vmin_vx_d, 8)
1581GEN_VEXT_VX(vmaxu_vx_b, 1)
1582GEN_VEXT_VX(vmaxu_vx_h, 2)
1583GEN_VEXT_VX(vmaxu_vx_w, 4)
1584GEN_VEXT_VX(vmaxu_vx_d, 8)
1585GEN_VEXT_VX(vmax_vx_b, 1)
1586GEN_VEXT_VX(vmax_vx_h, 2)
1587GEN_VEXT_VX(vmax_vx_w, 4)
1588GEN_VEXT_VX(vmax_vx_d, 8)
958b85f3
LZ
1589
1590/* Vector Single-Width Integer Multiply Instructions */
1591#define DO_MUL(N, M) (N * M)
1592RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1593RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1594RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1595RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
f1eed927 1596GEN_VEXT_VV(vmul_vv_b, 1)
1597GEN_VEXT_VV(vmul_vv_h, 2)
1598GEN_VEXT_VV(vmul_vv_w, 4)
1599GEN_VEXT_VV(vmul_vv_d, 8)
958b85f3
LZ
1600
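/*
 * vmulh/vmulhu/vmulhsu helpers: compute the full 2*SEW-bit product and
 * return its upper SEW bits.  The 64-bit variants use muls64()/mulu64()
 * because the 128-bit product does not fit in a native integer type.
 */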
1601static int8_t do_mulh_b(int8_t s2, int8_t s1)
1602{
1603 return (int16_t)s2 * (int16_t)s1 >> 8;
1604}
1605
1606static int16_t do_mulh_h(int16_t s2, int16_t s1)
1607{
1608 return (int32_t)s2 * (int32_t)s1 >> 16;
1609}
1610
1611static int32_t do_mulh_w(int32_t s2, int32_t s1)
1612{
1613 return (int64_t)s2 * (int64_t)s1 >> 32;
1614}
1615
1616static int64_t do_mulh_d(int64_t s2, int64_t s1)
1617{
1618 uint64_t hi_64, lo_64;
1619
1620 muls64(&lo_64, &hi_64, s1, s2);
1621 return hi_64;
1622}
1623
1624static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1625{
1626 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1627}
1628
1629static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1630{
1631 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1632}
1633
1634static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1635{
1636 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1637}
1638
1639static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1640{
1641 uint64_t hi_64, lo_64;
1642
1643 mulu64(&lo_64, &hi_64, s2, s1);
1644 return hi_64;
1645}
1646
1647static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1648{
1649 return (int16_t)s2 * (uint16_t)s1 >> 8;
1650}
1651
1652static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1653{
1654 return (int32_t)s2 * (uint32_t)s1 >> 16;
1655}
1656
1657static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1658{
1659 return (int64_t)s2 * (uint64_t)s1 >> 32;
1660}
1661
1662/*
1663 * Let  A = signed operand,
1664 *      B = unsigned operand,
1665 *      P = mulu64(A, B), the product of A's bit pattern and B.
1666 *
1667 * When A < 0, mulu64 sees A's bit pattern as the unsigned
1668 * value A + 2 ** 64, so
1669 *      P = (A + 2 ** 64) * B
1670 * while the signed product is
1671 *      SP = A * B
1672 *         = (A + 2 ** 64) * B - 2 ** 64 * B
1673 *         = P - 2 ** 64 * B
1674 * Subtracting 2 ** 64 * B leaves the low 64 bits unchanged
1675 * and subtracts B from the high 64 bits: HI(SP) = HI(P) - B.
1676 * When A >= 0, SP = P.  Hence:
1677 *
1678 *      HI_P -= (A < 0 ? B : 0)
1679 */
1680
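/*
 * For example, in an 8-bit analogue of the same computation:
 * A = -2 (0xFE), B = 3.  The unsigned product is 0xFE * 3 = 0x02FA,
 * so HI(P) = 0x02.  Since A < 0: 0x02 - 0x03 = -1 = 0xFF, which is
 * indeed the high byte of the signed product -6 (0xFFFA).
 */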
1681static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1682{
1683 uint64_t hi_64, lo_64;
1684
1685 mulu64(&lo_64, &hi_64, s2, s1);
1686
1687 hi_64 -= s2 < 0 ? s1 : 0;
1688 return hi_64;
1689}
1690
1691RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1692RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1693RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1694RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1695RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1696RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1697RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1698RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1699RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1700RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1701RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1702RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
f1eed927 1703GEN_VEXT_VV(vmulh_vv_b, 1)
1704GEN_VEXT_VV(vmulh_vv_h, 2)
1705GEN_VEXT_VV(vmulh_vv_w, 4)
1706GEN_VEXT_VV(vmulh_vv_d, 8)
1707GEN_VEXT_VV(vmulhu_vv_b, 1)
1708GEN_VEXT_VV(vmulhu_vv_h, 2)
1709GEN_VEXT_VV(vmulhu_vv_w, 4)
1710GEN_VEXT_VV(vmulhu_vv_d, 8)
1711GEN_VEXT_VV(vmulhsu_vv_b, 1)
1712GEN_VEXT_VV(vmulhsu_vv_h, 2)
1713GEN_VEXT_VV(vmulhsu_vv_w, 4)
1714GEN_VEXT_VV(vmulhsu_vv_d, 8)
958b85f3
LZ
1715
1716RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1717RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1718RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1719RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1720RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1721RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1722RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1723RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1724RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1725RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1726RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1727RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1728RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1729RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1730RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1731RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
5c19fc15 1732GEN_VEXT_VX(vmul_vx_b, 1)
1733GEN_VEXT_VX(vmul_vx_h, 2)
1734GEN_VEXT_VX(vmul_vx_w, 4)
1735GEN_VEXT_VX(vmul_vx_d, 8)
1736GEN_VEXT_VX(vmulh_vx_b, 1)
1737GEN_VEXT_VX(vmulh_vx_h, 2)
1738GEN_VEXT_VX(vmulh_vx_w, 4)
1739GEN_VEXT_VX(vmulh_vx_d, 8)
1740GEN_VEXT_VX(vmulhu_vx_b, 1)
1741GEN_VEXT_VX(vmulhu_vx_h, 2)
1742GEN_VEXT_VX(vmulhu_vx_w, 4)
1743GEN_VEXT_VX(vmulhu_vx_d, 8)
1744GEN_VEXT_VX(vmulhsu_vx_b, 1)
1745GEN_VEXT_VX(vmulhsu_vx_h, 2)
1746GEN_VEXT_VX(vmulhsu_vx_w, 4)
1747GEN_VEXT_VX(vmulhsu_vx_d, 8)
85e6658c
LZ
1748
1749/* Vector Integer Divide Instructions */
1750#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1751#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
c45eff30 1752#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \
85e6658c 1753 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
c45eff30 1754#define DO_REM(N, M) (unlikely(M == 0) ? N : \
1755 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
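/*
 * In DO_DIV/DO_REM, (N == -N) holds only for 0 and the most negative
 * value of the type, so combined with M == -1 it singles out the
 * overflowing INT_MIN / -1 case, which is defined to yield a quotient
 * of N and a remainder of 0 (0 / -1 hits the same branch, but the
 * result is 0 either way).
 */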
1756
1757RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1758RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1759RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1760RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1761RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1762RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1763RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1764RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1765RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1766RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1767RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1768RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1769RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1770RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1771RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1772RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
f1eed927 1773GEN_VEXT_VV(vdivu_vv_b, 1)
1774GEN_VEXT_VV(vdivu_vv_h, 2)
1775GEN_VEXT_VV(vdivu_vv_w, 4)
1776GEN_VEXT_VV(vdivu_vv_d, 8)
1777GEN_VEXT_VV(vdiv_vv_b, 1)
1778GEN_VEXT_VV(vdiv_vv_h, 2)
1779GEN_VEXT_VV(vdiv_vv_w, 4)
1780GEN_VEXT_VV(vdiv_vv_d, 8)
1781GEN_VEXT_VV(vremu_vv_b, 1)
1782GEN_VEXT_VV(vremu_vv_h, 2)
1783GEN_VEXT_VV(vremu_vv_w, 4)
1784GEN_VEXT_VV(vremu_vv_d, 8)
1785GEN_VEXT_VV(vrem_vv_b, 1)
1786GEN_VEXT_VV(vrem_vv_h, 2)
1787GEN_VEXT_VV(vrem_vv_w, 4)
1788GEN_VEXT_VV(vrem_vv_d, 8)
85e6658c
LZ
1789
1790RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1791RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1792RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1793RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1794RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1795RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1796RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1797RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1798RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1799RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1800RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1801RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1802RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1803RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1804RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1805RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
5c19fc15 1806GEN_VEXT_VX(vdivu_vx_b, 1)
1807GEN_VEXT_VX(vdivu_vx_h, 2)
1808GEN_VEXT_VX(vdivu_vx_w, 4)
1809GEN_VEXT_VX(vdivu_vx_d, 8)
1810GEN_VEXT_VX(vdiv_vx_b, 1)
1811GEN_VEXT_VX(vdiv_vx_h, 2)
1812GEN_VEXT_VX(vdiv_vx_w, 4)
1813GEN_VEXT_VX(vdiv_vx_d, 8)
1814GEN_VEXT_VX(vremu_vx_b, 1)
1815GEN_VEXT_VX(vremu_vx_h, 2)
1816GEN_VEXT_VX(vremu_vx_w, 4)
1817GEN_VEXT_VX(vremu_vx_d, 8)
1818GEN_VEXT_VX(vrem_vx_b, 1)
1819GEN_VEXT_VX(vrem_vx_h, 2)
1820GEN_VEXT_VX(vrem_vx_w, 4)
1821GEN_VEXT_VX(vrem_vx_d, 8)
97b1cba3
LZ
1822
1823/* Vector Widening Integer Multiply Instructions */
1824RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1825RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1826RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1827RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1828RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1829RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1830RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1831RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1832RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
f1eed927 1833GEN_VEXT_VV(vwmul_vv_b, 2)
1834GEN_VEXT_VV(vwmul_vv_h, 4)
1835GEN_VEXT_VV(vwmul_vv_w, 8)
1836GEN_VEXT_VV(vwmulu_vv_b, 2)
1837GEN_VEXT_VV(vwmulu_vv_h, 4)
1838GEN_VEXT_VV(vwmulu_vv_w, 8)
1839GEN_VEXT_VV(vwmulsu_vv_b, 2)
1840GEN_VEXT_VV(vwmulsu_vv_h, 4)
1841GEN_VEXT_VV(vwmulsu_vv_w, 8)
97b1cba3
LZ
1842
1843RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1844RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1845RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1846RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1847RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1848RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1849RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1850RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1851RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
5c19fc15 1852GEN_VEXT_VX(vwmul_vx_b, 2)
1853GEN_VEXT_VX(vwmul_vx_h, 4)
1854GEN_VEXT_VX(vwmul_vx_w, 8)
1855GEN_VEXT_VX(vwmulu_vx_b, 2)
1856GEN_VEXT_VX(vwmulu_vx_h, 4)
1857GEN_VEXT_VX(vwmulu_vx_w, 8)
1858GEN_VEXT_VX(vwmulsu_vx_b, 2)
1859GEN_VEXT_VX(vwmulsu_vx_h, 4)
1860GEN_VEXT_VX(vwmulsu_vx_w, 8)
54df813a
LZ
1861
1862/* Vector Single-Width Integer Multiply-Add Instructions */
c45eff30 1863#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
54df813a
LZ
1864static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1865{ \
1866 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1867 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1868 TD d = *((TD *)vd + HD(i)); \
1869 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1870}
1871
1872#define DO_MACC(N, M, D) (M * N + D)
1873#define DO_NMSAC(N, M, D) (-(M * N) + D)
1874#define DO_MADD(N, M, D) (M * D + N)
1875#define DO_NMSUB(N, M, D) (-(M * D) + N)
1876RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1877RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1878RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1879RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1880RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1881RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1882RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1883RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1884RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1885RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1886RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1887RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1888RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1889RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1890RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1891RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
f1eed927 1892GEN_VEXT_VV(vmacc_vv_b, 1)
1893GEN_VEXT_VV(vmacc_vv_h, 2)
1894GEN_VEXT_VV(vmacc_vv_w, 4)
1895GEN_VEXT_VV(vmacc_vv_d, 8)
1896GEN_VEXT_VV(vnmsac_vv_b, 1)
1897GEN_VEXT_VV(vnmsac_vv_h, 2)
1898GEN_VEXT_VV(vnmsac_vv_w, 4)
1899GEN_VEXT_VV(vnmsac_vv_d, 8)
1900GEN_VEXT_VV(vmadd_vv_b, 1)
1901GEN_VEXT_VV(vmadd_vv_h, 2)
1902GEN_VEXT_VV(vmadd_vv_w, 4)
1903GEN_VEXT_VV(vmadd_vv_d, 8)
1904GEN_VEXT_VV(vnmsub_vv_b, 1)
1905GEN_VEXT_VV(vnmsub_vv_h, 2)
1906GEN_VEXT_VV(vnmsub_vv_w, 4)
1907GEN_VEXT_VV(vnmsub_vv_d, 8)
54df813a
LZ
1908
1909#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1910static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1911{ \
1912 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1913 TD d = *((TD *)vd + HD(i)); \
1914 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1915}
1916
1917RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1918RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1919RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1920RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1921RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1922RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1923RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1924RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1925RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1926RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1927RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1928RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1929RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1930RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1931RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1932RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
5c19fc15 1933GEN_VEXT_VX(vmacc_vx_b, 1)
1934GEN_VEXT_VX(vmacc_vx_h, 2)
1935GEN_VEXT_VX(vmacc_vx_w, 4)
1936GEN_VEXT_VX(vmacc_vx_d, 8)
1937GEN_VEXT_VX(vnmsac_vx_b, 1)
1938GEN_VEXT_VX(vnmsac_vx_h, 2)
1939GEN_VEXT_VX(vnmsac_vx_w, 4)
1940GEN_VEXT_VX(vnmsac_vx_d, 8)
1941GEN_VEXT_VX(vmadd_vx_b, 1)
1942GEN_VEXT_VX(vmadd_vx_h, 2)
1943GEN_VEXT_VX(vmadd_vx_w, 4)
1944GEN_VEXT_VX(vmadd_vx_d, 8)
1945GEN_VEXT_VX(vnmsub_vx_b, 1)
1946GEN_VEXT_VX(vnmsub_vx_h, 2)
1947GEN_VEXT_VX(vnmsub_vx_w, 4)
1948GEN_VEXT_VX(vnmsub_vx_d, 8)
2b587b33
LZ
1949
1950/* Vector Widening Integer Multiply-Add Instructions */
1951RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1952RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1953RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1954RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1955RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1956RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1957RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1958RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1959RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
f1eed927 1960GEN_VEXT_VV(vwmaccu_vv_b, 2)
1961GEN_VEXT_VV(vwmaccu_vv_h, 4)
1962GEN_VEXT_VV(vwmaccu_vv_w, 8)
1963GEN_VEXT_VV(vwmacc_vv_b, 2)
1964GEN_VEXT_VV(vwmacc_vv_h, 4)
1965GEN_VEXT_VV(vwmacc_vv_w, 8)
1966GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1967GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1968GEN_VEXT_VV(vwmaccsu_vv_w, 8)
2b587b33
LZ
1969
1970RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1971RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1972RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1973RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1974RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1975RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1976RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1977RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1978RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1979RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1980RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1981RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
5c19fc15 1982GEN_VEXT_VX(vwmaccu_vx_b, 2)
1983GEN_VEXT_VX(vwmaccu_vx_h, 4)
1984GEN_VEXT_VX(vwmaccu_vx_w, 8)
1985GEN_VEXT_VX(vwmacc_vx_b, 2)
1986GEN_VEXT_VX(vwmacc_vx_h, 4)
1987GEN_VEXT_VX(vwmacc_vx_w, 8)
1988GEN_VEXT_VX(vwmaccsu_vx_b, 2)
1989GEN_VEXT_VX(vwmaccsu_vx_h, 4)
1990GEN_VEXT_VX(vwmaccsu_vx_w, 8)
1991GEN_VEXT_VX(vwmaccus_vx_b, 2)
1992GEN_VEXT_VX(vwmaccus_vx_h, 4)
1993GEN_VEXT_VX(vwmaccus_vx_w, 8)
f020a7a1
LZ
1994
1995/* Vector Integer Merge and Move Instructions */
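/*
 * vmv.v.v/vmv.v.x copy vs1 (or the scalar) into vd unconditionally;
 * vmerge.vvm/vmerge.vxm select vs1 (or the scalar) where the mask bit
 * in v0 is set and vs2 where it is clear.  The tail is handled
 * according to the vta setting.
 */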
3479a814 1996#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
f020a7a1
LZ
1997void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1998 uint32_t desc) \
1999{ \
2000 uint32_t vl = env->vl; \
89a32de2 2001 uint32_t esz = sizeof(ETYPE); \
2002 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2003 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2004 uint32_t i; \
2005 \
f714361e 2006 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
2007 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
2008 *((ETYPE *)vd + H(i)) = s1; \
2009 } \
f714361e 2010 env->vstart = 0; \
89a32de2 2011 /* set tail elements to 1s */ \
2012 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2013}
2014
3479a814
FC
2015GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
2016GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
2017GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
2018GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
f020a7a1 2019
3479a814 2020#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
f020a7a1
LZ
2021void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
2022 uint32_t desc) \
2023{ \
2024 uint32_t vl = env->vl; \
89a32de2 2025 uint32_t esz = sizeof(ETYPE); \
2026 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2027 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2028 uint32_t i; \
2029 \
f714361e 2030 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
2031 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
2032 } \
f714361e 2033 env->vstart = 0; \
89a32de2 2034 /* set tail elements to 1s */ \
2035 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2036}
2037
3479a814
FC
2038GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
2039GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
2040GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
2041GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
f020a7a1 2042
3479a814 2043#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
f020a7a1
LZ
2044void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2045 CPURISCVState *env, uint32_t desc) \
2046{ \
f020a7a1 2047 uint32_t vl = env->vl; \
89a32de2 2048 uint32_t esz = sizeof(ETYPE); \
2049 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2050 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2051 uint32_t i; \
2052 \
f714361e 2053 for (i = env->vstart; i < vl; i++) { \
f9298de5 2054 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
f020a7a1
LZ
2055 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
2056 } \
f714361e 2057 env->vstart = 0; \
89a32de2 2058 /* set tail elements to 1s */ \
2059 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2060}
2061
3479a814
FC
2062GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
2063GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
2064GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
2065GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
f020a7a1 2066
3479a814 2067#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
f020a7a1
LZ
2068void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2069 void *vs2, CPURISCVState *env, uint32_t desc) \
2070{ \
f020a7a1 2071 uint32_t vl = env->vl; \
89a32de2 2072 uint32_t esz = sizeof(ETYPE); \
2073 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2074 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2075 uint32_t i; \
2076 \
f714361e 2077 for (i = env->vstart; i < vl; i++) { \
f020a7a1 2078 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 2079 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
2080 (ETYPE)(target_long)s1); \
2081 *((ETYPE *)vd + H(i)) = d; \
2082 } \
f714361e 2083 env->vstart = 0; \
89a32de2 2084 /* set tail elements to 1s */ \
2085 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2086}
2087
3479a814
FC
2088GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
2089GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
2090GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
2091GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
eb2650e3
LZ
2092
2093/*
3b57254d 2094 * Vector Fixed-Point Arithmetic Instructions
eb2650e3
LZ
2095 */
2096
2097/* Vector Single-Width Saturating Add and Subtract */
2098
2099/*
2100 * Fixed-point instructions need a rounding mode and may saturate,
2101 * so define common macros for fixed point here.
2102 */
2103typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2104 CPURISCVState *env, int vxrm);
2105
2106#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2107static inline void \
2108do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2109 CPURISCVState *env, int vxrm) \
2110{ \
2111 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2112 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2113 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2114}
2115
2116static inline void
2117vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2118 CPURISCVState *env,
f9298de5 2119 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 2120 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 2121{
f714361e 2122 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2123 if (!vm && !vext_elem_mask(v0, i)) {
72e17a9f
YTC
2124 /* set masked-off elements to 1s */
2125 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
eb2650e3
LZ
2126 continue;
2127 }
2128 fn(vd, vs1, vs2, i, env, vxrm);
2129 }
f714361e 2130 env->vstart = 0;
eb2650e3
LZ
2131}
2132
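/*
 * vext_vv_rm_2() reads env->vxrm once and forwards it to vext_vv_rm_1()
 * as a literal 0..3, matching the rnu/rne/rdn/rod encodings used by
 * get_round().
 */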
2133static inline void
2134vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2135 CPURISCVState *env,
8a085fb2 2136 uint32_t desc,
09106eed 2137 opivv2_rm_fn *fn, uint32_t esz)
eb2650e3 2138{
eb2650e3
LZ
2139 uint32_t vm = vext_vm(desc);
2140 uint32_t vl = env->vl;
09106eed 2141 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2142 uint32_t vta = vext_vta(desc);
72e17a9f 2143 uint32_t vma = vext_vma(desc);
eb2650e3
LZ
2144
2145 switch (env->vxrm) {
2146 case 0: /* rnu */
2147 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2148 env, vl, vm, 0, fn, vma, esz);
eb2650e3
LZ
2149 break;
2150 case 1: /* rne */
2151 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2152 env, vl, vm, 1, fn, vma, esz);
eb2650e3
LZ
2153 break;
2154 case 2: /* rdn */
2155 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2156 env, vl, vm, 2, fn, vma, esz);
eb2650e3
LZ
2157 break;
2158 default: /* rod */
2159 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 2160 env, vl, vm, 3, fn, vma, esz);
eb2650e3
LZ
2161 break;
2162 }
09106eed 2163 /* set tail elements to 1s */
2164 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2165}
2166
2167/* generate helpers for fixed point instructions with OPIVV format */
09106eed 2168#define GEN_VEXT_VV_RM(NAME, ESZ) \
eb2650e3
LZ
2169void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2170 CPURISCVState *env, uint32_t desc) \
2171{ \
8a085fb2 2172 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
09106eed 2173 do_##NAME, ESZ); \
eb2650e3
LZ
2174}
2175
246f8796
WL
2176static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
2177 uint8_t b)
eb2650e3
LZ
2178{
2179 uint8_t res = a + b;
2180 if (res < a) {
2181 res = UINT8_MAX;
2182 env->vxsat = 0x1;
2183 }
2184 return res;
2185}
2186
2187static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2188 uint16_t b)
2189{
2190 uint16_t res = a + b;
2191 if (res < a) {
2192 res = UINT16_MAX;
2193 env->vxsat = 0x1;
2194 }
2195 return res;
2196}
2197
2198static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2199 uint32_t b)
2200{
2201 uint32_t res = a + b;
2202 if (res < a) {
2203 res = UINT32_MAX;
2204 env->vxsat = 0x1;
2205 }
2206 return res;
2207}
2208
2209static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2210 uint64_t b)
2211{
2212 uint64_t res = a + b;
2213 if (res < a) {
2214 res = UINT64_MAX;
2215 env->vxsat = 0x1;
2216 }
2217 return res;
2218}
2219
2220RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2221RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2222RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2223RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
09106eed 2224GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2225GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2226GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2227GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
eb2650e3
LZ
2228
2229typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2230 CPURISCVState *env, int vxrm);
2231
2232#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2233static inline void \
2234do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2235 CPURISCVState *env, int vxrm) \
2236{ \
2237 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2238 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2239}
2240
2241static inline void
2242vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2243 CPURISCVState *env,
f9298de5 2244 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 2245 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 2246{
f714361e 2247 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2248 if (!vm && !vext_elem_mask(v0, i)) {
72e17a9f
YTC
2249 /* set masked-off elements to 1s */
2250 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
eb2650e3
LZ
2251 continue;
2252 }
2253 fn(vd, s1, vs2, i, env, vxrm);
2254 }
f714361e 2255 env->vstart = 0;
eb2650e3
LZ
2256}
2257
2258static inline void
2259vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2260 CPURISCVState *env,
8a085fb2 2261 uint32_t desc,
09106eed 2262 opivx2_rm_fn *fn, uint32_t esz)
eb2650e3 2263{
eb2650e3
LZ
2264 uint32_t vm = vext_vm(desc);
2265 uint32_t vl = env->vl;
09106eed 2266 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2267 uint32_t vta = vext_vta(desc);
72e17a9f 2268 uint32_t vma = vext_vma(desc);
eb2650e3
LZ
2269
2270 switch (env->vxrm) {
2271 case 0: /* rnu */
2272 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2273 env, vl, vm, 0, fn, vma, esz);
eb2650e3
LZ
2274 break;
2275 case 1: /* rne */
2276 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2277 env, vl, vm, 1, fn, vma, esz);
eb2650e3
LZ
2278 break;
2279 case 2: /* rdn */
2280 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2281 env, vl, vm, 2, fn, vma, esz);
eb2650e3
LZ
2282 break;
2283 default: /* rod */
2284 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2285 env, vl, vm, 3, fn, vma, esz);
eb2650e3
LZ
2286 break;
2287 }
09106eed 2288 /* set tail elements to 1s */
2289 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2290}
2291
2292/* generate helpers for fixed point instructions with OPIVX format */
09106eed 2293#define GEN_VEXT_VX_RM(NAME, ESZ) \
eb2650e3 2294void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2295 void *vs2, CPURISCVState *env, \
2296 uint32_t desc) \
eb2650e3 2297{ \
8a085fb2 2298 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
09106eed 2299 do_##NAME, ESZ); \
eb2650e3
LZ
2300}
2301
2302RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2303RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2304RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2305RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
09106eed 2306GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2307GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2308GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2309GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
eb2650e3
LZ
2310
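/*
 * Signed saturating add/sub use the classic sign-bit test: for a + b,
 * overflow occurred iff both operands have the same sign and the result
 * has the opposite sign, i.e. (res ^ a) & (res ^ b) has the sign bit
 * set.  For a - b (see ssub8 below) the condition is
 * (res ^ a) & (a ^ b) instead.
 */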
2311static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2312{
2313 int8_t res = a + b;
2314 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2315 res = a > 0 ? INT8_MAX : INT8_MIN;
2316 env->vxsat = 0x1;
2317 }
2318 return res;
2319}
2320
246f8796
WL
2321static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
2322 int16_t b)
eb2650e3
LZ
2323{
2324 int16_t res = a + b;
2325 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2326 res = a > 0 ? INT16_MAX : INT16_MIN;
2327 env->vxsat = 0x1;
2328 }
2329 return res;
2330}
2331
246f8796
WL
2332static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
2333 int32_t b)
eb2650e3
LZ
2334{
2335 int32_t res = a + b;
2336 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2337 res = a > 0 ? INT32_MAX : INT32_MIN;
2338 env->vxsat = 0x1;
2339 }
2340 return res;
2341}
2342
246f8796
WL
2343static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
2344 int64_t b)
eb2650e3
LZ
2345{
2346 int64_t res = a + b;
2347 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2348 res = a > 0 ? INT64_MAX : INT64_MIN;
2349 env->vxsat = 0x1;
2350 }
2351 return res;
2352}
2353
2354RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2355RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2356RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2357RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
09106eed 2358GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2359GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2360GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2361GEN_VEXT_VV_RM(vsadd_vv_d, 8)
eb2650e3
LZ
2362
2363RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2364RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2365RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2366RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
09106eed 2367GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2368GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2369GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2370GEN_VEXT_VX_RM(vsadd_vx_d, 8)
eb2650e3 2371
246f8796
WL
2372static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2373 uint8_t b)
eb2650e3
LZ
2374{
2375 uint8_t res = a - b;
2376 if (res > a) {
2377 res = 0;
2378 env->vxsat = 0x1;
2379 }
2380 return res;
2381}
2382
2383static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2384 uint16_t b)
2385{
2386 uint16_t res = a - b;
2387 if (res > a) {
2388 res = 0;
2389 env->vxsat = 0x1;
2390 }
2391 return res;
2392}
2393
2394static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2395 uint32_t b)
2396{
2397 uint32_t res = a - b;
2398 if (res > a) {
2399 res = 0;
2400 env->vxsat = 0x1;
2401 }
2402 return res;
2403}
2404
2405static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2406 uint64_t b)
2407{
2408 uint64_t res = a - b;
2409 if (res > a) {
2410 res = 0;
2411 env->vxsat = 0x1;
2412 }
2413 return res;
2414}
2415
2416RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2417RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2418RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2419RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
09106eed 2420GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2421GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2422GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2423GEN_VEXT_VV_RM(vssubu_vv_d, 8)
eb2650e3
LZ
2424
2425RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2426RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2427RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2428RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
09106eed 2429GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2430GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2431GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2432GEN_VEXT_VX_RM(vssubu_vx_d, 8)
eb2650e3
LZ
2433
2434static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2435{
2436 int8_t res = a - b;
2437 if ((res ^ a) & (a ^ b) & INT8_MIN) {
65606f21 2438 res = a >= 0 ? INT8_MAX : INT8_MIN;
eb2650e3
LZ
2439 env->vxsat = 0x1;
2440 }
2441 return res;
2442}
2443
246f8796
WL
2444static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
2445 int16_t b)
eb2650e3
LZ
2446{
2447 int16_t res = a - b;
2448 if ((res ^ a) & (a ^ b) & INT16_MIN) {
65606f21 2449 res = a >= 0 ? INT16_MAX : INT16_MIN;
eb2650e3
LZ
2450 env->vxsat = 0x1;
2451 }
2452 return res;
2453}
2454
246f8796
WL
2455static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
2456 int32_t b)
eb2650e3
LZ
2457{
2458 int32_t res = a - b;
2459 if ((res ^ a) & (a ^ b) & INT32_MIN) {
65606f21 2460 res = a >= 0 ? INT32_MAX : INT32_MIN;
eb2650e3
LZ
2461 env->vxsat = 0x1;
2462 }
2463 return res;
2464}
2465
246f8796
WL
2466static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
2467 int64_t b)
eb2650e3
LZ
2468{
2469 int64_t res = a - b;
2470 if ((res ^ a) & (a ^ b) & INT64_MIN) {
65606f21 2471 res = a >= 0 ? INT64_MAX : INT64_MIN;
eb2650e3
LZ
2472 env->vxsat = 0x1;
2473 }
2474 return res;
2475}
2476
2477RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2478RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2479RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2480RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
09106eed 2481GEN_VEXT_VV_RM(vssub_vv_b, 1)
2482GEN_VEXT_VV_RM(vssub_vv_h, 2)
2483GEN_VEXT_VV_RM(vssub_vv_w, 4)
2484GEN_VEXT_VV_RM(vssub_vv_d, 8)
eb2650e3
LZ
2485
2486RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2487RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2488RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2489RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
09106eed 2490GEN_VEXT_VX_RM(vssub_vx_b, 1)
2491GEN_VEXT_VX_RM(vssub_vx_h, 2)
2492GEN_VEXT_VX_RM(vssub_vx_w, 4)
2493GEN_VEXT_VX_RM(vssub_vx_d, 8)
b7aee481
LZ
2494
2495/* Vector Single-Width Averaging Add and Subtract */
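/*
 * get_round() computes the rounding increment (0 or 1) that is added
 * after shifting v right by 'shift' bits, according to the fixed-point
 * rounding mode vxrm: rnu (0) adds the last bit shifted out, rne (1)
 * rounds to nearest even, rdn (2) truncates, and rod (3) sets the
 * result's LSB if any bit was shifted out.  For example, with
 * v = 0b0111 and shift = 2 (so v >> shift = 0b01): rnu and rne give
 * 0b01 + 1 = 2, rdn gives 1, and rod leaves 1 since the LSB is
 * already set.
 */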
2496static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2497{
2498 uint8_t d = extract64(v, shift, 1);
2499 uint8_t d1;
2500 uint64_t D1, D2;
2501
2502 if (shift == 0 || shift > 64) {
2503 return 0;
2504 }
2505
2506 d1 = extract64(v, shift - 1, 1);
2507 D1 = extract64(v, 0, shift);
2508 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2509 return d1;
2510 } else if (vxrm == 1) { /* round-to-nearest-even */
2511 if (shift > 1) {
2512 D2 = extract64(v, 0, shift - 1);
2513 return d1 & ((D2 != 0) | d);
2514 } else {
2515 return d1 & d;
2516 }
2517 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2518 return !d & (D1 != 0);
2519 }
2520 return 0; /* round-down (truncate) */
2521}
2522
246f8796
WL
2523static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
2524 int32_t b)
b7aee481
LZ
2525{
2526 int64_t res = (int64_t)a + b;
2527 uint8_t round = get_round(vxrm, res, 1);
2528
2529 return (res >> 1) + round;
2530}
2531
246f8796
WL
2532static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
2533 int64_t b)
b7aee481
LZ
2534{
2535 int64_t res = a + b;
2536 uint8_t round = get_round(vxrm, res, 1);
2537 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2538
2539 /* With signed overflow, bit 64 is inverse of bit 63. */
2540 return ((res >> 1) ^ over) + round;
2541}
2542
2543RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2544RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2545RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2546RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
09106eed 2547GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2548GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2549GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2550GEN_VEXT_VV_RM(vaadd_vv_d, 8)
b7aee481
LZ
2551
2552RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2553RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2554RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2555RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
09106eed 2556GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2557GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2558GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2559GEN_VEXT_VX_RM(vaadd_vx_d, 8)
b7aee481 2560
8b99a110
FC
2561static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2562 uint32_t a, uint32_t b)
2563{
2564 uint64_t res = (uint64_t)a + b;
2565 uint8_t round = get_round(vxrm, res, 1);
2566
2567 return (res >> 1) + round;
2568}
2569
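/*
 * For the 64-bit unsigned variants the intermediate sum/difference does
 * not fit in 64 bits, so the carry (res < a) or borrow (res > a) is
 * recovered separately and reinserted as bit 63 after the halving shift.
 */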
2570static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2571 uint64_t a, uint64_t b)
2572{
2573 uint64_t res = a + b;
2574 uint8_t round = get_round(vxrm, res, 1);
2575 uint64_t over = (uint64_t)(res < a) << 63;
2576
2577 return ((res >> 1) | over) + round;
2578}
2579
2580RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2581RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2582RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2583RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
09106eed 2584GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2585GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2586GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2587GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
8b99a110
FC
2588
2589RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2590RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2591RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2592RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
09106eed 2593GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2594GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2595GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2596GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
8b99a110 2597
246f8796
WL
2598static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2599 int32_t b)
b7aee481
LZ
2600{
2601 int64_t res = (int64_t)a - b;
2602 uint8_t round = get_round(vxrm, res, 1);
2603
2604 return (res >> 1) + round;
2605}
2606
246f8796
WL
2607static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
2608 int64_t b)
b7aee481
LZ
2609{
2610 int64_t res = (int64_t)a - b;
2611 uint8_t round = get_round(vxrm, res, 1);
2612 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2613
2614 /* With signed overflow, bit 64 is inverse of bit 63. */
2615 return ((res >> 1) ^ over) + round;
2616}
2617
2618RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2619RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2620RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2621RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
09106eed 2622GEN_VEXT_VV_RM(vasub_vv_b, 1)
2623GEN_VEXT_VV_RM(vasub_vv_h, 2)
2624GEN_VEXT_VV_RM(vasub_vv_w, 4)
2625GEN_VEXT_VV_RM(vasub_vv_d, 8)
b7aee481
LZ
2626
2627RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2628RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2629RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2630RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
09106eed 2631GEN_VEXT_VX_RM(vasub_vx_b, 1)
2632GEN_VEXT_VX_RM(vasub_vx_h, 2)
2633GEN_VEXT_VX_RM(vasub_vx_w, 4)
2634GEN_VEXT_VX_RM(vasub_vx_d, 8)
9f0ff9e5 2635
8b99a110
FC
2636static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2637 uint32_t a, uint32_t b)
2638{
2639 int64_t res = (int64_t)a - b;
2640 uint8_t round = get_round(vxrm, res, 1);
2641
2642 return (res >> 1) + round;
2643}
2644
2645static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2646 uint64_t a, uint64_t b)
2647{
2648 uint64_t res = (uint64_t)a - b;
2649 uint8_t round = get_round(vxrm, res, 1);
2650 uint64_t over = (uint64_t)(res > a) << 63;
2651
2652 return ((res >> 1) | over) + round;
2653}
2654
2655RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2656RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2657RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2658RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
09106eed 2659GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2660GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2661GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2662GEN_VEXT_VV_RM(vasubu_vv_d, 8)
8b99a110
FC
2663
2664RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2665RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2666RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2667RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
09106eed 2668GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2669GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2670GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2671GEN_VEXT_VX_RM(vasubu_vx_d, 8)
8b99a110 2672
9f0ff9e5
LZ
2673/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
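/*
 * vsmul computes (a * b) >> (SEW - 1) with rounding according to vxrm
 * and saturates the result to the signed SEW-bit range, setting vxsat
 * when saturation occurs.  The 64-bit helper special-cases
 * INT64_MIN * INT64_MIN, whose shifted product cannot be represented.
 */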
2674static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2675{
2676 uint8_t round;
2677 int16_t res;
2678
2679 res = (int16_t)a * (int16_t)b;
2680 round = get_round(vxrm, res, 7);
c45eff30 2681 res = (res >> 7) + round;
9f0ff9e5
LZ
2682
2683 if (res > INT8_MAX) {
2684 env->vxsat = 0x1;
2685 return INT8_MAX;
2686 } else if (res < INT8_MIN) {
2687 env->vxsat = 0x1;
2688 return INT8_MIN;
2689 } else {
2690 return res;
2691 }
2692}
2693
2694static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2695{
2696 uint8_t round;
2697 int32_t res;
2698
2699 res = (int32_t)a * (int32_t)b;
2700 round = get_round(vxrm, res, 15);
c45eff30 2701 res = (res >> 15) + round;
9f0ff9e5
LZ
2702
2703 if (res > INT16_MAX) {
2704 env->vxsat = 0x1;
2705 return INT16_MAX;
2706 } else if (res < INT16_MIN) {
2707 env->vxsat = 0x1;
2708 return INT16_MIN;
2709 } else {
2710 return res;
2711 }
2712}
2713
2714static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2715{
2716 uint8_t round;
2717 int64_t res;
2718
2719 res = (int64_t)a * (int64_t)b;
2720 round = get_round(vxrm, res, 31);
c45eff30 2721 res = (res >> 31) + round;
9f0ff9e5
LZ
2722
2723 if (res > INT32_MAX) {
2724 env->vxsat = 0x1;
2725 return INT32_MAX;
2726 } else if (res < INT32_MIN) {
2727 env->vxsat = 0x1;
2728 return INT32_MIN;
2729 } else {
2730 return res;
2731 }
2732}
2733
2734static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2735{
2736 uint8_t round;
2737 uint64_t hi_64, lo_64;
2738 int64_t res;
2739
2740 if (a == INT64_MIN && b == INT64_MIN) {
2741 env->vxsat = 1;
2742 return INT64_MAX;
2743 }
2744
2745 muls64(&lo_64, &hi_64, a, b);
2746 round = get_round(vxrm, lo_64, 63);
2747 /*
2748 * Cannot overflow, as there are always
2749 * 2 sign bits after multiply.
2750 */
2751 res = (hi_64 << 1) | (lo_64 >> 63);
2752 if (round) {
2753 if (res == INT64_MAX) {
2754 env->vxsat = 1;
2755 } else {
2756 res += 1;
2757 }
2758 }
2759 return res;
2760}
2761
2762RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2763RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2764RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2765RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
09106eed 2766GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2767GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2768GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2769GEN_VEXT_VV_RM(vsmul_vv_d, 8)
9f0ff9e5
LZ
2770
2771RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2772RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2773RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2774RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
09106eed 2775GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2776GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2777GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2778GEN_VEXT_VX_RM(vsmul_vx_d, 8)
0a1eaf00 2779
04a61406
LZ
2780/* Vector Single-Width Scaling Shift Instructions */
2781static inline uint8_t
2782vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2783{
2784 uint8_t round, shift = b & 0x7;
2785 uint8_t res;
2786
2787 round = get_round(vxrm, a, shift);
c45eff30 2788 res = (a >> shift) + round;
04a61406
LZ
2789 return res;
2790}
2791static inline uint16_t
2792vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2793{
2794 uint8_t round, shift = b & 0xf;
04a61406
LZ
2795
2796 round = get_round(vxrm, a, shift);
66997c42 2797 return (a >> shift) + round;
04a61406
LZ
2798}
2799static inline uint32_t
2800vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2801{
2802 uint8_t round, shift = b & 0x1f;
04a61406
LZ
2803
2804 round = get_round(vxrm, a, shift);
66997c42 2805 return (a >> shift) + round;
04a61406
LZ
2806}
2807static inline uint64_t
2808vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2809{
2810 uint8_t round, shift = b & 0x3f;
04a61406
LZ
2811
2812 round = get_round(vxrm, a, shift);
66997c42 2813 return (a >> shift) + round;
04a61406
LZ
2814}
2815RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2816RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2817RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2818RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
09106eed 2819GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2820GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2821GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2822GEN_VEXT_VV_RM(vssrl_vv_d, 8)
04a61406
LZ
2823
2824RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2825RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2826RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2827RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
09106eed 2828GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2829GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2830GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2831GEN_VEXT_VX_RM(vssrl_vx_d, 8)
04a61406
LZ
2832
2833static inline int8_t
2834vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2835{
2836 uint8_t round, shift = b & 0x7;
04a61406
LZ
2837
2838 round = get_round(vxrm, a, shift);
66997c42 2839 return (a >> shift) + round;
04a61406
LZ
2840}
2841static inline int16_t
2842vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2843{
2844 uint8_t round, shift = b & 0xf;
04a61406
LZ
2845
2846 round = get_round(vxrm, a, shift);
66997c42 2847 return (a >> shift) + round;
04a61406
LZ
2848}
2849static inline int32_t
2850vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2851{
2852 uint8_t round, shift = b & 0x1f;
04a61406
LZ
2853
2854 round = get_round(vxrm, a, shift);
66997c42 2855 return (a >> shift) + round;
04a61406
LZ
2856}
2857static inline int64_t
2858vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2859{
2860 uint8_t round, shift = b & 0x3f;
04a61406
LZ
2861
2862 round = get_round(vxrm, a, shift);
66997c42 2863 return (a >> shift) + round;
04a61406 2864}
9ff3d287 2865
04a61406
LZ
2866RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2867RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2868RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2869RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
09106eed 2870GEN_VEXT_VV_RM(vssra_vv_b, 1)
2871GEN_VEXT_VV_RM(vssra_vv_h, 2)
2872GEN_VEXT_VV_RM(vssra_vv_w, 4)
2873GEN_VEXT_VV_RM(vssra_vv_d, 8)
04a61406
LZ
2874
2875RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2876RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2877RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2878RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
09106eed 2879GEN_VEXT_VX_RM(vssra_vx_b, 1)
2880GEN_VEXT_VX_RM(vssra_vx_h, 2)
2881GEN_VEXT_VX_RM(vssra_vx_w, 4)
2882GEN_VEXT_VX_RM(vssra_vx_d, 8)
9ff3d287
LZ
2883
2884/* Vector Narrowing Fixed-Point Clip Instructions */
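/*
 * vnclip/vnclipu shift a 2*SEW-wide source element right by the shift
 * amount taken from the low bits of the SEW-wide operand, round the
 * result according to vxrm, and clamp it into SEW bits, setting vxsat
 * if clamping was needed.
 */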
2885static inline int8_t
2886vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2887{
2888 uint8_t round, shift = b & 0xf;
2889 int16_t res;
2890
2891 round = get_round(vxrm, a, shift);
c45eff30 2892 res = (a >> shift) + round;
9ff3d287
LZ
2893 if (res > INT8_MAX) {
2894 env->vxsat = 0x1;
2895 return INT8_MAX;
2896 } else if (res < INT8_MIN) {
2897 env->vxsat = 0x1;
2898 return INT8_MIN;
2899 } else {
2900 return res;
2901 }
2902}
2903
2904static inline int16_t
2905vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2906{
2907 uint8_t round, shift = b & 0x1f;
2908 int32_t res;
2909
2910 round = get_round(vxrm, a, shift);
c45eff30 2911 res = (a >> shift) + round;
9ff3d287
LZ
2912 if (res > INT16_MAX) {
2913 env->vxsat = 0x1;
2914 return INT16_MAX;
2915 } else if (res < INT16_MIN) {
2916 env->vxsat = 0x1;
2917 return INT16_MIN;
2918 } else {
2919 return res;
2920 }
2921}
2922
2923static inline int32_t
2924vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2925{
2926 uint8_t round, shift = b & 0x3f;
2927 int64_t res;
2928
2929 round = get_round(vxrm, a, shift);
c45eff30 2930 res = (a >> shift) + round;
9ff3d287
LZ
2931 if (res > INT32_MAX) {
2932 env->vxsat = 0x1;
2933 return INT32_MAX;
2934 } else if (res < INT32_MIN) {
2935 env->vxsat = 0x1;
2936 return INT32_MIN;
2937 } else {
2938 return res;
2939 }
2940}
2941
a70b3a73
FC
2942RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2943RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2944RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
09106eed 2945GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2946GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2947GEN_VEXT_VV_RM(vnclip_wv_w, 4)
a70b3a73
FC
2948
2949RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2950RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2951RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
09106eed 2952GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2953GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2954GEN_VEXT_VX_RM(vnclip_wx_w, 4)
9ff3d287
LZ
2955
2956static inline uint8_t
2957vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2958{
2959 uint8_t round, shift = b & 0xf;
2960 uint16_t res;
2961
2962 round = get_round(vxrm, a, shift);
c45eff30 2963 res = (a >> shift) + round;
9ff3d287
LZ
2964 if (res > UINT8_MAX) {
2965 env->vxsat = 0x1;
2966 return UINT8_MAX;
2967 } else {
2968 return res;
2969 }
2970}
2971
2972static inline uint16_t
2973vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2974{
2975 uint8_t round, shift = b & 0x1f;
2976 uint32_t res;
2977
2978 round = get_round(vxrm, a, shift);
c45eff30 2979 res = (a >> shift) + round;
9ff3d287
LZ
2980 if (res > UINT16_MAX) {
2981 env->vxsat = 0x1;
2982 return UINT16_MAX;
2983 } else {
2984 return res;
2985 }
2986}
2987
2988static inline uint32_t
2989vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2990{
2991 uint8_t round, shift = b & 0x3f;
a70b3a73 2992 uint64_t res;
9ff3d287
LZ
2993
2994 round = get_round(vxrm, a, shift);
c45eff30 2995 res = (a >> shift) + round;
9ff3d287
LZ
2996 if (res > UINT32_MAX) {
2997 env->vxsat = 0x1;
2998 return UINT32_MAX;
2999 } else {
3000 return res;
3001 }
3002}
3003
a70b3a73
FC
3004RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
3005RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
3006RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
09106eed 3007GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
3008GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
3009GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
9ff3d287 3010
a70b3a73
FC
3011RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
3012RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
3013RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
09106eed 3014GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
3015GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
3016GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
ce2a0343
LZ
3017
3018/*
3b57254d 3019 * Vector Floating-Point Arithmetic Instructions
3020 */
3021/* Vector Single-Width Floating-Point Add/Subtract Instructions */
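/*
 * Each FP helper passes &env->fp_status to softfloat, so element
 * operations use the rounding mode held there and accumulate their
 * exception flags into the same status.
 */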
3022#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3023static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3024 CPURISCVState *env) \
3025{ \
3026 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3027 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3028 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
3029}
3030
5eacf7d8 3031#define GEN_VEXT_VV_ENV(NAME, ESZ) \
ce2a0343
LZ
3032void HELPER(NAME)(void *vd, void *v0, void *vs1, \
3033 void *vs2, CPURISCVState *env, \
3034 uint32_t desc) \
3035{ \
ce2a0343
LZ
3036 uint32_t vm = vext_vm(desc); \
3037 uint32_t vl = env->vl; \
5eacf7d8 3038 uint32_t total_elems = \
3039 vext_get_total_elems(env, desc, ESZ); \
3040 uint32_t vta = vext_vta(desc); \
5b448f44 3041 uint32_t vma = vext_vma(desc); \
ce2a0343
LZ
3042 uint32_t i; \
3043 \
f714361e 3044 for (i = env->vstart; i < vl; i++) { \
f9298de5 3045 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3046 /* set masked-off elements to 1s */ \
3047 vext_set_elems_1s(vd, vma, i * ESZ, \
3048 (i + 1) * ESZ); \
ce2a0343
LZ
3049 continue; \
3050 } \
3051 do_##NAME(vd, vs1, vs2, i, env); \
3052 } \
f714361e 3053 env->vstart = 0; \
5eacf7d8 3054 /* set tail elements to 1s */ \
3055 vext_set_elems_1s(vd, vta, vl * ESZ, \
3056 total_elems * ESZ); \
ce2a0343
LZ
3057}
3058
3059RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3060RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3061RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
5eacf7d8 3062GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
3063GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
3064GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
ce2a0343
LZ
3065
3066#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3067static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3068 CPURISCVState *env) \
3069{ \
3070 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3071 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3072}
3073
5eacf7d8 3074#define GEN_VEXT_VF(NAME, ESZ) \
ce2a0343
LZ
3075void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
3076 void *vs2, CPURISCVState *env, \
3077 uint32_t desc) \
3078{ \
ce2a0343
LZ
3079 uint32_t vm = vext_vm(desc); \
3080 uint32_t vl = env->vl; \
5eacf7d8 3081 uint32_t total_elems = \
c45eff30 3082 vext_get_total_elems(env, desc, ESZ); \
5eacf7d8 3083 uint32_t vta = vext_vta(desc); \
5b448f44 3084 uint32_t vma = vext_vma(desc); \
ce2a0343
LZ
3085 uint32_t i; \
3086 \
f714361e 3087 for (i = env->vstart; i < vl; i++) { \
f9298de5 3088 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3089 /* set masked-off elements to 1s */ \
3090 vext_set_elems_1s(vd, vma, i * ESZ, \
3091 (i + 1) * ESZ); \
ce2a0343
LZ
3092 continue; \
3093 } \
3094 do_##NAME(vd, s1, vs2, i, env); \
3095 } \
f714361e 3096 env->vstart = 0; \
5eacf7d8 3097 /* set tail elements to 1s */ \
3098 vext_set_elems_1s(vd, vta, vl * ESZ, \
3099 total_elems * ESZ); \
ce2a0343
LZ
3100}
3101
3102RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3103RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3104RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
5eacf7d8 3105GEN_VEXT_VF(vfadd_vf_h, 2)
3106GEN_VEXT_VF(vfadd_vf_w, 4)
3107GEN_VEXT_VF(vfadd_vf_d, 8)
ce2a0343
LZ
3108
3109RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3110RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3111RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
5eacf7d8 3112GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
3113GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
3114GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
ce2a0343
LZ
3115RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3116RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3117RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
5eacf7d8 3118GEN_VEXT_VF(vfsub_vf_h, 2)
3119GEN_VEXT_VF(vfsub_vf_w, 4)
3120GEN_VEXT_VF(vfsub_vf_d, 8)
ce2a0343
LZ
3121
3122static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3123{
3124 return float16_sub(b, a, s);
3125}
3126
3127static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3128{
3129 return float32_sub(b, a, s);
3130}
3131
3132static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3133{
3134 return float64_sub(b, a, s);
3135}
3136
3137RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3138RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3139RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
5eacf7d8 3140GEN_VEXT_VF(vfrsub_vf_h, 2)
3141GEN_VEXT_VF(vfrsub_vf_w, 4)
3142GEN_VEXT_VF(vfrsub_vf_d, 8)
eeffab2e
LZ
3143
3144/* Vector Widening Floating-Point Add/Subtract Instructions */
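/*
 * The widening helpers first convert each operand to the double-width
 * format (the 'true' argument selects IEEE half-precision rather than the
 * alternative half-precision encoding) and then operate at the wider
 * precision.
 */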
3145static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3146{
3147 return float32_add(float16_to_float32(a, true, s),
c45eff30 3148 float16_to_float32(b, true, s), s);
eeffab2e
LZ
3149}
3150
3151static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3152{
3153 return float64_add(float32_to_float64(a, s),
c45eff30 3154 float32_to_float64(b, s), s);
eeffab2e
LZ
3156}
3157
3158RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3159RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
5eacf7d8 3160GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3161GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
eeffab2e
LZ
3162RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3163RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
5eacf7d8 3164GEN_VEXT_VF(vfwadd_vf_h, 4)
3165GEN_VEXT_VF(vfwadd_vf_w, 8)
eeffab2e
LZ
3166
3167static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3168{
3169 return float32_sub(float16_to_float32(a, true, s),
c45eff30 3170 float16_to_float32(b, true, s), s);
eeffab2e
LZ
3171}
3172
3173static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3174{
3175 return float64_sub(float32_to_float64(a, s),
c45eff30 3176 float32_to_float64(b, s), s);
eeffab2e
LZ
3178}
3179
3180RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3181RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
5eacf7d8 3182GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3183GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
eeffab2e
LZ
3184RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3185RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
5eacf7d8 3186GEN_VEXT_VF(vfwsub_vf_h, 4)
3187GEN_VEXT_VF(vfwsub_vf_w, 8)
eeffab2e
LZ
3188
3189static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3190{
3191 return float32_add(a, float16_to_float32(b, true, s), s);
3192}
3193
3194static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3195{
3196 return float64_add(a, float32_to_float64(b, s), s);
3197}
3198
3199RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3200RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
5eacf7d8 3201GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3202GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
eeffab2e
LZ
3203RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3204RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
5eacf7d8 3205GEN_VEXT_VF(vfwadd_wf_h, 4)
3206GEN_VEXT_VF(vfwadd_wf_w, 8)
eeffab2e
LZ
3207
3208static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3209{
3210 return float32_sub(a, float16_to_float32(b, true, s), s);
3211}
3212
3213static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3214{
3215 return float64_sub(a, float32_to_float64(b, s), s);
3216}
3217
3218RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3219RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
5eacf7d8 3220GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3221GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
eeffab2e
LZ
3222RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3223RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
5eacf7d8 3224GEN_VEXT_VF(vfwsub_wf_h, 4)
3225GEN_VEXT_VF(vfwsub_wf_w, 8)
0e0057cb
LZ
3226
3227/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3228RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3229RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3230RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
5eacf7d8 3231GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3232GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3233GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
0e0057cb
LZ
3234RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3235RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3236RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
5eacf7d8 3237GEN_VEXT_VF(vfmul_vf_h, 2)
3238GEN_VEXT_VF(vfmul_vf_w, 4)
3239GEN_VEXT_VF(vfmul_vf_d, 8)
0e0057cb
LZ
3240
3241RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3242RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3243RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
5eacf7d8 3244GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3245GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3246GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
0e0057cb
LZ
3247RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3248RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3249RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
5eacf7d8 3250GEN_VEXT_VF(vfdiv_vf_h, 2)
3251GEN_VEXT_VF(vfdiv_vf_w, 4)
3252GEN_VEXT_VF(vfdiv_vf_d, 8)
0e0057cb
LZ
3253
3254static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3255{
3256 return float16_div(b, a, s);
3257}
3258
3259static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3260{
3261 return float32_div(b, a, s);
3262}
3263
3264static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3265{
3266 return float64_div(b, a, s);
3267}
3268
3269RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3270RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3271RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
5eacf7d8 3272GEN_VEXT_VF(vfrdiv_vf_h, 2)
3273GEN_VEXT_VF(vfrdiv_vf_w, 4)
3274GEN_VEXT_VF(vfrdiv_vf_d, 8)
f7c7b7cd
LZ
3275
3276/* Vector Widening Floating-Point Multiply */
3277static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3278{
3279 return float32_mul(float16_to_float32(a, true, s),
c45eff30 3280 float16_to_float32(b, true, s), s);
f7c7b7cd
LZ
3281}
3282
3283static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3284{
3285 return float64_mul(float32_to_float64(a, s),
c45eff30 3286 float32_to_float64(b, s), s);
f7c7b7cd
LZ
3288}
3289RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3290RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
5eacf7d8 3291GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3292GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
f7c7b7cd
LZ
3293RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3294RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
5eacf7d8 3295GEN_VEXT_VF(vfwmul_vf_h, 4)
3296GEN_VEXT_VF(vfwmul_vf_w, 8)
4aa5a8fe
LZ
3297
3298/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3299#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3300static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
c45eff30 3301 CPURISCVState *env) \
4aa5a8fe
LZ
3302{ \
3303 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3304 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3305 TD d = *((TD *)vd + HD(i)); \
3306 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3307}
3308
3309static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3310{
3311 return float16_muladd(a, b, d, 0, s);
3312}
3313
3314static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3315{
3316 return float32_muladd(a, b, d, 0, s);
3317}
3318
3319static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3320{
3321 return float64_muladd(a, b, d, 0, s);
3322}
3323
3324RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3325RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3326RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
5eacf7d8 3327GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3328GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3329GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
4aa5a8fe
LZ
3330
3331#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3332static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
c45eff30 3333 CPURISCVState *env) \
4aa5a8fe
LZ
3334{ \
3335 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3336 TD d = *((TD *)vd + HD(i)); \
3337 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3338}
3339
3340RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3341RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3342RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
5eacf7d8 3343GEN_VEXT_VF(vfmacc_vf_h, 2)
3344GEN_VEXT_VF(vfmacc_vf_w, 4)
3345GEN_VEXT_VF(vfmacc_vf_d, 8)
4aa5a8fe
LZ
3346
3347static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3348{
c45eff30
WL
3349 return float16_muladd(a, b, d, float_muladd_negate_c |
3350 float_muladd_negate_product, s);
4aa5a8fe
LZ
3351}
3352
3353static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3354{
c45eff30
WL
3355 return float32_muladd(a, b, d, float_muladd_negate_c |
3356 float_muladd_negate_product, s);
4aa5a8fe
LZ
3357}
3358
3359static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3360{
c45eff30
WL
3361 return float64_muladd(a, b, d, float_muladd_negate_c |
3362 float_muladd_negate_product, s);
4aa5a8fe
LZ
3363}
3364
3365RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3366RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3367RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
5eacf7d8 3368GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3369GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3370GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
4aa5a8fe
LZ
3371RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3372RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3373RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
5eacf7d8 3374GEN_VEXT_VF(vfnmacc_vf_h, 2)
3375GEN_VEXT_VF(vfnmacc_vf_w, 4)
3376GEN_VEXT_VF(vfnmacc_vf_d, 8)
4aa5a8fe
LZ
3377
3378static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3379{
3380 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3381}
3382
3383static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3384{
3385 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3386}
3387
3388static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3389{
3390 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3391}
3392
3393RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3394RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3395RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
5eacf7d8 3396GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3397GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3398GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
4aa5a8fe
LZ
3399RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3400RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3401RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
5eacf7d8 3402GEN_VEXT_VF(vfmsac_vf_h, 2)
3403GEN_VEXT_VF(vfmsac_vf_w, 4)
3404GEN_VEXT_VF(vfmsac_vf_d, 8)
4aa5a8fe
LZ
3405
3406static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3407{
3408 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3409}
3410
3411static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3412{
3413 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3414}
3415
3416static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3417{
3418 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3419}
3420
3421RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3422RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3423RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
5eacf7d8 3424GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3425GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3426GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
4aa5a8fe
LZ
3427RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3428RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3429RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
5eacf7d8 3430GEN_VEXT_VF(vfnmsac_vf_h, 2)
3431GEN_VEXT_VF(vfnmsac_vf_w, 4)
3432GEN_VEXT_VF(vfnmsac_vf_d, 8)
4aa5a8fe
LZ
3433
3434static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3435{
3436 return float16_muladd(d, b, a, 0, s);
3437}
3438
3439static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3440{
3441 return float32_muladd(d, b, a, 0, s);
3442}
3443
3444static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3445{
3446 return float64_muladd(d, b, a, 0, s);
3447}
3448
3449RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3450RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3451RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
5eacf7d8 3452GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3453GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3454GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
4aa5a8fe
LZ
3455RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3456RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3457RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
5eacf7d8 3458GEN_VEXT_VF(vfmadd_vf_h, 2)
3459GEN_VEXT_VF(vfmadd_vf_w, 4)
3460GEN_VEXT_VF(vfmadd_vf_d, 8)
4aa5a8fe
LZ
3461
3462static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3463{
c45eff30
WL
3464 return float16_muladd(d, b, a, float_muladd_negate_c |
3465 float_muladd_negate_product, s);
4aa5a8fe
LZ
3466}
3467
3468static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3469{
c45eff30
WL
3470 return float32_muladd(d, b, a, float_muladd_negate_c |
3471 float_muladd_negate_product, s);
4aa5a8fe
LZ
3472}
3473
3474static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3475{
c45eff30
WL
3476 return float64_muladd(d, b, a, float_muladd_negate_c |
3477 float_muladd_negate_product, s);
4aa5a8fe
LZ
3478}
3479
3480RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3481RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3482RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
5eacf7d8 3483GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3484GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3485GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
4aa5a8fe
LZ
3486RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3487RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3488RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
5eacf7d8 3489GEN_VEXT_VF(vfnmadd_vf_h, 2)
3490GEN_VEXT_VF(vfnmadd_vf_w, 4)
3491GEN_VEXT_VF(vfnmadd_vf_d, 8)
4aa5a8fe
LZ
3492
3493static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3494{
3495 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3496}
3497
3498static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3499{
3500 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3501}
3502
3503static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3504{
3505 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3506}
3507
3508RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3509RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3510RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
5eacf7d8 3511GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3512GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3513GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
4aa5a8fe
LZ
3514RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3515RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3516RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
5eacf7d8 3517GEN_VEXT_VF(vfmsub_vf_h, 2)
3518GEN_VEXT_VF(vfmsub_vf_w, 4)
3519GEN_VEXT_VF(vfmsub_vf_d, 8)
4aa5a8fe
LZ
3520
3521static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3522{
3523 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3524}
3525
3526static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3527{
3528 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3529}
3530
3531static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3532{
3533 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3534}
3535
3536RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3537RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3538RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
5eacf7d8 3539GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3540GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3541GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
4aa5a8fe
LZ
3542RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3543RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3544RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
5eacf7d8 3545GEN_VEXT_VF(vfnmsub_vf_h, 2)
3546GEN_VEXT_VF(vfnmsub_vf_w, 4)
3547GEN_VEXT_VF(vfnmsub_vf_d, 8)
0dd50959
LZ
3548
3549/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3550static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3551{
3552 return float32_muladd(float16_to_float32(a, true, s),
c45eff30 3553 float16_to_float32(b, true, s), d, 0, s);
0dd50959
LZ
3554}
3555
3556static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3557{
3558 return float64_muladd(float32_to_float64(a, s),
c45eff30 3559 float32_to_float64(b, s), d, 0, s);
0dd50959
LZ
3560}
3561
3562RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3563RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
5eacf7d8 3564GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3565GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
0dd50959
LZ
3566RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3567RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
5eacf7d8 3568GEN_VEXT_VF(vfwmacc_vf_h, 4)
3569GEN_VEXT_VF(vfwmacc_vf_w, 8)
0dd50959
LZ
3570
3571static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3572{
3573 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3574 float16_to_float32(b, true, s), d,
3575 float_muladd_negate_c | float_muladd_negate_product,
3576 s);
0dd50959
LZ
3577}
3578
3579static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3580{
c45eff30
WL
3581 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
3582 d, float_muladd_negate_c |
3583 float_muladd_negate_product, s);
0dd50959
LZ
3584}
3585
3586RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3587RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
5eacf7d8 3588GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3589GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
0dd50959
LZ
3590RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3591RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
5eacf7d8 3592GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3593GEN_VEXT_VF(vfwnmacc_vf_w, 8)
0dd50959
LZ
3594
3595static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3596{
3597 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3598 float16_to_float32(b, true, s), d,
3599 float_muladd_negate_c, s);
0dd50959
LZ
3600}
3601
3602static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3603{
3604 return float64_muladd(float32_to_float64(a, s),
c45eff30
WL
3605 float32_to_float64(b, s), d,
3606 float_muladd_negate_c, s);
0dd50959
LZ
3607}
3608
3609RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3610RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
5eacf7d8 3611GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3612GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
0dd50959
LZ
3613RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3614RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
5eacf7d8 3615GEN_VEXT_VF(vfwmsac_vf_h, 4)
3616GEN_VEXT_VF(vfwmsac_vf_w, 8)
0dd50959
LZ
3617
3618static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3619{
3620 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3621 float16_to_float32(b, true, s), d,
3622 float_muladd_negate_product, s);
0dd50959
LZ
3623}
3624
3625static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3626{
3627 return float64_muladd(float32_to_float64(a, s),
c45eff30
WL
3628 float32_to_float64(b, s), d,
3629 float_muladd_negate_product, s);
0dd50959
LZ
3630}
3631
3632RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3633RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
5eacf7d8 3634GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3635GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
0dd50959
LZ
3636RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3637RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
5eacf7d8 3638GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3639GEN_VEXT_VF(vfwnmsac_vf_w, 8)
d9e4ce72
LZ
3640
3641/* Vector Floating-Point Square-Root Instruction */
3642/* (TD, T2, TX2) */
3643#define OP_UU_H uint16_t, uint16_t, uint16_t
3644#define OP_UU_W uint32_t, uint32_t, uint32_t
3645#define OP_UU_D uint64_t, uint64_t, uint64_t
3646
c45eff30 3647#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
d9e4ce72 3648static void do_##NAME(void *vd, void *vs2, int i, \
c45eff30 3649 CPURISCVState *env) \
d9e4ce72
LZ
3650{ \
3651 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3652 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3653}
3654
5eacf7d8 3655#define GEN_VEXT_V_ENV(NAME, ESZ) \
d9e4ce72 3656void HELPER(NAME)(void *vd, void *v0, void *vs2, \
c45eff30 3657 CPURISCVState *env, uint32_t desc) \
d9e4ce72 3658{ \
d9e4ce72
LZ
3659 uint32_t vm = vext_vm(desc); \
3660 uint32_t vl = env->vl; \
5eacf7d8 3661 uint32_t total_elems = \
3662 vext_get_total_elems(env, desc, ESZ); \
3663 uint32_t vta = vext_vta(desc); \
5b448f44 3664 uint32_t vma = vext_vma(desc); \
d9e4ce72
LZ
3665 uint32_t i; \
3666 \
3667 if (vl == 0) { \
3668 return; \
3669 } \
f714361e 3670 for (i = env->vstart; i < vl; i++) { \
f9298de5 3671 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3672 /* set masked-off elements to 1s */ \
3673 vext_set_elems_1s(vd, vma, i * ESZ, \
3674 (i + 1) * ESZ); \
d9e4ce72
LZ
3675 continue; \
3676 } \
3677 do_##NAME(vd, vs2, i, env); \
3678 } \
f714361e 3679 env->vstart = 0; \
5eacf7d8 3680 vext_set_elems_1s(vd, vta, vl * ESZ, \
3681 total_elems * ESZ); \
d9e4ce72
LZ
3682}
3683
3684RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3685RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3686RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
5eacf7d8 3687GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3688GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3689GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
230b53dd 3690
e848a1e5
FC
3691/*
3692 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3693 *
3694 * Adapted from riscv-v-spec recip.c:
3695 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3696 */
3697static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3698{
3699 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3700 uint64_t exp = extract64(f, frac_size, exp_size);
3701 uint64_t frac = extract64(f, 0, frac_size);
3702
3703 const uint8_t lookup_table[] = {
3704 52, 51, 50, 48, 47, 46, 44, 43,
3705 42, 41, 40, 39, 38, 36, 35, 34,
3706 33, 32, 31, 30, 30, 29, 28, 27,
3707 26, 25, 24, 23, 23, 22, 21, 20,
3708 19, 19, 18, 17, 16, 16, 15, 14,
3709 14, 13, 12, 12, 11, 10, 10, 9,
3710 9, 8, 7, 7, 6, 6, 5, 4,
3711 4, 3, 3, 2, 2, 1, 1, 0,
3712 127, 125, 123, 121, 119, 118, 116, 114,
3713 113, 111, 109, 108, 106, 105, 103, 102,
3714 100, 99, 97, 96, 95, 93, 92, 91,
3715 90, 88, 87, 86, 85, 84, 83, 82,
3716 80, 79, 78, 77, 76, 75, 74, 73,
3717 72, 71, 70, 70, 69, 68, 67, 66,
3718 65, 64, 63, 63, 62, 61, 60, 59,
3719 59, 58, 57, 56, 56, 55, 54, 53
3720 };
3721 const int precision = 7;
3722
3723 if (exp == 0 && frac != 0) { /* subnormal */
3724 /* Normalize the subnormal. */
3725 while (extract64(frac, frac_size - 1, 1) == 0) {
3726 exp--;
3727 frac <<= 1;
3728 }
3729
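        /* Shift out the leading 1 so that frac again holds only the fraction bits. */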
3730 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3731 }
3732
3733 int idx = ((exp & 1) << (precision - 1)) |
c45eff30 3734 (frac >> (frac_size - precision + 1));
e848a1e5 3735 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
c45eff30 3736 (frac_size - precision);
e848a1e5
FC
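    /*
     * ~exp is -exp - 1 in two's complement, so this computes
     * floor((3 * B - 1 - exp) / 2) with B = 2^(exp_size-1) - 1 (the
     * exponent bias), i.e. the biased exponent of the 1/sqrt() estimate.
     */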
3737 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3738
3739 uint64_t val = 0;
3740 val = deposit64(val, 0, frac_size, out_frac);
3741 val = deposit64(val, frac_size, exp_size, out_exp);
3742 val = deposit64(val, frac_size + exp_size, 1, sign);
3743 return val;
3744}
3745
3746static float16 frsqrt7_h(float16 f, float_status *s)
3747{
3748 int exp_size = 5, frac_size = 10;
3749 bool sign = float16_is_neg(f);
3750
3751 /*
3752 * frsqrt7(sNaN) = canonical NaN
3753 * frsqrt7(-inf) = canonical NaN
3754 * frsqrt7(-normal) = canonical NaN
3755 * frsqrt7(-subnormal) = canonical NaN
3756 */
3757 if (float16_is_signaling_nan(f, s) ||
c45eff30
WL
3758 (float16_is_infinity(f) && sign) ||
3759 (float16_is_normal(f) && sign) ||
3760 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
e848a1e5
FC
3761 s->float_exception_flags |= float_flag_invalid;
3762 return float16_default_nan(s);
3763 }
3764
3765 /* frsqrt7(qNaN) = canonical NaN */
3766 if (float16_is_quiet_nan(f, s)) {
3767 return float16_default_nan(s);
3768 }
3769
3770 /* frsqrt7(+-0) = +-inf */
3771 if (float16_is_zero(f)) {
3772 s->float_exception_flags |= float_flag_divbyzero;
3773 return float16_set_sign(float16_infinity, sign);
3774 }
3775
3776 /* frsqrt7(+inf) = +0 */
3777 if (float16_is_infinity(f) && !sign) {
3778 return float16_set_sign(float16_zero, sign);
3779 }
3780
3781 /* +normal, +subnormal */
3782 uint64_t val = frsqrt7(f, exp_size, frac_size);
3783 return make_float16(val);
3784}
3785
3786static float32 frsqrt7_s(float32 f, float_status *s)
3787{
3788 int exp_size = 8, frac_size = 23;
3789 bool sign = float32_is_neg(f);
3790
3791 /*
3792 * frsqrt7(sNaN) = canonical NaN
3793 * frsqrt7(-inf) = canonical NaN
3794 * frsqrt7(-normal) = canonical NaN
3795 * frsqrt7(-subnormal) = canonical NaN
3796 */
3797 if (float32_is_signaling_nan(f, s) ||
c45eff30
WL
3798 (float32_is_infinity(f) && sign) ||
3799 (float32_is_normal(f) && sign) ||
3800 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
e848a1e5
FC
3801 s->float_exception_flags |= float_flag_invalid;
3802 return float32_default_nan(s);
3803 }
3804
3805 /* frsqrt7(qNaN) = canonical NaN */
3806 if (float32_is_quiet_nan(f, s)) {
3807 return float32_default_nan(s);
3808 }
3809
3810 /* frsqrt7(+-0) = +-inf */
3811 if (float32_is_zero(f)) {
3812 s->float_exception_flags |= float_flag_divbyzero;
3813 return float32_set_sign(float32_infinity, sign);
3814 }
3815
3816 /* frsqrt7(+inf) = +0 */
3817 if (float32_is_infinity(f) && !sign) {
3818 return float32_set_sign(float32_zero, sign);
3819 }
3820
3821 /* +normal, +subnormal */
3822 uint64_t val = frsqrt7(f, exp_size, frac_size);
3823 return make_float32(val);
3824}
3825
3826static float64 frsqrt7_d(float64 f, float_status *s)
3827{
3828 int exp_size = 11, frac_size = 52;
3829 bool sign = float64_is_neg(f);
3830
3831 /*
3832 * frsqrt7(sNaN) = canonical NaN
3833 * frsqrt7(-inf) = canonical NaN
3834 * frsqrt7(-normal) = canonical NaN
3835 * frsqrt7(-subnormal) = canonical NaN
3836 */
3837 if (float64_is_signaling_nan(f, s) ||
c45eff30
WL
3838 (float64_is_infinity(f) && sign) ||
3839 (float64_is_normal(f) && sign) ||
3840 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
e848a1e5
FC
3841 s->float_exception_flags |= float_flag_invalid;
3842 return float64_default_nan(s);
3843 }
3844
3845 /* frsqrt7(qNaN) = canonical NaN */
3846 if (float64_is_quiet_nan(f, s)) {
3847 return float64_default_nan(s);
3848 }
3849
3850 /* frsqrt7(+-0) = +-inf */
3851 if (float64_is_zero(f)) {
3852 s->float_exception_flags |= float_flag_divbyzero;
3853 return float64_set_sign(float64_infinity, sign);
3854 }
3855
3856 /* frsqrt7(+inf) = +0 */
3857 if (float64_is_infinity(f) && !sign) {
3858 return float64_set_sign(float64_zero, sign);
3859 }
3860
3861 /* +normal, +subnormal */
3862 uint64_t val = frsqrt7(f, exp_size, frac_size);
3863 return make_float64(val);
3864}
3865
3866RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3867RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3868RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
5eacf7d8 3869GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3870GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3871GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
e848a1e5 3872
55c35407
FC
3873/*
3874 * Vector Floating-Point Reciprocal Estimate Instruction
3875 *
3876 * Adapted from riscv-v-spec recip.c:
3877 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3878 */
3879static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3880 float_status *s)
3881{
3882 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3883 uint64_t exp = extract64(f, frac_size, exp_size);
3884 uint64_t frac = extract64(f, 0, frac_size);
3885
3886 const uint8_t lookup_table[] = {
3887 127, 125, 123, 121, 119, 117, 116, 114,
3888 112, 110, 109, 107, 105, 104, 102, 100,
3889 99, 97, 96, 94, 93, 91, 90, 88,
3890 87, 85, 84, 83, 81, 80, 79, 77,
3891 76, 75, 74, 72, 71, 70, 69, 68,
3892 66, 65, 64, 63, 62, 61, 60, 59,
3893 58, 57, 56, 55, 54, 53, 52, 51,
3894 50, 49, 48, 47, 46, 45, 44, 43,
3895 42, 41, 40, 40, 39, 38, 37, 36,
3896 35, 35, 34, 33, 32, 31, 31, 30,
3897 29, 28, 28, 27, 26, 25, 25, 24,
3898 23, 23, 22, 21, 21, 20, 19, 19,
3899 18, 17, 17, 16, 15, 15, 14, 14,
3900 13, 12, 12, 11, 11, 10, 9, 9,
3901 8, 8, 7, 7, 6, 5, 5, 4,
3902 4, 3, 3, 2, 2, 1, 1, 0
3903 };
3904 const int precision = 7;
3905
3906 if (exp == 0 && frac != 0) { /* subnormal */
3907 /* Normalize the subnormal. */
3908 while (extract64(frac, frac_size - 1, 1) == 0) {
3909 exp--;
3910 frac <<= 1;
3911 }
3912
3913 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3914
3915 if (exp != 0 && exp != UINT64_MAX) {
3916 /*
3917 * Overflow to inf or max value of same sign,
3918 * depending on sign and rounding mode.
3919 */
3920 s->float_exception_flags |= (float_flag_inexact |
3921 float_flag_overflow);
3922
3923 if ((s->float_rounding_mode == float_round_to_zero) ||
3924 ((s->float_rounding_mode == float_round_down) && !sign) ||
3925 ((s->float_rounding_mode == float_round_up) && sign)) {
3926 /* Return greatest/negative finite value. */
3927 return (sign << (exp_size + frac_size)) |
c45eff30 3928 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
55c35407
FC
3929 } else {
3930 /* Return +-inf. */
3931 return (sign << (exp_size + frac_size)) |
c45eff30 3932 MAKE_64BIT_MASK(frac_size, exp_size);
55c35407
FC
3933 }
3934 }
3935 }
3936
3937 int idx = frac >> (frac_size - precision);
3938 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
c45eff30 3939 (frac_size - precision);
55c35407
FC
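    /*
     * As above, ~exp is -exp - 1, so out_exp is 2 * B - 1 - exp with
     * B = 2^(exp_size-1) - 1: the biased exponent of the reciprocal.
     */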
3940 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3941
3942 if (out_exp == 0 || out_exp == UINT64_MAX) {
3943 /*
3944 * The result is subnormal, but don't raise the underflow exception,
3945 * because there's no additional loss of precision.
3946 */
3947 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3948 if (out_exp == UINT64_MAX) {
3949 out_frac >>= 1;
3950 out_exp = 0;
3951 }
3952 }
3953
3954 uint64_t val = 0;
3955 val = deposit64(val, 0, frac_size, out_frac);
3956 val = deposit64(val, frac_size, exp_size, out_exp);
3957 val = deposit64(val, frac_size + exp_size, 1, sign);
3958 return val;
3959}
3960
3961static float16 frec7_h(float16 f, float_status *s)
3962{
3963 int exp_size = 5, frac_size = 10;
3964 bool sign = float16_is_neg(f);
3965
3966 /* frec7(+-inf) = +-0 */
3967 if (float16_is_infinity(f)) {
3968 return float16_set_sign(float16_zero, sign);
3969 }
3970
3971 /* frec7(+-0) = +-inf */
3972 if (float16_is_zero(f)) {
3973 s->float_exception_flags |= float_flag_divbyzero;
3974 return float16_set_sign(float16_infinity, sign);
3975 }
3976
3977 /* frec7(sNaN) = canonical NaN */
3978 if (float16_is_signaling_nan(f, s)) {
3979 s->float_exception_flags |= float_flag_invalid;
3980 return float16_default_nan(s);
3981 }
3982
3983 /* frec7(qNaN) = canonical NaN */
3984 if (float16_is_quiet_nan(f, s)) {
3985 return float16_default_nan(s);
3986 }
3987
3988 /* +-normal, +-subnormal */
3989 uint64_t val = frec7(f, exp_size, frac_size, s);
3990 return make_float16(val);
3991}
3992
3993static float32 frec7_s(float32 f, float_status *s)
3994{
3995 int exp_size = 8, frac_size = 23;
3996 bool sign = float32_is_neg(f);
3997
3998 /* frec7(+-inf) = +-0 */
3999 if (float32_is_infinity(f)) {
4000 return float32_set_sign(float32_zero, sign);
4001 }
4002
4003 /* frec7(+-0) = +-inf */
4004 if (float32_is_zero(f)) {
4005 s->float_exception_flags |= float_flag_divbyzero;
4006 return float32_set_sign(float32_infinity, sign);
4007 }
4008
4009 /* frec7(sNaN) = canonical NaN */
4010 if (float32_is_signaling_nan(f, s)) {
4011 s->float_exception_flags |= float_flag_invalid;
4012 return float32_default_nan(s);
4013 }
4014
4015 /* frec7(qNaN) = canonical NaN */
4016 if (float32_is_quiet_nan(f, s)) {
4017 return float32_default_nan(s);
4018 }
4019
4020 /* +-normal, +-subnormal */
4021 uint64_t val = frec7(f, exp_size, frac_size, s);
4022 return make_float32(val);
4023}
4024
4025static float64 frec7_d(float64 f, float_status *s)
4026{
4027 int exp_size = 11, frac_size = 52;
4028 bool sign = float64_is_neg(f);
4029
4030 /* frec7(+-inf) = +-0 */
4031 if (float64_is_infinity(f)) {
4032 return float64_set_sign(float64_zero, sign);
4033 }
4034
4035 /* frec7(+-0) = +-inf */
4036 if (float64_is_zero(f)) {
4037 s->float_exception_flags |= float_flag_divbyzero;
4038 return float64_set_sign(float64_infinity, sign);
4039 }
4040
4041 /* frec7(sNaN) = canonical NaN */
4042 if (float64_is_signaling_nan(f, s)) {
4043 s->float_exception_flags |= float_flag_invalid;
4044 return float64_default_nan(s);
4045 }
4046
4047 /* frec7(qNaN) = canonical NaN */
4048 if (float64_is_quiet_nan(f, s)) {
4049 return float64_default_nan(s);
4050 }
4051
4052 /* +-normal, +-subnormal */
4053 uint64_t val = frec7(f, exp_size, frac_size, s);
4054 return make_float64(val);
4055}
4056
4057RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
4058RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
4059RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
5eacf7d8 4060GEN_VEXT_V_ENV(vfrec7_v_h, 2)
4061GEN_VEXT_V_ENV(vfrec7_v_w, 4)
4062GEN_VEXT_V_ENV(vfrec7_v_d, 8)
55c35407 4063
230b53dd 4064/* Vector Floating-Point MIN/MAX Instructions */
49c5611a
FC
4065RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
4066RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
4067RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
5eacf7d8 4068GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
4069GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
4070GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
49c5611a
FC
4071RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
4072RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
4073RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
5eacf7d8 4074GEN_VEXT_VF(vfmin_vf_h, 2)
4075GEN_VEXT_VF(vfmin_vf_w, 4)
4076GEN_VEXT_VF(vfmin_vf_d, 8)
230b53dd 4077
49c5611a
FC
4078RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
4079RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
4080RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
5eacf7d8 4081GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
4082GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
4083GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
49c5611a
FC
4084RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
4085RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
4086RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
5eacf7d8 4087GEN_VEXT_VF(vfmax_vf_h, 2)
4088GEN_VEXT_VF(vfmax_vf_w, 4)
4089GEN_VEXT_VF(vfmax_vf_d, 8)
1d426b81
LZ
4090
4091/* Vector Floating-Point Sign-Injection Instructions */
4092static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4093{
4094 return deposit64(b, 0, 15, a);
4095}
4096
4097static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4098{
4099 return deposit64(b, 0, 31, a);
4100}
4101
4102static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4103{
4104 return deposit64(b, 0, 63, a);
4105}
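/*
 * The magnitude comes from the first operand and the sign bit from the
 * second, e.g. fsgnj16(0x3c00 (+1.0), 0x8000 (-0.0)) == 0xbc00 (-1.0).
 * fsgnjn and fsgnjx below differ only in negating or XOR-ing the injected
 * sign.
 */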
4106
4107RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4108RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4109RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
5eacf7d8 4110GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
4111GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
4112GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
1d426b81
LZ
4113RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4114RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4115RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
5eacf7d8 4116GEN_VEXT_VF(vfsgnj_vf_h, 2)
4117GEN_VEXT_VF(vfsgnj_vf_w, 4)
4118GEN_VEXT_VF(vfsgnj_vf_d, 8)
1d426b81
LZ
4119
4120static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4121{
4122 return deposit64(~b, 0, 15, a);
4123}
4124
4125static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4126{
4127 return deposit64(~b, 0, 31, a);
4128}
4129
4130static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4131{
4132 return deposit64(~b, 0, 63, a);
4133}
4134
4135RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4136RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4137RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
5eacf7d8 4138GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
4139GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
4140GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
1d426b81
LZ
4141RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4142RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4143RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
5eacf7d8 4144GEN_VEXT_VF(vfsgnjn_vf_h, 2)
4145GEN_VEXT_VF(vfsgnjn_vf_w, 4)
4146GEN_VEXT_VF(vfsgnjn_vf_d, 8)
1d426b81
LZ
4147
4148static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4149{
4150 return deposit64(b ^ a, 0, 15, a);
4151}
4152
4153static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4154{
4155 return deposit64(b ^ a, 0, 31, a);
4156}
4157
4158static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4159{
4160 return deposit64(b ^ a, 0, 63, a);
4161}
4162
4163RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4164RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4165RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
5eacf7d8 4166GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4167GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4168GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
1d426b81
LZ
4169RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4170RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4171RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
5eacf7d8 4172GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4173GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4174GEN_VEXT_VF(vfsgnjx_vf_d, 8)
2a68e9e5
LZ
4175
4176/* Vector Floating-Point Compare Instructions */
4177#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4178void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4179 CPURISCVState *env, uint32_t desc) \
4180{ \
2a68e9e5
LZ
4181 uint32_t vm = vext_vm(desc); \
4182 uint32_t vl = env->vl; \
86247c51 4183 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
5eacf7d8 4184 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 4185 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
4186 uint32_t i; \
4187 \
f714361e 4188 for (i = env->vstart; i < vl; i++) { \
2a68e9e5
LZ
4189 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4190 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4191 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4192 /* set masked-off elements to 1s */ \
4193 if (vma) { \
4194 vext_set_elem_mask(vd, i, 1); \
4195 } \
2a68e9e5
LZ
4196 continue; \
4197 } \
f9298de5 4198 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4199 DO_OP(s2, s1, &env->fp_status)); \
4200 } \
f714361e 4201 env->vstart = 0; \
3b57254d
WL
4202 /*
 4203 * mask destination registers are always tail-agnostic
4204 * set tail elements to 1s
4205 */ \
5eacf7d8 4206 if (vta_all_1s) { \
4207 for (; i < total_elems; i++) { \
4208 vext_set_elem_mask(vd, i, 1); \
4209 } \
4210 } \
2a68e9e5
LZ
4211}
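/*
 * Example: for vmflt_vv_h (instantiated below) with vl = 2,
 * vs2 = {1.0, 3.0} and vs1 = {2.0, 2.0}, the helper sets mask bits {1, 0},
 * since bit i is DO_OP(vs2[i], vs1[i]); inactive bits are forced to 1 only
 * when vma is set, and the tail is filled with 1s when vta_all_1s is set.
 */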
4212
2a68e9e5
LZ
4213GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4214GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4215GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4216
4217#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4218void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4219 CPURISCVState *env, uint32_t desc) \
4220{ \
2a68e9e5
LZ
4221 uint32_t vm = vext_vm(desc); \
4222 uint32_t vl = env->vl; \
86247c51 4223 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
5eacf7d8 4224 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 4225 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
4226 uint32_t i; \
4227 \
f714361e 4228 for (i = env->vstart; i < vl; i++) { \
2a68e9e5 4229 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4230 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4231 /* set masked-off elements to 1s */ \
4232 if (vma) { \
4233 vext_set_elem_mask(vd, i, 1); \
4234 } \
2a68e9e5
LZ
4235 continue; \
4236 } \
f9298de5 4237 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4238 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4239 } \
f714361e 4240 env->vstart = 0; \
3b57254d
WL
4241 /*
 4242 * mask destination registers are always tail-agnostic
4243 * set tail elements to 1s
4244 */ \
5eacf7d8 4245 if (vta_all_1s) { \
4246 for (; i < total_elems; i++) { \
4247 vext_set_elem_mask(vd, i, 1); \
4248 } \
4249 } \
2a68e9e5
LZ
4250}
4251
4252GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4253GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4254GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4255
4256static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4257{
4258 FloatRelation compare = float16_compare_quiet(a, b, s);
4259 return compare != float_relation_equal;
4260}
4261
4262static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4263{
4264 FloatRelation compare = float32_compare_quiet(a, b, s);
4265 return compare != float_relation_equal;
4266}
4267
4268static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4269{
4270 FloatRelation compare = float64_compare_quiet(a, b, s);
4271 return compare != float_relation_equal;
4272}
4273
4274GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4275GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4276GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4277GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4278GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4279GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4280
2a68e9e5
LZ
4281GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4282GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4283GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4284GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4285GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4286GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4287
2a68e9e5
LZ
4288GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4289GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4290GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4291GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4292GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4293GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4294
4295static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4296{
4297 FloatRelation compare = float16_compare(a, b, s);
4298 return compare == float_relation_greater;
4299}
4300
4301static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4302{
4303 FloatRelation compare = float32_compare(a, b, s);
4304 return compare == float_relation_greater;
4305}
4306
4307static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4308{
4309 FloatRelation compare = float64_compare(a, b, s);
4310 return compare == float_relation_greater;
4311}
4312
4313GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4314GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4315GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4316
4317static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4318{
4319 FloatRelation compare = float16_compare(a, b, s);
4320 return compare == float_relation_greater ||
4321 compare == float_relation_equal;
4322}
4323
4324static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4325{
4326 FloatRelation compare = float32_compare(a, b, s);
4327 return compare == float_relation_greater ||
4328 compare == float_relation_equal;
4329}
4330
4331static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4332{
4333 FloatRelation compare = float64_compare(a, b, s);
4334 return compare == float_relation_greater ||
4335 compare == float_relation_equal;
4336}
4337
4338GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4339GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4340GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4341
121ddbb3
LZ
4342/* Vector Floating-Point Classify Instruction */
4343#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
4344static void do_##NAME(void *vd, void *vs2, int i) \
4345{ \
4346 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
4347 *((TD *)vd + HD(i)) = OP(s2); \
4348}
4349
5eacf7d8 4350#define GEN_VEXT_V(NAME, ESZ) \
121ddbb3
LZ
4351void HELPER(NAME)(void *vd, void *v0, void *vs2, \
4352 CPURISCVState *env, uint32_t desc) \
4353{ \
121ddbb3
LZ
4354 uint32_t vm = vext_vm(desc); \
4355 uint32_t vl = env->vl; \
5eacf7d8 4356 uint32_t total_elems = \
4357 vext_get_total_elems(env, desc, ESZ); \
4358 uint32_t vta = vext_vta(desc); \
5b448f44 4359 uint32_t vma = vext_vma(desc); \
121ddbb3
LZ
4360 uint32_t i; \
4361 \
f714361e 4362 for (i = env->vstart; i < vl; i++) { \
f9298de5 4363 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4364 /* set masked-off elements to 1s */ \
4365 vext_set_elems_1s(vd, vma, i * ESZ, \
4366 (i + 1) * ESZ); \
121ddbb3
LZ
4367 continue; \
4368 } \
4369 do_##NAME(vd, vs2, i); \
4370 } \
f714361e 4371 env->vstart = 0; \
5eacf7d8 4372 /* set tail elements to 1s */ \
4373 vext_set_elems_1s(vd, vta, vl * ESZ, \
4374 total_elems * ESZ); \
121ddbb3
LZ
4375}
4376
4377target_ulong fclass_h(uint64_t frs1)
4378{
4379 float16 f = frs1;
4380 bool sign = float16_is_neg(f);
4381
4382 if (float16_is_infinity(f)) {
4383 return sign ? 1 << 0 : 1 << 7;
4384 } else if (float16_is_zero(f)) {
4385 return sign ? 1 << 3 : 1 << 4;
4386 } else if (float16_is_zero_or_denormal(f)) {
4387 return sign ? 1 << 2 : 1 << 5;
4388 } else if (float16_is_any_nan(f)) {
4389 float_status s = { }; /* for snan_bit_is_one */
4390 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4391 } else {
4392 return sign ? 1 << 1 : 1 << 6;
4393 }
4394}
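/*
 * Example encodings: fclass_h(0xfc00) (-inf) returns 1 << 0, while
 * fclass_h(0x7e00) (a quiet NaN) returns 1 << 9, matching the scalar
 * fclass bit layout.
 */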
4395
4396target_ulong fclass_s(uint64_t frs1)
4397{
4398 float32 f = frs1;
4399 bool sign = float32_is_neg(f);
4400
4401 if (float32_is_infinity(f)) {
4402 return sign ? 1 << 0 : 1 << 7;
4403 } else if (float32_is_zero(f)) {
4404 return sign ? 1 << 3 : 1 << 4;
4405 } else if (float32_is_zero_or_denormal(f)) {
4406 return sign ? 1 << 2 : 1 << 5;
4407 } else if (float32_is_any_nan(f)) {
4408 float_status s = { }; /* for snan_bit_is_one */
4409 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4410 } else {
4411 return sign ? 1 << 1 : 1 << 6;
4412 }
4413}
4414
4415target_ulong fclass_d(uint64_t frs1)
4416{
4417 float64 f = frs1;
4418 bool sign = float64_is_neg(f);
4419
4420 if (float64_is_infinity(f)) {
4421 return sign ? 1 << 0 : 1 << 7;
4422 } else if (float64_is_zero(f)) {
4423 return sign ? 1 << 3 : 1 << 4;
4424 } else if (float64_is_zero_or_denormal(f)) {
4425 return sign ? 1 << 2 : 1 << 5;
4426 } else if (float64_is_any_nan(f)) {
4427 float_status s = { }; /* for snan_bit_is_one */
4428 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4429 } else {
4430 return sign ? 1 << 1 : 1 << 6;
4431 }
4432}
4433
4434RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4435RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4436RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
5eacf7d8 4437GEN_VEXT_V(vfclass_v_h, 2)
4438GEN_VEXT_V(vfclass_v_w, 4)
4439GEN_VEXT_V(vfclass_v_d, 8)
64ab5846
LZ
4440
4441/* Vector Floating-Point Merge Instruction */
5eacf7d8 4442
3479a814 4443#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
64ab5846
LZ
4444void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4445 CPURISCVState *env, uint32_t desc) \
4446{ \
64ab5846
LZ
4447 uint32_t vm = vext_vm(desc); \
4448 uint32_t vl = env->vl; \
5eacf7d8 4449 uint32_t esz = sizeof(ETYPE); \
4450 uint32_t total_elems = \
4451 vext_get_total_elems(env, desc, esz); \
4452 uint32_t vta = vext_vta(desc); \
64ab5846
LZ
4453 uint32_t i; \
4454 \
f714361e 4455 for (i = env->vstart; i < vl; i++) { \
64ab5846 4456 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
c45eff30
WL
4457 *((ETYPE *)vd + H(i)) = \
4458 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
64ab5846 4459 } \
f714361e 4460 env->vstart = 0; \
5eacf7d8 4461 /* set tail elements to 1s */ \
4462 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
64ab5846
LZ
4463}
4464
3479a814
FC
4465GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4466GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4467GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
92100973
LZ
4468
4469/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4470/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4471RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4472RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4473RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
5eacf7d8 4474GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4475GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4476GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
92100973
LZ
4477
4478/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4479RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4480RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4481RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
5eacf7d8 4482GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4483GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4484GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
92100973
LZ
4485
4486/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4487RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4488RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4489RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
5eacf7d8 4490GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4491GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4492GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
92100973
LZ
4493
4494/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4495RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4496RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4497RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
5eacf7d8 4498GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4499GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4500GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4514b7b1
LZ
4501
4502/* Widening Floating-Point/Integer Type-Convert Instructions */
4503/* (TD, T2, TX2) */
3ce4c09d 4504#define WOP_UU_B uint16_t, uint8_t, uint8_t
4514b7b1
LZ
4505#define WOP_UU_H uint32_t, uint16_t, uint16_t
4506#define WOP_UU_W uint64_t, uint32_t, uint32_t
3b57254d
WL
4507/*
4508 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
4509 */
4514b7b1
LZ
4510RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4511RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
5eacf7d8 4512GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4513GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4514b7b1
LZ
4514
4515/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4516RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4517RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
5eacf7d8 4518GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4519GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4514b7b1 4520
246f8796
WL
4521/*
4522 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
4523 */
3ce4c09d 4524RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4514b7b1
LZ
4525RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4526RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
5eacf7d8 4527GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4528GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4529GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4514b7b1
LZ
4530
4531/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3ce4c09d 4532RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4514b7b1
LZ
4533RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4534RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
5eacf7d8 4535GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4536GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4537GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4514b7b1
LZ
4538
4539/*
246f8796 4540 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4514b7b1
LZ
4541 */
4542static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4543{
4544 return float16_to_float32(a, true, s);
4545}
4546
4547RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4548RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
5eacf7d8 4549GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4550GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
878d406e
LZ
4551
4552/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4553/* (TD, T2, TX2) */
ff679b58 4554#define NOP_UU_B uint8_t, uint16_t, uint32_t
878d406e
LZ
4555#define NOP_UU_H uint16_t, uint32_t, uint32_t
4556#define NOP_UU_W uint32_t, uint64_t, uint64_t
 4557/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
ff679b58
FC
4558RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4559RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4560RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
5eacf7d8 4561GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4562GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4563GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
878d406e
LZ
4564
4565/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
ff679b58
FC
4566RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4567RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4568RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
5eacf7d8 4569GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4570GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4571GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
878d406e 4572
246f8796
WL
4573/*
4574 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
4575 */
ff679b58
FC
4576RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4577RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
5eacf7d8 4578GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4579GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
878d406e
LZ
4580
4581/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
ff679b58
FC
4582RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4583RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
5eacf7d8 4584GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4585GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
878d406e
LZ
4586
4587/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4588static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4589{
4590 return float32_to_float16(a, true, s);
4591}
4592
ff679b58
FC
4593RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4594RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
5eacf7d8 4595GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4596GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
fe5c9ab1
LZ
4597
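/*
 * A matching standalone sketch for the narrowing direction: vfncvt.f.f.w
 * reads a 2*SEW source element and writes a SEW result element; the
 * float-to-integer variants above walk the elements the same way but
 * saturate through the softfloat conversions.  Native C doubles/floats and a
 * byte-per-element mask are assumptions of this sketch, not this file's code.
 */
static void sketch_vfncvt_f_f_w_w(float *vd, const double *vs2,
                                  const uint8_t *mask_bytes, bool vm,
                                  uint32_t vstart, uint32_t vl)
{
    for (uint32_t i = vstart; i < vl; i++) {
        if (!vm && !mask_bytes[i]) {
            continue;
        }
        /* each 2*SEW (64-bit) source element narrows to SEW (32 bits) */
        vd[i] = (float)vs2[i];
    }
}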
4598/*
3b57254d 4599 * Vector Reduction Operations
fe5c9ab1
LZ
4600 */
4601/* Vector Single-Width Integer Reduction Instructions */
3479a814 4602#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
fe5c9ab1 4603void HELPER(NAME)(void *vd, void *v0, void *vs1, \
c45eff30
WL
4604 void *vs2, CPURISCVState *env, \
4605 uint32_t desc) \
fe5c9ab1 4606{ \
fe5c9ab1
LZ
4607 uint32_t vm = vext_vm(desc); \
4608 uint32_t vl = env->vl; \
df4f52a7 4609 uint32_t esz = sizeof(TD); \
4610 uint32_t vlenb = simd_maxsz(desc); \
4611 uint32_t vta = vext_vta(desc); \
fe5c9ab1 4612 uint32_t i; \
fe5c9ab1
LZ
4613 TD s1 = *((TD *)vs1 + HD(0)); \
4614 \
f714361e 4615 for (i = env->vstart; i < vl; i++) { \
fe5c9ab1 4616 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4617 if (!vm && !vext_elem_mask(v0, i)) { \
fe5c9ab1
LZ
4618 continue; \
4619 } \
4620 s1 = OP(s1, (TD)s2); \
4621 } \
4622 *((TD *)vd + HD(0)) = s1; \
f714361e 4623 env->vstart = 0; \
df4f52a7 4624 /* set tail elements to 1s */ \
4625 vext_set_elems_1s(vd, vta, esz, vlenb); \
fe5c9ab1
LZ
4626}
4627
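/*
 * A standalone sketch of what GEN_VEXT_RED expands to, here for a
 * vredsum.vs-style sum: the accumulator is seeded from vs1[0], every active
 * element of vs2 is folded in, and only vd[0] is written.  The widening
 * forms further below (vwredsum/vwredsumu) differ only in using a
 * double-width accumulator type.  Assumptions of the sketch: unsigned
 * (modulo 2^32) arithmetic to sidestep signed-overflow concerns, a
 * byte-per-element mask, and no tail handling.
 */
static void sketch_vredsum_vs_w(uint32_t *vd, const uint32_t *vs1,
                                const uint32_t *vs2,
                                const uint8_t *mask_bytes, bool vm,
                                uint32_t vstart, uint32_t vl)
{
    uint32_t s1 = vs1[0];                /* scalar seed from vs1[0] */

    for (uint32_t i = vstart; i < vl; i++) {
        if (!vm && !mask_bytes[i]) {
            continue;                    /* inactive elements are skipped */
        }
        s1 += vs2[i];
    }
    vd[0] = s1;                          /* only element 0 of vd is defined */
}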
4628/* vd[0] = sum(vs1[0], vs2[*]) */
3479a814
FC
4629GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4630GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4631GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4632GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
fe5c9ab1
LZ
4633
4634/* vd[0] = maxu(vs1[0], vs2[*]) */
3479a814
FC
4635GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4636GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4637GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4638GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4639
4640/* vd[0] = max(vs1[0], vs2[*]) */
3479a814
FC
4641GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4642GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4643GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4644GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4645
4646/* vd[0] = minu(vs1[0], vs2[*]) */
3479a814
FC
4647GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4648GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4649GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4650GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4651
4652/* vd[0] = min(vs1[0], vs2[*]) */
3479a814
FC
4653GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4654GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4655GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4656GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4657
4658/* vd[0] = and(vs1[0], vs2[*]) */
3479a814
FC
4659GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4660GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4661GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4662GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
fe5c9ab1
LZ
4663
4664/* vd[0] = or(vs1[0], vs2[*]) */
3479a814
FC
4665GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4666GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4667GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4668GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
fe5c9ab1
LZ
4669
4670/* vd[0] = xor(vs1[0], vs2[*]) */
3479a814
FC
4671GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4672GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4673GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4674GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
bba71820
LZ
4675
4676/* Vector Widening Integer Reduction Instructions */
4677/* signed sum reduction into double-width accumulator */
3479a814
FC
4678GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4679GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4680GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
bba71820
LZ
4681
4682/* Unsigned sum reduction into double-width accumulator */
3479a814
FC
4683GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4684GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4685GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
523547f1
LZ
4686
4687/* Vector Single-Width Floating-Point Reduction Instructions */
3479a814 4688#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
523547f1
LZ
4689void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4690 void *vs2, CPURISCVState *env, \
4691 uint32_t desc) \
4692{ \
523547f1
LZ
4693 uint32_t vm = vext_vm(desc); \
4694 uint32_t vl = env->vl; \
df4f52a7 4695 uint32_t esz = sizeof(TD); \
4696 uint32_t vlenb = simd_maxsz(desc); \
4697 uint32_t vta = vext_vta(desc); \
523547f1 4698 uint32_t i; \
523547f1
LZ
4699 TD s1 = *((TD *)vs1 + HD(0)); \
4700 \
f714361e 4701 for (i = env->vstart; i < vl; i++) { \
523547f1 4702 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4703 if (!vm && !vext_elem_mask(v0, i)) { \
523547f1
LZ
4704 continue; \
4705 } \
4706 s1 = OP(s1, (TD)s2, &env->fp_status); \
4707 } \
4708 *((TD *)vd + HD(0)) = s1; \
f714361e 4709 env->vstart = 0; \
df4f52a7 4710 /* set tail elements to 1s */ \
4711 vext_set_elems_1s(vd, vta, esz, vlenb); \
523547f1
LZ
4712}
4713
4714/* Unordered sum */
a3ab69f9
YL
4715GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4716GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4717GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4718
4719/* Ordered sum */
4720GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4721GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4722GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
523547f1
LZ
4723
4724/* Maximum value */
246f8796
WL
4725GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
4726 float16_maximum_number)
4727GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
4728 float32_maximum_number)
4729GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
4730 float64_maximum_number)
523547f1
LZ
4731
4732/* Minimum value */
246f8796
WL
4733GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
4734 float16_minimum_number)
4735GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
4736 float32_minimum_number)
4737GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
4738 float64_minimum_number)
696b0c26 4739
5bda21c0
YL
4740/* Vector Widening Floating-Point Add Instructions */
4741static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
696b0c26 4742{
5bda21c0 4743 return float32_add(a, float16_to_float32(b, true, s), s);
696b0c26
LZ
4744}
4745
5bda21c0 4746static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
696b0c26 4747{
5bda21c0 4748 return float64_add(a, float32_to_float64(b, s), s);
696b0c26 4749}
c21f34ae 4750
5bda21c0 4751/* Vector Widening Floating-Point Reduction Instructions */
a3ab69f9
YL
4752/* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4753GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4754GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4755GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4756GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
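/*
 * A standalone sketch of the promote-then-accumulate step behind the
 * widening FP reductions: like fwadd16/fwadd32 above, each SEW element is
 * first widened and then added to the 2*SEW accumulator, in the order the
 * helper walks the elements.  Native float/double arithmetic stands in for
 * softfloat; names are illustrative.
 */
static double sketch_fwadd32(double acc, float elem)
{
    return acc + (double)elem;           /* promote to 2*SEW, then accumulate */
}

static double sketch_vfwredosum_vs_w(double s1, const float *vs2, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        s1 = sketch_fwadd32(s1, vs2[i]); /* ordered: element 0 first */
    }
    return s1;
}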
5bda21c0 4757
c21f34ae 4758/*
3b57254d 4759 * Vector Mask Operations
c21f34ae
LZ
4760 */
4761/* Vector Mask-Register Logical Instructions */
4762#define GEN_VEXT_MASK_VV(NAME, OP) \
4763void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4764 void *vs2, CPURISCVState *env, \
4765 uint32_t desc) \
4766{ \
c21f34ae 4767 uint32_t vl = env->vl; \
86247c51 4768 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
acc6ffd4 4769 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
c21f34ae
LZ
4770 uint32_t i; \
4771 int a, b; \
4772 \
f714361e 4773 for (i = env->vstart; i < vl; i++) { \
f9298de5
FC
4774 a = vext_elem_mask(vs1, i); \
4775 b = vext_elem_mask(vs2, i); \
4776 vext_set_elem_mask(vd, i, OP(b, a)); \
c21f34ae 4777 } \
f714361e 4778 env->vstart = 0; \
3b57254d
WL
4779 /*
4780 * mask destination register is always tail-agnostic
4781 * set tail elements to 1s
acc6ffd4 4782 */ \
acc6ffd4 4783 if (vta_all_1s) { \
4784 for (; i < total_elems; i++) { \
4785 vext_set_elem_mask(vd, i, 1); \
4786 } \
4787 } \
c21f34ae
LZ
4788}
4789
4790#define DO_NAND(N, M) (!(N & M))
4791#define DO_ANDNOT(N, M) (N & !M)
4792#define DO_NOR(N, M) (!(N | M))
4793#define DO_ORNOT(N, M) (N | !M)
4794#define DO_XNOR(N, M) (!(N ^ M))
4795
4796GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4797GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
9c0d2559 4798GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
c21f34ae
LZ
4799GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4800GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4801GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
9c0d2559 4802GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
c21f34ae 4803GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
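/*
 * A standalone sketch of one mask-logical helper, vmnand.mm: one bit is read
 * from each source mask, combined with the DO_* operation (note the OP(b, a)
 * argument order above, which matters for vmandn/vmorn: vs2 supplies N and
 * vs1 supplies M), and one result bit is written.  The packed mask layout
 * accessed via vext_elem_mask()/vext_set_elem_mask() is modelled here as one
 * byte per element; tail handling is omitted.
 */
static void sketch_vmnand_mm(uint8_t *vd, const uint8_t *vs1_bits,
                             const uint8_t *vs2_bits,
                             uint32_t vstart, uint32_t vl)
{
    for (uint32_t i = vstart; i < vl; i++) {
        int a = vs1_bits[i] & 1;
        int b = vs2_bits[i] & 1;
        vd[i] = !(b & a);               /* DO_NAND(N, M) = !(N & M) */
    }
}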
2e88f551 4804
0014aa74
FC
4805/* Vector count population in mask vcpop */
4806target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4807 uint32_t desc)
2e88f551
LZ
4808{
4809 target_ulong cnt = 0;
2e88f551
LZ
4810 uint32_t vm = vext_vm(desc);
4811 uint32_t vl = env->vl;
4812 int i;
4813
f714361e 4814 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4815 if (vm || vext_elem_mask(v0, i)) {
4816 if (vext_elem_mask(vs2, i)) {
2e88f551
LZ
4817 cnt++;
4818 }
4819 }
4820 }
f714361e 4821 env->vstart = 0;
2e88f551
LZ
4822 return cnt;
4823}
0db67e1c 4824
3b57254d 4825/* vfirst find-first-set mask bit */
d71a24fc
FC
4826target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4827 uint32_t desc)
0db67e1c 4828{
0db67e1c
LZ
4829 uint32_t vm = vext_vm(desc);
4830 uint32_t vl = env->vl;
4831 int i;
4832
f714361e 4833 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4834 if (vm || vext_elem_mask(v0, i)) {
4835 if (vext_elem_mask(vs2, i)) {
0db67e1c
LZ
4836 return i;
4837 }
4838 }
4839 }
f714361e 4840 env->vstart = 0;
0db67e1c
LZ
4841 return -1LL;
4842}
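/*
 * A standalone sketch covering both scans above: vcpop.m counts the set bits
 * of vs2 among active elements, and vfirst.m (shown here) returns the index
 * of the first such bit or -1 if none is set.  Byte-per-element masks and
 * illustrative names; the real helpers read the packed masks via
 * vext_elem_mask().
 */
static int64_t sketch_vfirst_m(const uint8_t *v0_bits, const uint8_t *vs2_bits,
                               bool vm, uint32_t vstart, uint32_t vl)
{
    for (uint32_t i = vstart; i < vl; i++) {
        if ((vm || v0_bits[i]) && vs2_bits[i]) {
            return i;                   /* first active element with its bit set */
        }
    }
    return -1;                          /* no set bit among active elements */
}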
81fbf7da
LZ
4843
4844enum set_mask_type {
4845 ONLY_FIRST = 1,
4846 INCLUDE_FIRST,
4847 BEFORE_FIRST,
4848};
4849
4850static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4851 uint32_t desc, enum set_mask_type type)
4852{
81fbf7da
LZ
4853 uint32_t vm = vext_vm(desc);
4854 uint32_t vl = env->vl;
86247c51 4855 uint32_t total_elems = riscv_cpu_cfg(env)->vlen;
acc6ffd4 4856 uint32_t vta_all_1s = vext_vta_all_1s(desc);
35f2d795 4857 uint32_t vma = vext_vma(desc);
81fbf7da
LZ
4858 int i;
4859 bool first_mask_bit = false;
4860
f714361e 4861 for (i = env->vstart; i < vl; i++) {
f9298de5 4862 if (!vm && !vext_elem_mask(v0, i)) {
35f2d795
YTC
4863 /* set masked-off elements to 1s */
4864 if (vma) {
4865 vext_set_elem_mask(vd, i, 1);
4866 }
81fbf7da
LZ
4867 continue;
4868 }
4869 /* write a zero to all following active elements */
4870 if (first_mask_bit) {
f9298de5 4871 vext_set_elem_mask(vd, i, 0);
81fbf7da
LZ
4872 continue;
4873 }
f9298de5 4874 if (vext_elem_mask(vs2, i)) {
81fbf7da
LZ
4875 first_mask_bit = true;
4876 if (type == BEFORE_FIRST) {
f9298de5 4877 vext_set_elem_mask(vd, i, 0);
81fbf7da 4878 } else {
f9298de5 4879 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4880 }
4881 } else {
4882 if (type == ONLY_FIRST) {
f9298de5 4883 vext_set_elem_mask(vd, i, 0);
81fbf7da 4884 } else {
f9298de5 4885 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4886 }
4887 }
4888 }
f714361e 4889 env->vstart = 0;
3b57254d
WL
4890 /*
4891 * mask destination register is always tail-agnostic
4892 * set tail elements to 1s
4893 */
acc6ffd4 4894 if (vta_all_1s) {
4895 for (; i < total_elems; i++) {
4896 vext_set_elem_mask(vd, i, 1);
4897 }
4898 }
81fbf7da
LZ
4899}
4900
4901void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4902 uint32_t desc)
4903{
4904 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4905}
4906
4907void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4908 uint32_t desc)
4909{
4910 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4911}
4912
4913void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4914 uint32_t desc)
4915{
4916 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4917}
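/*
 * A worked example of the three forms vmsetm() implements, for an unmasked
 * operation (vm = 1):
 *   vs2 mask bits:          0 0 1 0 1 0
 *   vmsbf (BEFORE_FIRST):   1 1 0 0 0 0   bits strictly before the first set bit
 *   vmsif (INCLUDE_FIRST):  1 1 1 0 0 0   ... including the first set bit
 *   vmsof (ONLY_FIRST):     0 0 1 0 0 0   only the first set bit
 * The sketch below reproduces the vmsbf case with a byte-per-element mask;
 * illustrative only, with no v0 or tail handling.
 */
static void sketch_vmsbf_m(uint8_t *vd, const uint8_t *vs2_bits,
                           uint32_t vstart, uint32_t vl)
{
    bool seen = false;

    for (uint32_t i = vstart; i < vl; i++) {
        if (vs2_bits[i]) {
            seen = true;                /* first set bit and all later ones -> 0 */
        }
        vd[i] = seen ? 0 : 1;
    }
}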
78d90cfe
LZ
4918
4919/* Vector Iota Instruction */
3479a814 4920#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
78d90cfe
LZ
4921void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4922 uint32_t desc) \
4923{ \
78d90cfe
LZ
4924 uint32_t vm = vext_vm(desc); \
4925 uint32_t vl = env->vl; \
acc6ffd4 4926 uint32_t esz = sizeof(ETYPE); \
4927 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4928 uint32_t vta = vext_vta(desc); \
35f2d795 4929 uint32_t vma = vext_vma(desc); \
78d90cfe
LZ
4930 uint32_t sum = 0; \
4931 int i; \
4932 \
f714361e 4933 for (i = env->vstart; i < vl; i++) { \
f9298de5 4934 if (!vm && !vext_elem_mask(v0, i)) { \
35f2d795
YTC
4935 /* set masked-off elements to 1s */ \
4936 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
78d90cfe
LZ
4937 continue; \
4938 } \
4939 *((ETYPE *)vd + H(i)) = sum; \
f9298de5 4940 if (vext_elem_mask(vs2, i)) { \
78d90cfe
LZ
4941 sum++; \
4942 } \
4943 } \
f714361e 4944 env->vstart = 0; \
acc6ffd4 4945 /* set tail elements to 1s */ \
4946 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
78d90cfe
LZ
4947}
4948
3479a814
FC
4949GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4950GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4951GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4952GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
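/*
 * A standalone sketch of viota.m: each active destination element receives
 * the number of set vs2 bits seen at earlier active positions (an exclusive
 * prefix sum), which vid.v below degenerates into a plain vd[i] = i.
 * Byte-per-element masks, no tail/agnostic handling; illustrative only.
 */
static void sketch_viota_m_w(uint32_t *vd, const uint8_t *vs2_bits,
                             const uint8_t *v0_bits, bool vm,
                             uint32_t vstart, uint32_t vl)
{
    uint32_t sum = 0;

    for (uint32_t i = vstart; i < vl; i++) {
        if (!vm && !v0_bits[i]) {
            continue;                   /* inactive: neither written nor counted */
        }
        vd[i] = sum;                    /* set bits seen so far, excluding i */
        if (vs2_bits[i]) {
            sum++;
        }
    }
}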
126bec3f
LZ
4953
4954/* Vector Element Index Instruction */
3479a814 4955#define GEN_VEXT_VID_V(NAME, ETYPE, H) \
126bec3f
LZ
4956void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4957{ \
126bec3f
LZ
4958 uint32_t vm = vext_vm(desc); \
4959 uint32_t vl = env->vl; \
acc6ffd4 4960 uint32_t esz = sizeof(ETYPE); \
4961 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4962 uint32_t vta = vext_vta(desc); \
35f2d795 4963 uint32_t vma = vext_vma(desc); \
126bec3f
LZ
4964 int i; \
4965 \
f714361e 4966 for (i = env->vstart; i < vl; i++) { \
f9298de5 4967 if (!vm && !vext_elem_mask(v0, i)) { \
35f2d795
YTC
4968 /* set masked-off elements to 1s */ \
4969 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
126bec3f
LZ
4970 continue; \
4971 } \
4972 *((ETYPE *)vd + H(i)) = i; \
4973 } \
f714361e 4974 env->vstart = 0; \
acc6ffd4 4975 /* set tail elements to 1s */ \
4976 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
126bec3f
LZ
4977}
4978
3479a814
FC
4979GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4980GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4981GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4982GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
ec17e036
LZ
4983
4984/*
3b57254d 4985 * Vector Permutation Instructions
ec17e036
LZ
4986 */
4987
4988/* Vector Slide Instructions */
3479a814 4989#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
ec17e036
LZ
4990void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4991 CPURISCVState *env, uint32_t desc) \
4992{ \
ec17e036
LZ
4993 uint32_t vm = vext_vm(desc); \
4994 uint32_t vl = env->vl; \
803963f7 4995 uint32_t esz = sizeof(ETYPE); \
4996 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4997 uint32_t vta = vext_vta(desc); \
edabcd0e 4998 uint32_t vma = vext_vma(desc); \
f714361e 4999 target_ulong offset = s1, i_min, i; \
ec17e036 5000 \
f714361e
FC
5001 i_min = MAX(env->vstart, offset); \
5002 for (i = i_min; i < vl; i++) { \
f9298de5 5003 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5004 /* set masked-off elements to 1s */ \
5005 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
ec17e036
LZ
5006 continue; \
5007 } \
5008 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
5009 } \
803963f7 5010 /* set tail elements to 1s */ \
5011 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
ec17e036
LZ
5012}
5013
5014/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
3479a814
FC
5015GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
5016GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
5017GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
5018GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
ec17e036 5019
3479a814 5020#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
ec17e036
LZ
5021void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5022 CPURISCVState *env, uint32_t desc) \
5023{ \
6438ed61 5024 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
ec17e036
LZ
5025 uint32_t vm = vext_vm(desc); \
5026 uint32_t vl = env->vl; \
803963f7 5027 uint32_t esz = sizeof(ETYPE); \
5028 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5029 uint32_t vta = vext_vta(desc); \
edabcd0e 5030 uint32_t vma = vext_vma(desc); \
6438ed61 5031 target_ulong i_max, i; \
ec17e036 5032 \
f714361e
FC
5033 i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
5034 for (i = env->vstart; i < i_max; ++i) { \
edabcd0e
YTC
5035 if (!vm && !vext_elem_mask(v0, i)) { \
5036 /* set masked-off elements to 1s */ \
5037 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5038 continue; \
6438ed61 5039 } \
edabcd0e 5040 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
6438ed61
FC
5041 } \
5042 \
5043 for (i = i_max; i < vl; ++i) { \
5044 if (vm || vext_elem_mask(v0, i)) { \
5045 *((ETYPE *)vd + H(i)) = 0; \
ec17e036 5046 } \
ec17e036 5047 } \
f714361e
FC
5048 \
5049 env->vstart = 0; \
803963f7 5050 /* set tail elements to 1s */ \
5051 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
ec17e036
LZ
5052}
5053
5054/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
3479a814
FC
5055GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
5056GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
5057GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
5058GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
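/*
 * A standalone sketch of the two slide mappings generated above: vslideup
 * writes vd[i] = vs2[i - OFFSET] for i >= OFFSET, while vslidedown (shown
 * here) writes vd[i] = vs2[i + OFFSET] and zeroes every element whose source
 * index would fall at or beyond vlmax.  Unmasked, vstart = 0, no tail
 * handling; illustrative names.
 */
static void sketch_vslidedown_vx_w(uint32_t *vd, const uint32_t *vs2,
                                   uint32_t offset, uint32_t vl,
                                   uint32_t vlmax)
{
    uint32_t avail = offset < vlmax ? vlmax - offset : 0;
    uint32_t i_max = avail < vl ? avail : vl;  /* elements with a valid source */

    for (uint32_t i = 0; i < i_max; i++) {
        vd[i] = vs2[i + offset];
    }
    for (uint32_t i = i_max; i < vl; i++) {
        vd[i] = 0;                      /* slid in from beyond vlmax: zero */
    }
}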
ec17e036 5059
c7b8a421 5060#define GEN_VEXT_VSLIDE1UP(BITWIDTH, H) \
8c89d50c 5061static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
c45eff30
WL
5062 void *vs2, CPURISCVState *env, \
5063 uint32_t desc) \
8500d4ab 5064{ \
c7b8a421 5065 typedef uint##BITWIDTH##_t ETYPE; \
8500d4ab
FC
5066 uint32_t vm = vext_vm(desc); \
5067 uint32_t vl = env->vl; \
803963f7 5068 uint32_t esz = sizeof(ETYPE); \
5069 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5070 uint32_t vta = vext_vta(desc); \
edabcd0e 5071 uint32_t vma = vext_vma(desc); \
8500d4ab
FC
5072 uint32_t i; \
5073 \
f714361e 5074 for (i = env->vstart; i < vl; i++) { \
8500d4ab 5075 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5076 /* set masked-off elements to 1s */ \
5077 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
8500d4ab
FC
5078 continue; \
5079 } \
5080 if (i == 0) { \
5081 *((ETYPE *)vd + H(i)) = s1; \
5082 } else { \
5083 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
5084 } \
5085 } \
f714361e 5086 env->vstart = 0; \
803963f7 5087 /* set tail elements to 1s */ \
5088 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
8500d4ab
FC
5089}
5090
5091GEN_VEXT_VSLIDE1UP(8, H1)
5092GEN_VEXT_VSLIDE1UP(16, H2)
5093GEN_VEXT_VSLIDE1UP(32, H4)
5094GEN_VEXT_VSLIDE1UP(64, H8)
5095
c7b8a421 5096#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
8500d4ab
FC
5097void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5098 CPURISCVState *env, uint32_t desc) \
5099{ \
c7b8a421 5100 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
ec17e036
LZ
5101}
5102
5103/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
8500d4ab
FC
5104GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
5105GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
5106GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
5107GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
5108
c7b8a421 5109#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
8c89d50c 5110static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
c45eff30
WL
5111 void *vs2, CPURISCVState *env, \
5112 uint32_t desc) \
8500d4ab 5113{ \
c7b8a421 5114 typedef uint##BITWIDTH##_t ETYPE; \
8500d4ab
FC
5115 uint32_t vm = vext_vm(desc); \
5116 uint32_t vl = env->vl; \
803963f7 5117 uint32_t esz = sizeof(ETYPE); \
5118 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5119 uint32_t vta = vext_vta(desc); \
edabcd0e 5120 uint32_t vma = vext_vma(desc); \
8500d4ab
FC
5121 uint32_t i; \
5122 \
f714361e 5123 for (i = env->vstart; i < vl; i++) { \
8500d4ab 5124 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5125 /* set masked-off elements to 1s */ \
5126 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
8500d4ab
FC
5127 continue; \
5128 } \
5129 if (i == vl - 1) { \
5130 *((ETYPE *)vd + H(i)) = s1; \
5131 } else { \
5132 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
5133 } \
5134 } \
f714361e 5135 env->vstart = 0; \
803963f7 5136 /* set tail elements to 1s */ \
5137 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
8500d4ab
FC
5138}
5139
5140GEN_VEXT_VSLIDE1DOWN(8, H1)
5141GEN_VEXT_VSLIDE1DOWN(16, H2)
5142GEN_VEXT_VSLIDE1DOWN(32, H4)
5143GEN_VEXT_VSLIDE1DOWN(64, H8)
5144
c7b8a421 5145#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
8500d4ab
FC
5146void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5147 CPURISCVState *env, uint32_t desc) \
5148{ \
c7b8a421 5149 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
ec17e036
LZ
5150}
5151
5152/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
8500d4ab
FC
5153GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
5154GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
5155GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
5156GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
5157
5158/* Vector Floating-Point Slide Instructions */
c7b8a421 5159#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
8500d4ab
FC
5160void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5161 CPURISCVState *env, uint32_t desc) \
5162{ \
c7b8a421 5163 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
8500d4ab
FC
5164}
5165
5166/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
5167GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
5168GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
5169GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5170
c7b8a421 5171#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
8500d4ab
FC
5172void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5173 CPURISCVState *env, uint32_t desc) \
5174{ \
c7b8a421 5175 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
8500d4ab
FC
5176}
5177
5178/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
5179GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
5180GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
5181GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
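/*
 * A standalone sketch of vslide1up.vx: element 0 takes the scalar and every
 * other element shifts up by one position; vslide1down.vx mirrors this with
 * the scalar landing in element vl-1.  The vfslide1up/vfslide1down helpers
 * above reuse the very same bodies, only sourcing the scalar from an FP
 * register.  Unmasked; illustrative names.
 */
static void sketch_vslide1up_vx_w(uint32_t *vd, const uint32_t *vs2,
                                  uint32_t s1, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = (i == 0) ? s1 : vs2[i - 1];
    }
}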
e4b83d5c
LZ
5182
5183/* Vector Register Gather Instruction */
50bfb45b 5184#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
e4b83d5c
LZ
5185void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5186 CPURISCVState *env, uint32_t desc) \
5187{ \
f714361e 5188 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
e4b83d5c
LZ
5189 uint32_t vm = vext_vm(desc); \
5190 uint32_t vl = env->vl; \
803963f7 5191 uint32_t esz = sizeof(TS2); \
5192 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5193 uint32_t vta = vext_vta(desc); \
edabcd0e 5194 uint32_t vma = vext_vma(desc); \
b11e84b8
FC
5195 uint64_t index; \
5196 uint32_t i; \
e4b83d5c 5197 \
f714361e 5198 for (i = env->vstart; i < vl; i++) { \
f9298de5 5199 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5200 /* set masked-off elements to 1s */ \
5201 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
e4b83d5c
LZ
5202 continue; \
5203 } \
50bfb45b 5204 index = *((TS1 *)vs1 + HS1(i)); \
e4b83d5c 5205 if (index >= vlmax) { \
50bfb45b 5206 *((TS2 *)vd + HS2(i)) = 0; \
e4b83d5c 5207 } else { \
50bfb45b 5208 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
e4b83d5c
LZ
5209 } \
5210 } \
f714361e 5211 env->vstart = 0; \
803963f7 5212 /* set tail elements to 1s */ \
5213 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
e4b83d5c
LZ
5214}
5215
5216/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
50bfb45b
FC
5217GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5218GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5219GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5220GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5221
5222GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5223GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5224GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5225GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
e4b83d5c 5226
3479a814 5227#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
e4b83d5c
LZ
5228void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5229 CPURISCVState *env, uint32_t desc) \
5230{ \
5a9f8e15 5231 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
e4b83d5c
LZ
5232 uint32_t vm = vext_vm(desc); \
5233 uint32_t vl = env->vl; \
803963f7 5234 uint32_t esz = sizeof(ETYPE); \
5235 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5236 uint32_t vta = vext_vta(desc); \
edabcd0e 5237 uint32_t vma = vext_vma(desc); \
b11e84b8
FC
5238 uint64_t index = s1; \
5239 uint32_t i; \
e4b83d5c 5240 \
f714361e 5241 for (i = env->vstart; i < vl; i++) { \
f9298de5 5242 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5243 /* set masked-off elements to 1s */ \
5244 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
e4b83d5c
LZ
5245 continue; \
5246 } \
5247 if (index >= vlmax) { \
5248 *((ETYPE *)vd + H(i)) = 0; \
5249 } else { \
5250 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5251 } \
5252 } \
f714361e 5253 env->vstart = 0; \
803963f7 5254 /* set tail elements to 1s */ \
5255 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
e4b83d5c
LZ
5256}
5257
5258/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
3479a814
FC
5259GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5260GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5261GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5262GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
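/*
 * A standalone sketch of the vrgather.vv selection above: each destination
 * element is fetched from vs2 at the index supplied by vs1, and any index at
 * or beyond vlmax yields zero.  vrgather.vx does the same with one scalar
 * index for all elements, and vrgatherei16 merely takes its indices from a
 * 16-bit vector.  Unmasked; illustrative names.
 */
static void sketch_vrgather_vv_w(uint32_t *vd, const uint32_t *vs1,
                                 const uint32_t *vs2,
                                 uint32_t vl, uint32_t vlmax)
{
    for (uint32_t i = 0; i < vl; i++) {
        uint64_t index = vs1[i];
        vd[i] = (index >= vlmax) ? 0 : vs2[index];
    }
}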
31bf42a2
LZ
5263
5264/* Vector Compress Instruction */
3479a814 5265#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
31bf42a2
LZ
5266void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5267 CPURISCVState *env, uint32_t desc) \
5268{ \
31bf42a2 5269 uint32_t vl = env->vl; \
803963f7 5270 uint32_t esz = sizeof(ETYPE); \
5271 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5272 uint32_t vta = vext_vta(desc); \
31bf42a2
LZ
5273 uint32_t num = 0, i; \
5274 \
f714361e 5275 for (i = env->vstart; i < vl; i++) { \
f9298de5 5276 if (!vext_elem_mask(vs1, i)) { \
31bf42a2
LZ
5277 continue; \
5278 } \
5279 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5280 num++; \
5281 } \
f714361e 5282 env->vstart = 0; \
803963f7 5283 /* set tail elements to 1s */ \
5284 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
31bf42a2
LZ
5285}
5286
5287/* Compress into vd elements of vs2 where vs1 is enabled */
3479a814
FC
5288GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5289GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5290GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5291GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
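/*
 * A standalone sketch of vcompress.vm: elements of vs2 whose vs1 mask bit is
 * set are packed contiguously from vd[0] upward.  vcompress is always
 * unmasked, so v0 is unused by the real helper as well.  Byte-per-element
 * mask model, no tail handling; illustrative names.
 */
static void sketch_vcompress_vm_w(uint32_t *vd, const uint8_t *vs1_bits,
                                  const uint32_t *vs2, uint32_t vl)
{
    uint32_t num = 0;

    for (uint32_t i = 0; i < vl; i++) {
        if (!vs1_bits[i]) {
            continue;                   /* unselected element: not copied */
        }
        vd[num++] = vs2[i];             /* selected elements pack densely */
    }
}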
cd01340e 5292
f714361e 5293/* Vector Whole Register Move */
f32d82f6
WL
5294void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5295{
f06193c4 5296 /* EEW = SEW */
f32d82f6 5297 uint32_t maxsz = simd_maxsz(desc);
f06193c4
WL
5298 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5299 uint32_t startb = env->vstart * sewb;
5300 uint32_t i = startb;
f32d82f6
WL
5301
5302 memcpy((uint8_t *)vd + H1(i),
5303 (uint8_t *)vs2 + H1(i),
f06193c4 5304 maxsz - startb);
f714361e 5305
f32d82f6
WL
5306 env->vstart = 0;
5307}
f714361e 5308
cd01340e
FC
5309/* Vector Integer Extension */
5310#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5311void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5312 CPURISCVState *env, uint32_t desc) \
5313{ \
5314 uint32_t vl = env->vl; \
5315 uint32_t vm = vext_vm(desc); \
803963f7 5316 uint32_t esz = sizeof(ETYPE); \
5317 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5318 uint32_t vta = vext_vta(desc); \
edabcd0e 5319 uint32_t vma = vext_vma(desc); \
cd01340e
FC
5320 uint32_t i; \
5321 \
f714361e 5322 for (i = env->vstart; i < vl; i++) { \
cd01340e 5323 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5324 /* set masked-off elements to 1s */ \
5325 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
cd01340e
FC
5326 continue; \
5327 } \
5328 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5329 } \
f714361e 5330 env->vstart = 0; \
803963f7 5331 /* set tail elements to 1s */ \
5332 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
cd01340e
FC
5333}
5334
5335GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5336GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5337GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5338GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5339GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5340GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5341
5342GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5343GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5344GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5345GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5346GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5347GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
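/*
 * A standalone sketch of the integer-extension helpers generated above: the
 * zero-extending forms pair an unsigned DTYPE with a wider unsigned ETYPE,
 * the sign-extending forms pair signed types, so the per-element work is
 * just a C conversion.  Unmasked, no tail handling; illustrative names.
 */
static void sketch_vzext_vf2_w(uint32_t *vd, const uint16_t *vs2, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = (uint32_t)vs2[i];       /* zero-extend 16 -> 32 bits */
    }
}

static void sketch_vsext_vf2_w(int32_t *vd, const int16_t *vs2, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = (int32_t)vs2[i];        /* sign-extend 16 -> 32 bits */
    }
}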