[mirror_qemu.git] / target / riscv / vector_helper.c
1/*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include "qemu/osdep.h"
5a9f8e15 20#include "qemu/host-utils.h"
e848a1e5 21#include "qemu/bitops.h"
2b7168fc 22#include "cpu.h"
751538d5 23#include "exec/memop.h"
2b7168fc 24#include "exec/exec-all.h"
09b07f28 25#include "exec/cpu_ldst.h"
2b7168fc 26#include "exec/helper-proto.h"
ce2a0343 27#include "fpu/softfloat.h"
28#include "tcg/tcg-gvec-desc.h"
29#include "internals.h"
98f40dd2 30#include "vector_internals.h"
31#include <math.h>
32
33target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
34 target_ulong s2)
35{
36 int vlmax, vl;
37 RISCVCPU *cpu = env_archcpu(env);
d9b7609a 38 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
39 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
40 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
41 int xlen = riscv_cpu_xlen(env);
42 bool vill = (s2 >> (xlen - 1)) & 0x1;
43 target_ulong reserved = s2 &
44 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
45 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
2b7168fc 46
d9b7609a 47 if (lmul & 4) {
32be3250 48 /* Fractional LMUL - check LMUL * VLEN >= SEW */
d9b7609a 49 if (lmul == 4 ||
32be3250 50 cpu->cfg.vlen >> (8 - lmul) < sew) {
51 vill = true;
52 }
53 }
54
c45eff30 55 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
2b7168fc 56 /* only set vill bit. */
57 env->vill = 1;
58 env->vtype = 0;
59 env->vl = 0;
60 env->vstart = 0;
61 return 0;
62 }
63
64 vlmax = vext_get_vlmax(cpu, s2);
65 if (s1 <= vlmax) {
66 vl = s1;
67 } else {
68 vl = vlmax;
69 }
70 env->vl = vl;
71 env->vtype = s2;
72 env->vstart = 0;
ac6bcf4d 73 env->vill = 0;
74 return vl;
75}
751538d5 76
751538d5 77/*
5a9f8e15 78 * Get the maximum number of elements that can be operated on.
751538d5 79 *
c7b8a421 80 * log2_esz: log2 of element size in bytes.
751538d5 81 */
c7b8a421 82static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
751538d5 83{
5a9f8e15 84 /*
8a4b5257 85 * As simd_desc supports at most 2048 bytes, the maximum vlen is 1024 bits,
86 * so vlen in bytes (vlenb) is encoded as maxsz.
87 */
88 uint32_t vlenb = simd_maxsz(desc);
89
90 /* Return VLMAX */
c7b8a421 91 int scale = vext_lmul(desc) - log2_esz;
5a9f8e15 92 return scale < 0 ? vlenb >> -scale : vlenb << scale;
93}
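/*
 * Worked example of the scale computation above (all numbers assumed for
 * illustration): with VLEN = 128 bits, vlenb == simd_maxsz(desc) == 16 bytes;
 * for LMUL = 1 (vext_lmul() == 0) and 32-bit elements (log2_esz = 2),
 * scale = 0 - 2 = -2 and VLMAX = 16 >> 2 = 4 elements. The standalone sketch
 * below mirrors the same formula without depending on the descriptor.
 */
static inline uint32_t vlmax_sketch(uint32_t vlenb, int32_t lmul_exp,
                                    uint32_t log2_esz)
{
    int32_t scale = lmul_exp - (int32_t)log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}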
94
95static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
96{
7b945bdc 97 return (addr & ~env->cur_pmmask) | env->cur_pmbase;
98}
99
100/*
101 * This function checks the watchpoint before the real load operation.
102 *
7893e42d 103 * In system mode, the TLB API probe_access is enough for watchpoint check.
104 * In user mode, there is no watchpoint support now.
105 *
106 * It will trigger an exception if there is no mapping in the TLB
107 * and the page table walk can't fill the TLB entry. Then the guest
108 * software can return here after processing the exception, or never return.
109 */
110static void probe_pages(CPURISCVState *env, target_ulong addr,
111 target_ulong len, uintptr_t ra,
112 MMUAccessType access_type)
113{
114 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
115 target_ulong curlen = MIN(pagelen, len);
116
d6b9d930 117 probe_access(env, adjust_addr(env, addr), curlen, access_type,
118 cpu_mmu_index(env, false), ra);
119 if (len > curlen) {
120 addr += curlen;
121 curlen = len - curlen;
d6b9d930 122 probe_access(env, adjust_addr(env, addr), curlen, access_type,
123 cpu_mmu_index(env, false), ra);
124 }
125}
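/*
 * Sketch of how probe_pages() splits an access at a page boundary, assuming
 * 4 KiB pages purely for illustration. -(addr | TARGET_PAGE_MASK) is the
 * number of bytes left in the page containing addr, so a len-byte access is
 * probed as at most two pieces: [addr, end of page) and the remainder
 * starting at the next page.
 */
static inline void page_split_sketch(uint64_t addr, uint64_t len,
                                     uint64_t *first, uint64_t *second)
{
    const uint64_t page_mask = ~(uint64_t)0xfff;   /* assumed 4 KiB pages */
    uint64_t pagelen = -(addr | page_mask);        /* bytes to end of page */

    *first = len < pagelen ? len : pagelen;        /* probed at addr */
    *second = len - *first;                        /* probed at addr + *first */
    /* e.g. addr = 0x1ffa, len = 16 -> first = 6, second = 10 */
}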
126
127static inline void vext_set_elem_mask(void *v0, int index,
128 uint8_t value)
3a6f8f68 129{
130 int idx = index / 64;
131 int pos = index % 64;
3a6f8f68 132 uint64_t old = ((uint64_t *)v0)[idx];
f9298de5 133 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
3a6f8f68 134}
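/*
 * The mask register holds one bit per element: element i lives at bit
 * (i % 64) of the (i / 64)-th uint64_t. A minimal read-side counterpart of
 * the deposit64() store above, for illustration only (the real accessor used
 * by the helpers below is vext_elem_mask()):
 */
static inline uint8_t elem_mask_sketch(const uint64_t *v0, int index)
{
    return (v0[index / 64] >> (index % 64)) & 1;
}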
751538d5 135
751538d5 136/* element operations for load and store */
022b9bce 137typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
751538d5 138 uint32_t idx, void *vd, uintptr_t retaddr);
751538d5 139
79556fb6 140#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
141static void NAME(CPURISCVState *env, abi_ptr addr, \
142 uint32_t idx, void *vd, uintptr_t retaddr)\
143{ \
751538d5 144 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
79556fb6 145 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
146} \
147
148GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
149GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
150GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
151GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
152
153#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
154static void NAME(CPURISCVState *env, abi_ptr addr, \
155 uint32_t idx, void *vd, uintptr_t retaddr)\
156{ \
157 ETYPE data = *((ETYPE *)vd + H(idx)); \
158 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
159}
160
161GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
162GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
163GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
164GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
165
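/*
 * For reference, GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) above expands to
 * roughly:
 *
 *   static void lde_b(CPURISCVState *env, abi_ptr addr,
 *                     uint32_t idx, void *vd, uintptr_t retaddr)
 *   {
 *       int8_t *cur = ((int8_t *)vd + H1(idx));
 *       *cur = cpu_ldsb_data_ra(env, addr, retaddr);
 *   }
 *
 * i.e. one element of the in-memory vector register is filled from a guest
 * load of the matching width; the GEN_VEXT_ST_ELEM expansions are symmetric.
 */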
166static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
167 uint32_t desc, uint32_t nf,
168 uint32_t esz, uint32_t max_elems)
169{
e130683f 170 uint32_t vta = vext_vta(desc);
171 int k;
172
173 if (vta == 0) {
174 return;
175 }
176
177 for (k = 0; k < nf; ++k) {
178 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
179 (k * max_elems + max_elems) * esz);
180 }
181}
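/*
 * Worked example of the tail fill above, with assumed values: nf = 2 fields,
 * max_elems = 4, vl = 3, esz = 4 bytes and vta = 1. The tail of each field is
 * then set to all ones:
 *   field 0: bytes [12, 16)   (element 3 of field 0)
 *   field 1: bytes [28, 32)   (element 3 of field 1)
 * With vta == 0 (tail-undisturbed) the helper returns without touching vd.
 */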
182
751538d5 183/*
3b57254d 184 * stride: access vector elements from strided memory
185 */
186static void
187vext_ldst_stride(void *vd, void *v0, target_ulong base,
188 target_ulong stride, CPURISCVState *env,
189 uint32_t desc, uint32_t vm,
3479a814 190 vext_ldst_elem_fn *ldst_elem,
c7b8a421 191 uint32_t log2_esz, uintptr_t ra)
192{
193 uint32_t i, k;
194 uint32_t nf = vext_nf(desc);
c7b8a421 195 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 196 uint32_t esz = 1 << log2_esz;
265ecd4c 197 uint32_t vma = vext_vma(desc);
751538d5 198
f714361e 199 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
751538d5 200 k = 0;
751538d5 201 while (k < nf) {
202 if (!vm && !vext_elem_mask(v0, i)) {
203 /* set masked-off elements to 1s */
204 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
205 (i + k * max_elems + 1) * esz);
206 k++;
207 continue;
208 }
c7b8a421 209 target_ulong addr = base + stride * i + (k << log2_esz);
d6b9d930 210 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
211 k++;
212 }
213 }
f714361e 214 env->vstart = 0;
e130683f 215
949b6bcb 216 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
217}
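/*
 * Address pattern of the strided loop above, values assumed for
 * illustration: with nf = 2, esz = 4 bytes and stride = 32 bytes, element i /
 * field k is read from base + stride * i + k * esz and written to vd element
 * i + k * max_elems, e.g. i = 1 touches base + 32 (field 0) and base + 36
 * (field 1). The sketch below is the same address formula in isolation.
 */
static inline uint64_t stride_elem_addr_sketch(uint64_t base, uint64_t stride,
                                               uint32_t i, uint32_t k,
                                               uint32_t log2_esz)
{
    return base + stride * i + ((uint64_t)k << log2_esz);
}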
218
79556fb6 219#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
220void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
221 target_ulong stride, CPURISCVState *env, \
222 uint32_t desc) \
223{ \
224 uint32_t vm = vext_vm(desc); \
225 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
25eae048 226 ctzl(sizeof(ETYPE)), GETPC()); \
227}
228
229GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
230GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
231GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
232GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
233
234#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
235void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
236 target_ulong stride, CPURISCVState *env, \
237 uint32_t desc) \
238{ \
239 uint32_t vm = vext_vm(desc); \
240 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
25eae048 241 ctzl(sizeof(ETYPE)), GETPC()); \
242}
243
244GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
245GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
246GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
247GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
248
249/*
3b57254d 250 * unit-stride: access elements stored contiguously in memory
251 */
252
3b57254d 253/* unmasked unit-stride load and store operation */
254static void
255vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
c7b8a421 256 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
25eae048 257 uintptr_t ra)
258{
259 uint32_t i, k;
260 uint32_t nf = vext_nf(desc);
c7b8a421 261 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 262 uint32_t esz = 1 << log2_esz;
751538d5 263
751538d5 264 /* load bytes from guest memory */
5c89e9c0 265 for (i = env->vstart; i < evl; i++, env->vstart++) {
266 k = 0;
267 while (k < nf) {
c7b8a421 268 target_ulong addr = base + ((i * nf + k) << log2_esz);
d6b9d930 269 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
270 k++;
271 }
272 }
f714361e 273 env->vstart = 0;
e130683f 274
949b6bcb 275 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
276}
277
278/*
246f8796 279 * masked unit-stride load and store operations are handled as a special
fba59e0f 280 * case of strided access, with stride = NF * sizeof(ETYPE)
281 */
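/*
 * For example (values assumed for illustration): a masked vlseg3e16 access
 * has NF = 3 and 2-byte elements, so the equivalent stride is 3 * 2 = 6
 * bytes, which is exactly what the *_mask helpers below pass to
 * vext_ldst_stride() as vext_nf(desc) << ctzl(sizeof(ETYPE)).
 */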
282
79556fb6 283#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
284void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
285 CPURISCVState *env, uint32_t desc) \
286{ \
5a9f8e15 287 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
751538d5 288 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
25eae048 289 ctzl(sizeof(ETYPE)), GETPC()); \
290} \
291 \
292void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
293 CPURISCVState *env, uint32_t desc) \
294{ \
3479a814 295 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
25eae048 296 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
297}
298
299GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
300GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
301GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
302GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
303
304#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
305void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
306 CPURISCVState *env, uint32_t desc) \
307{ \
308 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
309 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
25eae048 310 ctzl(sizeof(ETYPE)), GETPC()); \
311} \
312 \
313void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
314 CPURISCVState *env, uint32_t desc) \
315{ \
316 vext_ldst_us(vd, base, env, desc, STORE_FN, \
25eae048 317 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
318}
319
320GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
321GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
322GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
323GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
f732560e 324
26086aea 325/*
3b57254d 326 * unit stride mask load and store, EEW = 1
327 */
328void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
329 CPURISCVState *env, uint32_t desc)
330{
331 /* evl = ceil(vl/8) */
332 uint8_t evl = (env->vl + 7) >> 3;
333 vext_ldst_us(vd, base, env, desc, lde_b,
25eae048 334 0, evl, GETPC());
335}
336
337void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
338 CPURISCVState *env, uint32_t desc)
339{
340 /* evl = ceil(vl/8) */
341 uint8_t evl = (env->vl + 7) >> 3;
342 vext_ldst_us(vd, base, env, desc, ste_b,
25eae048 343 0, evl, GETPC());
344}
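/*
 * The mask load/store above operates on whole bytes of the mask register,
 * evl = ceil(vl / 8). A standalone sketch of that rounding (e.g. vl = 17 mask
 * bits -> 3 bytes accessed), for illustration only:
 */
static inline uint32_t mask_evl_sketch(uint32_t vl)
{
    return (vl + 7) >> 3;
}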
345
f732560e 346/*
3b57254d 347 * index: access vector elements from indexed memory
348 */
349typedef target_ulong vext_get_index_addr(target_ulong base,
350 uint32_t idx, void *vs2);
351
352#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
353static target_ulong NAME(target_ulong base, \
354 uint32_t idx, void *vs2) \
355{ \
356 return (base + *((ETYPE *)vs2 + H(idx))); \
357}
358
359GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
360GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
361GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
362GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
363
364static inline void
365vext_ldst_index(void *vd, void *v0, target_ulong base,
366 void *vs2, CPURISCVState *env, uint32_t desc,
367 vext_get_index_addr get_index_addr,
368 vext_ldst_elem_fn *ldst_elem,
c7b8a421 369 uint32_t log2_esz, uintptr_t ra)
370{
371 uint32_t i, k;
372 uint32_t nf = vext_nf(desc);
373 uint32_t vm = vext_vm(desc);
c7b8a421 374 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 375 uint32_t esz = 1 << log2_esz;
265ecd4c 376 uint32_t vma = vext_vma(desc);
f732560e 377
f732560e 378 /* load bytes from guest memory */
f714361e 379 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
f714361e 380 k = 0;
f732560e 381 while (k < nf) {
382 if (!vm && !vext_elem_mask(v0, i)) {
383 /* set masked-off elements to 1s */
384 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
385 (i + k * max_elems + 1) * esz);
386 k++;
387 continue;
388 }
c7b8a421 389 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
d6b9d930 390 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
391 k++;
392 }
393 }
f714361e 394 env->vstart = 0;
e130683f 395
949b6bcb 396 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
397}
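/*
 * Sketch of the indexed address computation above for a 32-bit index vector
 * (cf. idx_w), ignoring the host-endian H4() adjustment; values assumed for
 * illustration: with index = {0, 40, 8} and esz = 4, element 1 / field 1 is
 * accessed at base + 40 + 4.
 */
static inline uint64_t indexed_elem_addr_sketch(uint64_t base,
                                                const uint32_t *index,
                                                uint32_t i, uint32_t k,
                                                uint32_t log2_esz)
{
    return base + index[i] + ((uint64_t)k << log2_esz);
}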
398
08b9d0ed 399#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
400void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
401 void *vs2, CPURISCVState *env, uint32_t desc) \
402{ \
403 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
25eae048 404 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
405}
406
407GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
408GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
409GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
410GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
411GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
412GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
413GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
414GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
415GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
416GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
417GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
418GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
419GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
420GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
421GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
422GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
423
424#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
425void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
426 void *vs2, CPURISCVState *env, uint32_t desc) \
427{ \
428 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
5a9f8e15 429 STORE_FN, ctzl(sizeof(ETYPE)), \
25eae048 430 GETPC()); \
431}
432
433GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
434GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
435GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
436GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
437GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
438GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
439GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
440GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
441GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
442GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
443GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
444GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
445GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
446GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
447GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
448GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
449
450/*
3b57254d 451 * unit-stride fault-only-first load instructions
452 */
453static inline void
454vext_ldff(void *vd, void *v0, target_ulong base,
455 CPURISCVState *env, uint32_t desc,
456 vext_ldst_elem_fn *ldst_elem,
c7b8a421 457 uint32_t log2_esz, uintptr_t ra)
458{
459 void *host;
460 uint32_t i, k, vl = 0;
461 uint32_t nf = vext_nf(desc);
462 uint32_t vm = vext_vm(desc);
c7b8a421 463 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 464 uint32_t esz = 1 << log2_esz;
265ecd4c 465 uint32_t vma = vext_vma(desc);
466 target_ulong addr, offset, remain;
467
3b57254d 468 /* probe every access */
f714361e 469 for (i = env->vstart; i < env->vl; i++) {
f9298de5 470 if (!vm && !vext_elem_mask(v0, i)) {
471 continue;
472 }
c7b8a421 473 addr = adjust_addr(env, base + i * (nf << log2_esz));
022b4ecf 474 if (i == 0) {
c7b8a421 475 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
476 } else {
477 /* if it triggers an exception, no need to check watchpoint */
c7b8a421 478 remain = nf << log2_esz;
479 while (remain > 0) {
480 offset = -(addr | TARGET_PAGE_MASK);
481 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
482 cpu_mmu_index(env, false));
483 if (host) {
484#ifdef CONFIG_USER_ONLY
4cc9f284 485 if (!page_check_range(addr, offset, PAGE_READ)) {
486 vl = i;
487 goto ProbeSuccess;
488 }
489#else
01d09525 490 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
491#endif
492 } else {
493 vl = i;
494 goto ProbeSuccess;
495 }
496 if (remain <= offset) {
497 break;
498 }
499 remain -= offset;
d6b9d930 500 addr = adjust_addr(env, addr + offset);
501 }
502 }
503 }
504ProbeSuccess:
505 /* load bytes from guest memory */
506 if (vl != 0) {
507 env->vl = vl;
508 }
f714361e 509 for (i = env->vstart; i < env->vl; i++) {
022b4ecf 510 k = 0;
022b4ecf 511 while (k < nf) {
512 if (!vm && !vext_elem_mask(v0, i)) {
513 /* set masked-off elements to 1s */
514 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
515 (i + k * max_elems + 1) * esz);
516 k++;
517 continue;
518 }
f3f65c40 519 addr = base + ((i * nf + k) << log2_esz);
d6b9d930 520 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
521 k++;
522 }
523 }
f714361e 524 env->vstart = 0;
e130683f 525
949b6bcb 526 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
527}
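/*
 * Behavioural sketch of the fault-only-first trimming above, ignoring
 * masking and vstart (illustration only): element 0 must be accessible
 * (otherwise the helper faults via probe_pages()), while the first
 * inaccessible element at i > 0 merely shortens vl to i, and the load
 * completes for the leading elements without raising an exception.
 */
static inline uint32_t ldff_new_vl_sketch(const bool *accessible, uint32_t vl)
{
    uint32_t i;

    for (i = 1; i < vl; i++) {
        if (!accessible[i]) {
            return i;    /* vl is trimmed, no trap */
        }
    }
    return vl;
}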
528
529#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
530void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
531 CPURISCVState *env, uint32_t desc) \
532{ \
533 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
5a9f8e15 534 ctzl(sizeof(ETYPE)), GETPC()); \
535}
536
537GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
538GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
539GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
540GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
268fcca6 541
542#define DO_SWAP(N, M) (M)
543#define DO_AND(N, M) (N & M)
544#define DO_XOR(N, M) (N ^ M)
545#define DO_OR(N, M) (N | M)
546#define DO_ADD(N, M) (N + M)
547
548/* Signed min/max */
549#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
550#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
551
30206bd8 552/*
3b57254d 553 * load and store whole register instructions
554 */
555static void
556vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
c7b8a421 557 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
30206bd8 558{
f714361e 559 uint32_t i, k, off, pos;
30206bd8 560 uint32_t nf = vext_nf(desc);
86247c51 561 uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3;
c7b8a421 562 uint32_t max_elems = vlenb >> log2_esz;
30206bd8 563
564 k = env->vstart / max_elems;
565 off = env->vstart % max_elems;
30206bd8 566
567 if (off) {
568 /* load/store the remaining elements of the segment pointed to by vstart */
569 for (pos = off; pos < max_elems; pos++, env->vstart++) {
c7b8a421 570 target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
571 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd,
572 ra);
573 }
574 k++;
575 }
576
577 /* load/store elements for rest of segments */
578 for (; k < nf; k++) {
579 for (i = 0; i < max_elems; i++, env->vstart++) {
c7b8a421 580 target_ulong addr = base + ((i + k * max_elems) << log2_esz);
d6b9d930 581 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
582 }
583 }
584
585 env->vstart = 0;
586}
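/*
 * Sketch of how a partially executed whole-register access resumes from
 * vstart (values assumed): with max_elems = 16 elements per register and
 * vstart = 20, the loop above restarts at register k = 1, element off = 4,
 * finishes that register, then handles registers 2 .. nf - 1 in full.
 */
static inline void whole_reg_resume_sketch(uint32_t vstart, uint32_t max_elems,
                                           uint32_t *k, uint32_t *off)
{
    *k = vstart / max_elems;
    *off = vstart % max_elems;
}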
587
588#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
589void HELPER(NAME)(void *vd, target_ulong base, \
590 CPURISCVState *env, uint32_t desc) \
591{ \
592 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
25eae048 593 ctzl(sizeof(ETYPE)), GETPC()); \
594}
595
596GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
597GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
598GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
599GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
600GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
601GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
602GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
603GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
604GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
605GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
606GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
607GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
608GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
609GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
610GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
611GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
612
613#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
614void HELPER(NAME)(void *vd, target_ulong base, \
615 CPURISCVState *env, uint32_t desc) \
616{ \
617 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
25eae048 618 ctzl(sizeof(ETYPE)), GETPC()); \
619}
620
621GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
622GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
623GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
624GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
625
43740e3a 626/*
3b57254d 627 * Vector Integer Arithmetic Instructions
628 */
629
630/* (TD, T1, T2, TX1, TX2) */
631#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
632#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
633#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
634#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
635#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
636#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
637#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
638#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
639#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
640#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
641#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
642#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
643#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
644#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
645#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
646#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
647#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
648#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
649#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
650#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
651#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
652#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
653#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
43740e3a 654
655#define DO_SUB(N, M) (N - M)
656#define DO_RSUB(N, M) (M - N)
657
658RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
659RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
660RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
661RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
662RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
663RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
664RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
665RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
666
f1eed927 667GEN_VEXT_VV(vadd_vv_b, 1)
668GEN_VEXT_VV(vadd_vv_h, 2)
669GEN_VEXT_VV(vadd_vv_w, 4)
670GEN_VEXT_VV(vadd_vv_d, 8)
671GEN_VEXT_VV(vsub_vv_b, 1)
672GEN_VEXT_VV(vsub_vv_h, 2)
673GEN_VEXT_VV(vsub_vv_w, 4)
674GEN_VEXT_VV(vsub_vv_d, 8)
43740e3a 675
676
677RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
678RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
679RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
680RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
681RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
682RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
683RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
684RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
685RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
686RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
687RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
688RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
689
5c19fc15 690GEN_VEXT_VX(vadd_vx_b, 1)
691GEN_VEXT_VX(vadd_vx_h, 2)
692GEN_VEXT_VX(vadd_vx_w, 4)
693GEN_VEXT_VX(vadd_vx_d, 8)
694GEN_VEXT_VX(vsub_vx_b, 1)
695GEN_VEXT_VX(vsub_vx_h, 2)
696GEN_VEXT_VX(vsub_vx_w, 4)
697GEN_VEXT_VX(vsub_vx_d, 8)
698GEN_VEXT_VX(vrsub_vx_b, 1)
699GEN_VEXT_VX(vrsub_vx_h, 2)
700GEN_VEXT_VX(vrsub_vx_w, 4)
701GEN_VEXT_VX(vrsub_vx_d, 8)
702
703void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
704{
705 intptr_t oprsz = simd_oprsz(desc);
706 intptr_t i;
707
708 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
709 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
710 }
711}
712
713void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
714{
715 intptr_t oprsz = simd_oprsz(desc);
716 intptr_t i;
717
718 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
719 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
720 }
721}
722
723void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
724{
725 intptr_t oprsz = simd_oprsz(desc);
726 intptr_t i;
727
728 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
729 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
730 }
731}
732
733void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
734{
735 intptr_t oprsz = simd_oprsz(desc);
736 intptr_t i;
737
738 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
739 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
740 }
741}
742
743/* Vector Widening Integer Add/Subtract */
744#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
745#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
746#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
747#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
748#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
749#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
750#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
751#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
752#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
753#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
754#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
755#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
756RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
757RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
758RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
759RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
760RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
761RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
762RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
763RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
764RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
765RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
766RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
767RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
768RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
769RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
770RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
771RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
772RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
773RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
774RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
775RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
776RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
777RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
778RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
779RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
f1eed927 780GEN_VEXT_VV(vwaddu_vv_b, 2)
781GEN_VEXT_VV(vwaddu_vv_h, 4)
782GEN_VEXT_VV(vwaddu_vv_w, 8)
783GEN_VEXT_VV(vwsubu_vv_b, 2)
784GEN_VEXT_VV(vwsubu_vv_h, 4)
785GEN_VEXT_VV(vwsubu_vv_w, 8)
786GEN_VEXT_VV(vwadd_vv_b, 2)
787GEN_VEXT_VV(vwadd_vv_h, 4)
788GEN_VEXT_VV(vwadd_vv_w, 8)
789GEN_VEXT_VV(vwsub_vv_b, 2)
790GEN_VEXT_VV(vwsub_vv_h, 4)
791GEN_VEXT_VV(vwsub_vv_w, 8)
792GEN_VEXT_VV(vwaddu_wv_b, 2)
793GEN_VEXT_VV(vwaddu_wv_h, 4)
794GEN_VEXT_VV(vwaddu_wv_w, 8)
795GEN_VEXT_VV(vwsubu_wv_b, 2)
796GEN_VEXT_VV(vwsubu_wv_h, 4)
797GEN_VEXT_VV(vwsubu_wv_w, 8)
798GEN_VEXT_VV(vwadd_wv_b, 2)
799GEN_VEXT_VV(vwadd_wv_h, 4)
800GEN_VEXT_VV(vwadd_wv_w, 8)
801GEN_VEXT_VV(vwsub_wv_b, 2)
802GEN_VEXT_VV(vwsub_wv_h, 4)
803GEN_VEXT_VV(vwsub_wv_w, 8)
804
805RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
806RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
807RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
808RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
809RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
810RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
811RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
812RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
813RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
814RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
815RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
816RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
817RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
818RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
819RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
820RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
821RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
822RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
823RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
824RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
825RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
826RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
827RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
828RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
5c19fc15 829GEN_VEXT_VX(vwaddu_vx_b, 2)
830GEN_VEXT_VX(vwaddu_vx_h, 4)
831GEN_VEXT_VX(vwaddu_vx_w, 8)
832GEN_VEXT_VX(vwsubu_vx_b, 2)
833GEN_VEXT_VX(vwsubu_vx_h, 4)
834GEN_VEXT_VX(vwsubu_vx_w, 8)
835GEN_VEXT_VX(vwadd_vx_b, 2)
836GEN_VEXT_VX(vwadd_vx_h, 4)
837GEN_VEXT_VX(vwadd_vx_w, 8)
838GEN_VEXT_VX(vwsub_vx_b, 2)
839GEN_VEXT_VX(vwsub_vx_h, 4)
840GEN_VEXT_VX(vwsub_vx_w, 8)
841GEN_VEXT_VX(vwaddu_wx_b, 2)
842GEN_VEXT_VX(vwaddu_wx_h, 4)
843GEN_VEXT_VX(vwaddu_wx_w, 8)
844GEN_VEXT_VX(vwsubu_wx_b, 2)
845GEN_VEXT_VX(vwsubu_wx_h, 4)
846GEN_VEXT_VX(vwsubu_wx_w, 8)
847GEN_VEXT_VX(vwadd_wx_b, 2)
848GEN_VEXT_VX(vwadd_wx_h, 4)
849GEN_VEXT_VX(vwadd_wx_w, 8)
850GEN_VEXT_VX(vwsub_wx_b, 2)
851GEN_VEXT_VX(vwsub_wx_h, 4)
852GEN_VEXT_VX(vwsub_wx_w, 8)
853
854/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
855#define DO_VADC(N, M, C) (N + M + C)
856#define DO_VSBC(N, M, C) (N - M - C)
857
3479a814 858#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
859void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
860 CPURISCVState *env, uint32_t desc) \
861{ \
3a6f8f68 862 uint32_t vl = env->vl; \
5c19fc15 863 uint32_t esz = sizeof(ETYPE); \
864 uint32_t total_elems = \
865 vext_get_total_elems(env, desc, esz); \
866 uint32_t vta = vext_vta(desc); \
867 uint32_t i; \
868 \
f714361e 869 for (i = env->vstart; i < vl; i++) { \
870 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
871 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 872 ETYPE carry = vext_elem_mask(v0, i); \
873 \
874 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
875 } \
f714361e 876 env->vstart = 0; \
5c19fc15 877 /* set tail elements to 1s */ \
878 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
879}
880
881GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
882GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
883GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
884GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
3a6f8f68 885
886GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
887GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
888GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
889GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
3a6f8f68 890
3479a814 891#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
892void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
893 CPURISCVState *env, uint32_t desc) \
894{ \
3a6f8f68 895 uint32_t vl = env->vl; \
5c19fc15 896 uint32_t esz = sizeof(ETYPE); \
897 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
898 uint32_t vta = vext_vta(desc); \
899 uint32_t i; \
900 \
f714361e 901 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 902 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 903 ETYPE carry = vext_elem_mask(v0, i); \
904 \
905 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
906 } \
c45eff30 907 env->vstart = 0; \
5c19fc15 908 /* set tail elements to 1s */ \
909 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
910}
911
912GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
913GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
914GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
915GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
3a6f8f68 916
917GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
918GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
919GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
920GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
921
922#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
923 (__typeof(N))(N + M) < N)
924#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
925
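/*
 * DO_MADC/DO_MSBC above compute the carry/borrow *out* of an element-wide
 * add/subtract without needing a wider type: a sum that wrapped is smaller
 * than (or, with carry-in, no larger than) one of its operands. A uint8_t
 * illustration (assumed example, not used by the helpers): 200 + 100 wraps
 * to 44, and 44 < 200 signals the carry.
 */
static inline bool madc8_sketch(uint8_t n, uint8_t m, bool cin)
{
    /* same trick as DO_MADC; equivalent to ((unsigned)n + m + cin) > 0xff */
    return cin ? (uint8_t)(n + m + 1) <= n : (uint8_t)(n + m) < n;
}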
926#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
927void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
928 CPURISCVState *env, uint32_t desc) \
929{ \
3a6f8f68 930 uint32_t vl = env->vl; \
bb45485a 931 uint32_t vm = vext_vm(desc); \
86247c51 932 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
5c19fc15 933 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
934 uint32_t i; \
935 \
f714361e 936 for (i = env->vstart; i < vl; i++) { \
937 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
938 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 939 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 940 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
3a6f8f68 941 } \
f714361e 942 env->vstart = 0; \
943 /*
944 * mask destination register is always tail-agnostic
945 * set tail elements to 1s
946 */ \
5c19fc15 947 if (vta_all_1s) { \
948 for (; i < total_elems; i++) { \
949 vext_set_elem_mask(vd, i, 1); \
950 } \
951 } \
952}
953
954GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
955GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
956GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
957GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
958
959GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
960GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
961GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
962GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
963
964#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
965void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
966 void *vs2, CPURISCVState *env, uint32_t desc) \
967{ \
3a6f8f68 968 uint32_t vl = env->vl; \
bb45485a 969 uint32_t vm = vext_vm(desc); \
86247c51 970 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
5c19fc15 971 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
972 uint32_t i; \
973 \
f714361e 974 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 975 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 976 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 977 vext_set_elem_mask(vd, i, \
978 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
979 } \
f714361e 980 env->vstart = 0; \
981 /*
982 * mask destination register is always tail-agnostic
983 * set tail elements to 1s
984 */ \
5c19fc15 985 if (vta_all_1s) { \
986 for (; i < total_elems; i++) { \
987 vext_set_elem_mask(vd, i, 1); \
988 } \
989 } \
990}
991
992GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
993GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
994GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
995GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
996
997GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
998GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
999GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1000GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1001
1002/* Vector Bitwise Logical Instructions */
1003RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1004RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1005RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1006RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1007RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1008RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1009RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1010RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1011RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1012RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1013RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1014RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
f1eed927 1015GEN_VEXT_VV(vand_vv_b, 1)
1016GEN_VEXT_VV(vand_vv_h, 2)
1017GEN_VEXT_VV(vand_vv_w, 4)
1018GEN_VEXT_VV(vand_vv_d, 8)
1019GEN_VEXT_VV(vor_vv_b, 1)
1020GEN_VEXT_VV(vor_vv_h, 2)
1021GEN_VEXT_VV(vor_vv_w, 4)
1022GEN_VEXT_VV(vor_vv_d, 8)
1023GEN_VEXT_VV(vxor_vv_b, 1)
1024GEN_VEXT_VV(vxor_vv_h, 2)
1025GEN_VEXT_VV(vxor_vv_w, 4)
1026GEN_VEXT_VV(vxor_vv_d, 8)
1027
1028RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1029RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1030RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1031RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1032RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1033RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1034RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1035RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1036RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1037RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1038RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1039RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
5c19fc15 1040GEN_VEXT_VX(vand_vx_b, 1)
1041GEN_VEXT_VX(vand_vx_h, 2)
1042GEN_VEXT_VX(vand_vx_w, 4)
1043GEN_VEXT_VX(vand_vx_d, 8)
1044GEN_VEXT_VX(vor_vx_b, 1)
1045GEN_VEXT_VX(vor_vx_h, 2)
1046GEN_VEXT_VX(vor_vx_w, 4)
1047GEN_VEXT_VX(vor_vx_d, 8)
1048GEN_VEXT_VX(vxor_vx_b, 1)
1049GEN_VEXT_VX(vxor_vx_h, 2)
1050GEN_VEXT_VX(vxor_vx_w, 4)
1051GEN_VEXT_VX(vxor_vx_d, 8)
1052
1053/* Vector Single-Width Bit Shift Instructions */
1054#define DO_SLL(N, M) (N << (M))
1055#define DO_SRL(N, M) (N >> (M))
1056
1057/* generate the helpers for shift instructions with two vector operands */
3479a814 1058#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
1059void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1060 void *vs2, CPURISCVState *env, uint32_t desc) \
1061{ \
1062 uint32_t vm = vext_vm(desc); \
1063 uint32_t vl = env->vl; \
7b1bff41 1064 uint32_t esz = sizeof(TS1); \
1065 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1066 uint32_t vta = vext_vta(desc); \
fd93045e 1067 uint32_t vma = vext_vma(desc); \
1068 uint32_t i; \
1069 \
f714361e 1070 for (i = env->vstart; i < vl; i++) { \
f9298de5 1071 if (!vm && !vext_elem_mask(v0, i)) { \
1072 /* set masked-off elements to 1s */ \
1073 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
1074 continue; \
1075 } \
1076 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1077 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1078 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1079 } \
f714361e 1080 env->vstart = 0; \
7b1bff41 1081 /* set tail elements to 1s */ \
1082 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1083}
1084
1085GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1086GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1087GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1088GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
3277d955 1089
1090GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1091GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1092GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1093GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
3277d955 1094
1095GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1096GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1097GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1098GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
3277d955 1099
1100/*
1101 * generate the helpers for shift instructions with one vector and one scalar
1102 */
1103#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1104void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1105 void *vs2, CPURISCVState *env, \
1106 uint32_t desc) \
1107{ \
1108 uint32_t vm = vext_vm(desc); \
1109 uint32_t vl = env->vl; \
7b1bff41 1110 uint32_t esz = sizeof(TD); \
1111 uint32_t total_elems = \
1112 vext_get_total_elems(env, desc, esz); \
1113 uint32_t vta = vext_vta(desc); \
fd93045e 1114 uint32_t vma = vext_vma(desc); \
1115 uint32_t i; \
1116 \
f714361e 1117 for (i = env->vstart; i < vl; i++) { \
3479a814 1118 if (!vm && !vext_elem_mask(v0, i)) { \
1119 /* set masked-off elements to 1s */ \
1120 vext_set_elems_1s(vd, vma, i * esz, \
1121 (i + 1) * esz); \
1122 continue; \
1123 } \
1124 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1125 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1126 } \
f714361e 1127 env->vstart = 0; \
7b1bff41 1128 /* set tail elements to 1s */ \
1129 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
1130}
1131
1132GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1133GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1134GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1135GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1136
1137GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1138GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1139GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1140GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1141
1142GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1143GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1144GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1145GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1146
1147/* Vector Narrowing Integer Right Shift Instructions */
1148GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1149GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1150GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1151GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1152GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1153GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1154GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1155GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1156GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1157GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1158GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1159GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1160
1161/* Vector Integer Comparison Instructions */
1162#define DO_MSEQ(N, M) (N == M)
1163#define DO_MSNE(N, M) (N != M)
1164#define DO_MSLT(N, M) (N < M)
1165#define DO_MSLE(N, M) (N <= M)
1166#define DO_MSGT(N, M) (N > M)
1167
1168#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1169void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1170 CPURISCVState *env, uint32_t desc) \
1171{ \
1172 uint32_t vm = vext_vm(desc); \
1173 uint32_t vl = env->vl; \
86247c51 1174 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
38581e5c 1175 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
6e11d7ea 1176 uint32_t vma = vext_vma(desc); \
1177 uint32_t i; \
1178 \
f714361e 1179 for (i = env->vstart; i < vl; i++) { \
1180 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1181 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1182 if (!vm && !vext_elem_mask(v0, i)) { \
1183 /* set masked-off elements to 1s */ \
1184 if (vma) { \
1185 vext_set_elem_mask(vd, i, 1); \
1186 } \
1187 continue; \
1188 } \
f9298de5 1189 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1366fc79 1190 } \
f714361e 1191 env->vstart = 0; \
1192 /*
1193 * mask destination register is always tail-agnostic
1194 * set tail elements to 1s
1195 */ \
38581e5c 1196 if (vta_all_1s) { \
1197 for (; i < total_elems; i++) { \
1198 vext_set_elem_mask(vd, i, 1); \
1199 } \
1200 } \
1201}
1202
1203GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1204GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1205GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1206GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1207
1208GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1209GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1210GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1211GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1212
1213GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1214GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1215GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1216GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1217
1218GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1219GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1220GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1221GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1222
1223GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1224GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1225GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1226GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1227
1228GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1229GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1230GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1231GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1232
1233#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1234void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1235 CPURISCVState *env, uint32_t desc) \
1236{ \
1237 uint32_t vm = vext_vm(desc); \
1238 uint32_t vl = env->vl; \
86247c51 1239 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
38581e5c 1240 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
6e11d7ea 1241 uint32_t vma = vext_vma(desc); \
1242 uint32_t i; \
1243 \
f714361e 1244 for (i = env->vstart; i < vl; i++) { \
1366fc79 1245 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1246 if (!vm && !vext_elem_mask(v0, i)) { \
1247 /* set masked-off elements to 1s */ \
1248 if (vma) { \
1249 vext_set_elem_mask(vd, i, 1); \
1250 } \
1251 continue; \
1252 } \
f9298de5 1253 vext_set_elem_mask(vd, i, \
1254 DO_OP(s2, (ETYPE)(target_long)s1)); \
1255 } \
f714361e 1256 env->vstart = 0; \
1257 /*
1258 * mask destination register is always tail-agnostic
1259 * set tail elements to 1s
1260 */ \
38581e5c 1261 if (vta_all_1s) { \
1262 for (; i < total_elems; i++) { \
1263 vext_set_elem_mask(vd, i, 1); \
1264 } \
1265 } \
1266}
1267
1268GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1269GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1270GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1271GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1272
1273GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1274GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1275GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1276GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1277
1278GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1279GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1280GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1281GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1282
1283GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1284GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1285GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1286GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1287
1288GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1289GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1290GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1291GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1292
1293GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1294GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1295GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1296GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1297
1298GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1299GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1300GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1301GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1302
1303GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1304GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1305GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1306GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1307
1308/* Vector Integer Min/Max Instructions */
1309RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1310RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1311RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1312RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1313RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1314RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1315RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1316RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1317RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1318RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1319RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1320RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1321RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1322RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1323RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1324RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
f1eed927 1325GEN_VEXT_VV(vminu_vv_b, 1)
1326GEN_VEXT_VV(vminu_vv_h, 2)
1327GEN_VEXT_VV(vminu_vv_w, 4)
1328GEN_VEXT_VV(vminu_vv_d, 8)
1329GEN_VEXT_VV(vmin_vv_b, 1)
1330GEN_VEXT_VV(vmin_vv_h, 2)
1331GEN_VEXT_VV(vmin_vv_w, 4)
1332GEN_VEXT_VV(vmin_vv_d, 8)
1333GEN_VEXT_VV(vmaxu_vv_b, 1)
1334GEN_VEXT_VV(vmaxu_vv_h, 2)
1335GEN_VEXT_VV(vmaxu_vv_w, 4)
1336GEN_VEXT_VV(vmaxu_vv_d, 8)
1337GEN_VEXT_VV(vmax_vv_b, 1)
1338GEN_VEXT_VV(vmax_vv_h, 2)
1339GEN_VEXT_VV(vmax_vv_w, 4)
1340GEN_VEXT_VV(vmax_vv_d, 8)
1341
1342RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1343RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1344RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1345RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1346RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1347RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1348RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1349RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1350RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1351RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1352RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1353RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1354RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1355RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1356RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1357RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
5c19fc15 1358GEN_VEXT_VX(vminu_vx_b, 1)
1359GEN_VEXT_VX(vminu_vx_h, 2)
1360GEN_VEXT_VX(vminu_vx_w, 4)
1361GEN_VEXT_VX(vminu_vx_d, 8)
1362GEN_VEXT_VX(vmin_vx_b, 1)
1363GEN_VEXT_VX(vmin_vx_h, 2)
1364GEN_VEXT_VX(vmin_vx_w, 4)
1365GEN_VEXT_VX(vmin_vx_d, 8)
1366GEN_VEXT_VX(vmaxu_vx_b, 1)
1367GEN_VEXT_VX(vmaxu_vx_h, 2)
1368GEN_VEXT_VX(vmaxu_vx_w, 4)
1369GEN_VEXT_VX(vmaxu_vx_d, 8)
1370GEN_VEXT_VX(vmax_vx_b, 1)
1371GEN_VEXT_VX(vmax_vx_h, 2)
1372GEN_VEXT_VX(vmax_vx_w, 4)
1373GEN_VEXT_VX(vmax_vx_d, 8)
1374
1375/* Vector Single-Width Integer Multiply Instructions */
1376#define DO_MUL(N, M) (N * M)
1377RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1378RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1379RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1380RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
f1eed927 1381GEN_VEXT_VV(vmul_vv_b, 1)
1382GEN_VEXT_VV(vmul_vv_h, 2)
1383GEN_VEXT_VV(vmul_vv_w, 4)
1384GEN_VEXT_VV(vmul_vv_d, 8)
1385
1386static int8_t do_mulh_b(int8_t s2, int8_t s1)
1387{
1388 return (int16_t)s2 * (int16_t)s1 >> 8;
1389}
1390
1391static int16_t do_mulh_h(int16_t s2, int16_t s1)
1392{
1393 return (int32_t)s2 * (int32_t)s1 >> 16;
1394}
1395
1396static int32_t do_mulh_w(int32_t s2, int32_t s1)
1397{
1398 return (int64_t)s2 * (int64_t)s1 >> 32;
1399}
1400
1401static int64_t do_mulh_d(int64_t s2, int64_t s1)
1402{
1403 uint64_t hi_64, lo_64;
1404
1405 muls64(&lo_64, &hi_64, s1, s2);
1406 return hi_64;
1407}
1408
1409static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1410{
1411 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1412}
1413
1414static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1415{
1416 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1417}
1418
1419static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1420{
1421 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1422}
1423
1424static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1425{
1426 uint64_t hi_64, lo_64;
1427
1428 mulu64(&lo_64, &hi_64, s2, s1);
1429 return hi_64;
1430}
1431
1432static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1433{
1434 return (int16_t)s2 * (uint16_t)s1 >> 8;
1435}
1436
1437static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1438{
1439 return (int32_t)s2 * (uint32_t)s1 >> 16;
1440}
1441
1442static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1443{
1444 return (int64_t)s2 * (uint64_t)s1 >> 32;
1445}
1446
/*
 * Let A = signed operand,
 *     B = unsigned operand,
 *     P = mulu64(A, B), the product of A's bit pattern and B.
 *
 * If A < 0, the unsigned interpretation of A's bit pattern is A + 2 ** 64, so
 *     P = (A + 2 ** 64) * B = A * B + 2 ** 64 * B
 * and the desired signed x unsigned product is
 *     SP = A * B = P - 2 ** 64 * B,
 * i.e. the high 64 bits of SP are the high 64 bits of P minus B.
 * If A >= 0, SP = P.
 *
 * Hence: HI_P -= (A < 0 ? B : 0)
 */
1465
1466static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1467{
1468 uint64_t hi_64, lo_64;
1469
1470 mulu64(&lo_64, &hi_64, s2, s1);
1471
1472 hi_64 -= s2 < 0 ? s1 : 0;
1473 return hi_64;
1474}
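/*
 * Illustrative sketch (hypothetical helper, not referenced by any generated
 * function): a concrete instance of the correction above. With s2 = -2
 * (signed) and s1 = 3 (unsigned), the exact 128-bit product is -6, whose
 * high 64 bits are all ones. mulu64() alone returns hi = 2, because it sees
 * (2 ** 64 - 2) * 3 = 2 * 2 ** 64 + (2 ** 64 - 6); subtracting s1 = 3 from
 * that high half yields the expected -1.
 */
static inline int64_t do_mulhsu_d_example(void)
{
    return do_mulhsu_d(-2, 3);    /* == -1 */
}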
1475
1476RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1477RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1478RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1479RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1480RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1481RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1482RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1483RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1484RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1485RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1486RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1487RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
f1eed927 1488GEN_VEXT_VV(vmulh_vv_b, 1)
1489GEN_VEXT_VV(vmulh_vv_h, 2)
1490GEN_VEXT_VV(vmulh_vv_w, 4)
1491GEN_VEXT_VV(vmulh_vv_d, 8)
1492GEN_VEXT_VV(vmulhu_vv_b, 1)
1493GEN_VEXT_VV(vmulhu_vv_h, 2)
1494GEN_VEXT_VV(vmulhu_vv_w, 4)
1495GEN_VEXT_VV(vmulhu_vv_d, 8)
1496GEN_VEXT_VV(vmulhsu_vv_b, 1)
1497GEN_VEXT_VV(vmulhsu_vv_h, 2)
1498GEN_VEXT_VV(vmulhsu_vv_w, 4)
1499GEN_VEXT_VV(vmulhsu_vv_d, 8)
1500
1501RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1502RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1503RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1504RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1505RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1506RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1507RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1508RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1509RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1510RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1511RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1512RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1513RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1514RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1515RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1516RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
5c19fc15 1517GEN_VEXT_VX(vmul_vx_b, 1)
1518GEN_VEXT_VX(vmul_vx_h, 2)
1519GEN_VEXT_VX(vmul_vx_w, 4)
1520GEN_VEXT_VX(vmul_vx_d, 8)
1521GEN_VEXT_VX(vmulh_vx_b, 1)
1522GEN_VEXT_VX(vmulh_vx_h, 2)
1523GEN_VEXT_VX(vmulh_vx_w, 4)
1524GEN_VEXT_VX(vmulh_vx_d, 8)
1525GEN_VEXT_VX(vmulhu_vx_b, 1)
1526GEN_VEXT_VX(vmulhu_vx_h, 2)
1527GEN_VEXT_VX(vmulhu_vx_w, 4)
1528GEN_VEXT_VX(vmulhu_vx_d, 8)
1529GEN_VEXT_VX(vmulhsu_vx_b, 1)
1530GEN_VEXT_VX(vmulhsu_vx_h, 2)
1531GEN_VEXT_VX(vmulhsu_vx_w, 4)
1532GEN_VEXT_VX(vmulhsu_vx_d, 8)
1533
1534/* Vector Integer Divide Instructions */
1535#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1536#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
c45eff30 1537#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \
85e6658c 1538 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
c45eff30 1539#define DO_REM(N, M) (unlikely(M == 0) ? N : \
1540 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
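/*
 * Illustrative sketch (hypothetical helper, not used by the generated
 * functions): the (N == -N) test above is a type-generic way of spotting the
 * minimum signed value, the only operand whose negation overflows. Following
 * the RISC-V rule, division by zero yields all ones and INT_MIN / -1 yields
 * INT_MIN instead of trapping, e.g. do_div_example(INT32_MIN, -1) == INT32_MIN.
 */
static inline int32_t do_div_example(int32_t n, int32_t m)
{
    return DO_DIV(n, m);
}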
1541
1542RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1543RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1544RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1545RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1546RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1547RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1548RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1549RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1550RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1551RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1552RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1553RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1554RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1555RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1556RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1557RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
f1eed927 1558GEN_VEXT_VV(vdivu_vv_b, 1)
1559GEN_VEXT_VV(vdivu_vv_h, 2)
1560GEN_VEXT_VV(vdivu_vv_w, 4)
1561GEN_VEXT_VV(vdivu_vv_d, 8)
1562GEN_VEXT_VV(vdiv_vv_b, 1)
1563GEN_VEXT_VV(vdiv_vv_h, 2)
1564GEN_VEXT_VV(vdiv_vv_w, 4)
1565GEN_VEXT_VV(vdiv_vv_d, 8)
1566GEN_VEXT_VV(vremu_vv_b, 1)
1567GEN_VEXT_VV(vremu_vv_h, 2)
1568GEN_VEXT_VV(vremu_vv_w, 4)
1569GEN_VEXT_VV(vremu_vv_d, 8)
1570GEN_VEXT_VV(vrem_vv_b, 1)
1571GEN_VEXT_VV(vrem_vv_h, 2)
1572GEN_VEXT_VV(vrem_vv_w, 4)
1573GEN_VEXT_VV(vrem_vv_d, 8)
1574
1575RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1576RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1577RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1578RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1579RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1580RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1581RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1582RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1583RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1584RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1585RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1586RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1587RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1588RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1589RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1590RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
5c19fc15 1591GEN_VEXT_VX(vdivu_vx_b, 1)
1592GEN_VEXT_VX(vdivu_vx_h, 2)
1593GEN_VEXT_VX(vdivu_vx_w, 4)
1594GEN_VEXT_VX(vdivu_vx_d, 8)
1595GEN_VEXT_VX(vdiv_vx_b, 1)
1596GEN_VEXT_VX(vdiv_vx_h, 2)
1597GEN_VEXT_VX(vdiv_vx_w, 4)
1598GEN_VEXT_VX(vdiv_vx_d, 8)
1599GEN_VEXT_VX(vremu_vx_b, 1)
1600GEN_VEXT_VX(vremu_vx_h, 2)
1601GEN_VEXT_VX(vremu_vx_w, 4)
1602GEN_VEXT_VX(vremu_vx_d, 8)
1603GEN_VEXT_VX(vrem_vx_b, 1)
1604GEN_VEXT_VX(vrem_vx_h, 2)
1605GEN_VEXT_VX(vrem_vx_w, 4)
1606GEN_VEXT_VX(vrem_vx_d, 8)
1607
1608/* Vector Widening Integer Multiply Instructions */
1609RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1610RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1611RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1612RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1613RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1614RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1615RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1616RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1617RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
f1eed927 1618GEN_VEXT_VV(vwmul_vv_b, 2)
1619GEN_VEXT_VV(vwmul_vv_h, 4)
1620GEN_VEXT_VV(vwmul_vv_w, 8)
1621GEN_VEXT_VV(vwmulu_vv_b, 2)
1622GEN_VEXT_VV(vwmulu_vv_h, 4)
1623GEN_VEXT_VV(vwmulu_vv_w, 8)
1624GEN_VEXT_VV(vwmulsu_vv_b, 2)
1625GEN_VEXT_VV(vwmulsu_vv_h, 4)
1626GEN_VEXT_VV(vwmulsu_vv_w, 8)
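/*
 * Illustrative sketch (hypothetical helper, not used by the generated
 * functions): the WOP_* type lists give the destination twice the element
 * width of the sources, so the product is formed without truncation. For
 * vwmulsu with SEW=8, the signed vs2 and unsigned vs1 elements widen to
 * 16 bits first (assuming WOP_SUS_B types the operands the same way the
 * OP_SUS_* lists do), so even the extreme case fits the 16-bit destination.
 */
static inline int16_t vwmulsu_b_example(void)
{
    int8_t s2 = -128;
    uint8_t s1 = 255;

    return (int16_t)s2 * (uint16_t)s1;    /* -32640, representable in int16_t */
}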
1627
1628RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1629RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1630RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1631RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1632RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1633RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1634RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1635RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1636RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
5c19fc15 1637GEN_VEXT_VX(vwmul_vx_b, 2)
1638GEN_VEXT_VX(vwmul_vx_h, 4)
1639GEN_VEXT_VX(vwmul_vx_w, 8)
1640GEN_VEXT_VX(vwmulu_vx_b, 2)
1641GEN_VEXT_VX(vwmulu_vx_h, 4)
1642GEN_VEXT_VX(vwmulu_vx_w, 8)
1643GEN_VEXT_VX(vwmulsu_vx_b, 2)
1644GEN_VEXT_VX(vwmulsu_vx_h, 4)
1645GEN_VEXT_VX(vwmulsu_vx_w, 8)
1646
1647/* Vector Single-Width Integer Multiply-Add Instructions */
c45eff30 1648#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1649static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1650{ \
1651 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1652 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1653 TD d = *((TD *)vd + HD(i)); \
1654 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1655}
1656
1657#define DO_MACC(N, M, D) (M * N + D)
1658#define DO_NMSAC(N, M, D) (-(M * N) + D)
1659#define DO_MADD(N, M, D) (M * D + N)
1660#define DO_NMSUB(N, M, D) (-(M * D) + N)
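/*
 * Illustrative sketch (hypothetical helper, not used by the generated
 * functions): the operand order of the DO_* macros above is what separates
 * the two multiply-add flavours. vmacc accumulates into the destination,
 * vd[i] = (vs1[i] * vs2[i]) + vd[i], while vmadd multiplies the destination,
 * vd[i] = (vs1[i] * vd[i]) + vs2[i].
 */
static inline int32_t macc_vs_madd_example(int32_t s2, int32_t s1, int32_t d,
                                           bool use_madd)
{
    return use_madd ? DO_MADD(s2, s1, d) : DO_MACC(s2, s1, d);
}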
1661RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1662RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1663RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1664RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1665RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1666RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1667RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1668RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1669RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1670RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1671RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1672RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1673RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1674RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1675RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1676RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
f1eed927 1677GEN_VEXT_VV(vmacc_vv_b, 1)
1678GEN_VEXT_VV(vmacc_vv_h, 2)
1679GEN_VEXT_VV(vmacc_vv_w, 4)
1680GEN_VEXT_VV(vmacc_vv_d, 8)
1681GEN_VEXT_VV(vnmsac_vv_b, 1)
1682GEN_VEXT_VV(vnmsac_vv_h, 2)
1683GEN_VEXT_VV(vnmsac_vv_w, 4)
1684GEN_VEXT_VV(vnmsac_vv_d, 8)
1685GEN_VEXT_VV(vmadd_vv_b, 1)
1686GEN_VEXT_VV(vmadd_vv_h, 2)
1687GEN_VEXT_VV(vmadd_vv_w, 4)
1688GEN_VEXT_VV(vmadd_vv_d, 8)
1689GEN_VEXT_VV(vnmsub_vv_b, 1)
1690GEN_VEXT_VV(vnmsub_vv_h, 2)
1691GEN_VEXT_VV(vnmsub_vv_w, 4)
1692GEN_VEXT_VV(vnmsub_vv_d, 8)
1693
1694#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1695static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1696{ \
1697 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1698 TD d = *((TD *)vd + HD(i)); \
1699 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1700}
1701
1702RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1703RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1704RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1705RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1706RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1707RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1708RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1709RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1710RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1711RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1712RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1713RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1714RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1715RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1716RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1717RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
5c19fc15 1718GEN_VEXT_VX(vmacc_vx_b, 1)
1719GEN_VEXT_VX(vmacc_vx_h, 2)
1720GEN_VEXT_VX(vmacc_vx_w, 4)
1721GEN_VEXT_VX(vmacc_vx_d, 8)
1722GEN_VEXT_VX(vnmsac_vx_b, 1)
1723GEN_VEXT_VX(vnmsac_vx_h, 2)
1724GEN_VEXT_VX(vnmsac_vx_w, 4)
1725GEN_VEXT_VX(vnmsac_vx_d, 8)
1726GEN_VEXT_VX(vmadd_vx_b, 1)
1727GEN_VEXT_VX(vmadd_vx_h, 2)
1728GEN_VEXT_VX(vmadd_vx_w, 4)
1729GEN_VEXT_VX(vmadd_vx_d, 8)
1730GEN_VEXT_VX(vnmsub_vx_b, 1)
1731GEN_VEXT_VX(vnmsub_vx_h, 2)
1732GEN_VEXT_VX(vnmsub_vx_w, 4)
1733GEN_VEXT_VX(vnmsub_vx_d, 8)
1734
1735/* Vector Widening Integer Multiply-Add Instructions */
1736RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1737RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1738RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1739RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1740RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1741RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1742RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1743RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1744RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
f1eed927 1745GEN_VEXT_VV(vwmaccu_vv_b, 2)
1746GEN_VEXT_VV(vwmaccu_vv_h, 4)
1747GEN_VEXT_VV(vwmaccu_vv_w, 8)
1748GEN_VEXT_VV(vwmacc_vv_b, 2)
1749GEN_VEXT_VV(vwmacc_vv_h, 4)
1750GEN_VEXT_VV(vwmacc_vv_w, 8)
1751GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1752GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1753GEN_VEXT_VV(vwmaccsu_vv_w, 8)
1754
1755RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1756RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1757RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1758RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1759RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1760RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1761RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1762RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1763RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1764RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1765RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1766RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
5c19fc15 1767GEN_VEXT_VX(vwmaccu_vx_b, 2)
1768GEN_VEXT_VX(vwmaccu_vx_h, 4)
1769GEN_VEXT_VX(vwmaccu_vx_w, 8)
1770GEN_VEXT_VX(vwmacc_vx_b, 2)
1771GEN_VEXT_VX(vwmacc_vx_h, 4)
1772GEN_VEXT_VX(vwmacc_vx_w, 8)
1773GEN_VEXT_VX(vwmaccsu_vx_b, 2)
1774GEN_VEXT_VX(vwmaccsu_vx_h, 4)
1775GEN_VEXT_VX(vwmaccsu_vx_w, 8)
1776GEN_VEXT_VX(vwmaccus_vx_b, 2)
1777GEN_VEXT_VX(vwmaccus_vx_h, 4)
1778GEN_VEXT_VX(vwmaccus_vx_w, 8)
1779
1780/* Vector Integer Merge and Move Instructions */
3479a814 1781#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
1782void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1783 uint32_t desc) \
1784{ \
1785 uint32_t vl = env->vl; \
89a32de2 1786 uint32_t esz = sizeof(ETYPE); \
1787 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1788 uint32_t vta = vext_vta(desc); \
1789 uint32_t i; \
1790 \
f714361e 1791 for (i = env->vstart; i < vl; i++) { \
1792 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1793 *((ETYPE *)vd + H(i)) = s1; \
1794 } \
f714361e 1795 env->vstart = 0; \
89a32de2 1796 /* set tail elements to 1s */ \
1797 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1798}
1799
1800GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1801GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1802GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1803GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
f020a7a1 1804
3479a814 1805#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
1806void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1807 uint32_t desc) \
1808{ \
1809 uint32_t vl = env->vl; \
89a32de2 1810 uint32_t esz = sizeof(ETYPE); \
1811 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1812 uint32_t vta = vext_vta(desc); \
1813 uint32_t i; \
1814 \
f714361e 1815 for (i = env->vstart; i < vl; i++) { \
1816 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
1817 } \
f714361e 1818 env->vstart = 0; \
89a32de2 1819 /* set tail elements to 1s */ \
1820 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1821}
1822
1823GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
1824GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1825GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1826GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
f020a7a1 1827
3479a814 1828#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
1829void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1830 CPURISCVState *env, uint32_t desc) \
1831{ \
f020a7a1 1832 uint32_t vl = env->vl; \
89a32de2 1833 uint32_t esz = sizeof(ETYPE); \
1834 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1835 uint32_t vta = vext_vta(desc); \
1836 uint32_t i; \
1837 \
f714361e 1838 for (i = env->vstart; i < vl; i++) { \
f9298de5 1839 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
1840 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
1841 } \
f714361e 1842 env->vstart = 0; \
89a32de2 1843 /* set tail elements to 1s */ \
1844 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1845}
1846
1847GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
1848GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1849GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1850GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
f020a7a1 1851
3479a814 1852#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
1853void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1854 void *vs2, CPURISCVState *env, uint32_t desc) \
1855{ \
f020a7a1 1856 uint32_t vl = env->vl; \
89a32de2 1857 uint32_t esz = sizeof(ETYPE); \
1858 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1859 uint32_t vta = vext_vta(desc); \
1860 uint32_t i; \
1861 \
f714361e 1862 for (i = env->vstart; i < vl; i++) { \
f020a7a1 1863 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1864 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
1865 (ETYPE)(target_long)s1); \
1866 *((ETYPE *)vd + H(i)) = d; \
1867 } \
f714361e 1868 env->vstart = 0; \
89a32de2 1869 /* set tail elements to 1s */ \
1870 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1871}
1872
1873GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
1874GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1875GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1876GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1877
1878/*
3b57254d 1879 * Vector Fixed-Point Arithmetic Instructions
1880 */
1881
1882/* Vector Single-Width Saturating Add and Subtract */
1883
1884/*
 * Fixed-point instructions generally need a rounding mode and saturation,
 * so define the common macros for them here.
1887 */
1888typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1889 CPURISCVState *env, int vxrm);
1890
1891#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1892static inline void \
1893do_##NAME(void *vd, void *vs1, void *vs2, int i, \
1894 CPURISCVState *env, int vxrm) \
1895{ \
1896 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1897 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1898 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
1899}
1900
1901static inline void
1902vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1903 CPURISCVState *env,
f9298de5 1904 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 1905 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 1906{
f714361e 1907 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 1908 if (!vm && !vext_elem_mask(v0, i)) {
1909 /* set masked-off elements to 1s */
1910 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
1911 continue;
1912 }
1913 fn(vd, vs1, vs2, i, env, vxrm);
1914 }
f714361e 1915 env->vstart = 0;
1916}
1917
1918static inline void
1919vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1920 CPURISCVState *env,
8a085fb2 1921 uint32_t desc,
09106eed 1922 opivv2_rm_fn *fn, uint32_t esz)
eb2650e3 1923{
1924 uint32_t vm = vext_vm(desc);
1925 uint32_t vl = env->vl;
09106eed 1926 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
1927 uint32_t vta = vext_vta(desc);
72e17a9f 1928 uint32_t vma = vext_vma(desc);
1929
1930 switch (env->vxrm) {
1931 case 0: /* rnu */
1932 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1933 env, vl, vm, 0, fn, vma, esz);
1934 break;
1935 case 1: /* rne */
1936 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1937 env, vl, vm, 1, fn, vma, esz);
1938 break;
1939 case 2: /* rdn */
1940 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1941 env, vl, vm, 2, fn, vma, esz);
1942 break;
1943 default: /* rod */
1944 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1945 env, vl, vm, 3, fn, vma, esz);
1946 break;
1947 }
09106eed 1948 /* set tail elements to 1s */
1949 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
1950}
1951
1952/* generate helpers for fixed point instructions with OPIVV format */
09106eed 1953#define GEN_VEXT_VV_RM(NAME, ESZ) \
1954void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1955 CPURISCVState *env, uint32_t desc) \
1956{ \
8a085fb2 1957 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
09106eed 1958 do_##NAME, ESZ); \
1959}
1960
1961static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
1962 uint8_t b)
1963{
1964 uint8_t res = a + b;
1965 if (res < a) {
1966 res = UINT8_MAX;
1967 env->vxsat = 0x1;
1968 }
1969 return res;
1970}
1971
1972static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1973 uint16_t b)
1974{
1975 uint16_t res = a + b;
1976 if (res < a) {
1977 res = UINT16_MAX;
1978 env->vxsat = 0x1;
1979 }
1980 return res;
1981}
1982
1983static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1984 uint32_t b)
1985{
1986 uint32_t res = a + b;
1987 if (res < a) {
1988 res = UINT32_MAX;
1989 env->vxsat = 0x1;
1990 }
1991 return res;
1992}
1993
1994static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1995 uint64_t b)
1996{
1997 uint64_t res = a + b;
1998 if (res < a) {
1999 res = UINT64_MAX;
2000 env->vxsat = 0x1;
2001 }
2002 return res;
2003}
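/*
 * Illustrative sketch (hypothetical helper, not used by the generated
 * functions): for unsigned addition the wrapped sum is smaller than either
 * operand exactly when a carry out occurred, which is what the "res < a"
 * test above detects before clamping to the type maximum.
 */
static inline uint8_t saddu8_example(CPURISCVState *env)
{
    return saddu8(env, 0, 200, 100);    /* saturates to UINT8_MAX, sets vxsat */
}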
2004
2005RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2006RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2007RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2008RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
09106eed 2009GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2010GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2011GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2012GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
2013
2014typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2015 CPURISCVState *env, int vxrm);
2016
2017#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2018static inline void \
2019do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2020 CPURISCVState *env, int vxrm) \
2021{ \
2022 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2023 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2024}
2025
2026static inline void
2027vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2028 CPURISCVState *env,
f9298de5 2029 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 2030 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 2031{
f714361e 2032 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2033 if (!vm && !vext_elem_mask(v0, i)) {
2034 /* set masked-off elements to 1s */
2035 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2036 continue;
2037 }
2038 fn(vd, s1, vs2, i, env, vxrm);
2039 }
f714361e 2040 env->vstart = 0;
2041}
2042
2043static inline void
2044vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2045 CPURISCVState *env,
8a085fb2 2046 uint32_t desc,
09106eed 2047 opivx2_rm_fn *fn, uint32_t esz)
eb2650e3 2048{
2049 uint32_t vm = vext_vm(desc);
2050 uint32_t vl = env->vl;
09106eed 2051 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2052 uint32_t vta = vext_vta(desc);
72e17a9f 2053 uint32_t vma = vext_vma(desc);
2054
2055 switch (env->vxrm) {
2056 case 0: /* rnu */
2057 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2058 env, vl, vm, 0, fn, vma, esz);
2059 break;
2060 case 1: /* rne */
2061 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2062 env, vl, vm, 1, fn, vma, esz);
2063 break;
2064 case 2: /* rdn */
2065 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2066 env, vl, vm, 2, fn, vma, esz);
2067 break;
2068 default: /* rod */
2069 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2070 env, vl, vm, 3, fn, vma, esz);
2071 break;
2072 }
09106eed 2073 /* set tail elements to 1s */
2074 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2075}
2076
2077/* generate helpers for fixed point instructions with OPIVX format */
09106eed 2078#define GEN_VEXT_VX_RM(NAME, ESZ) \
eb2650e3 2079void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2080 void *vs2, CPURISCVState *env, \
2081 uint32_t desc) \
eb2650e3 2082{ \
8a085fb2 2083 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
09106eed 2084 do_##NAME, ESZ); \
2085}
2086
2087RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2088RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2089RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2090RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
09106eed 2091GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2092GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2093GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2094GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
2095
2096static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2097{
2098 int8_t res = a + b;
2099 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2100 res = a > 0 ? INT8_MAX : INT8_MIN;
2101 env->vxsat = 0x1;
2102 }
2103 return res;
2104}
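/*
 * Illustrative sketch (hypothetical helper, not used by the generated
 * functions): signed addition overflows exactly when both operands share a
 * sign and the truncated result has the opposite one, which is what the
 * sign-bit expression above checks without any widening.
 */
static inline int8_t sadd8_example(CPURISCVState *env)
{
    return sadd8(env, 0, 100, 100);    /* saturates to INT8_MAX, sets vxsat */
}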
2105
2106static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
2107 int16_t b)
2108{
2109 int16_t res = a + b;
2110 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2111 res = a > 0 ? INT16_MAX : INT16_MIN;
2112 env->vxsat = 0x1;
2113 }
2114 return res;
2115}
2116
2117static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
2118 int32_t b)
2119{
2120 int32_t res = a + b;
2121 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2122 res = a > 0 ? INT32_MAX : INT32_MIN;
2123 env->vxsat = 0x1;
2124 }
2125 return res;
2126}
2127
2128static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
2129 int64_t b)
2130{
2131 int64_t res = a + b;
2132 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2133 res = a > 0 ? INT64_MAX : INT64_MIN;
2134 env->vxsat = 0x1;
2135 }
2136 return res;
2137}
2138
2139RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2140RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2141RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2142RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
09106eed 2143GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2144GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2145GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2146GEN_VEXT_VV_RM(vsadd_vv_d, 8)
2147
2148RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2149RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2150RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2151RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
09106eed 2152GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2153GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2154GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2155GEN_VEXT_VX_RM(vsadd_vx_d, 8)
eb2650e3 2156
2157static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2158 uint8_t b)
2159{
2160 uint8_t res = a - b;
2161 if (res > a) {
2162 res = 0;
2163 env->vxsat = 0x1;
2164 }
2165 return res;
2166}
2167
2168static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2169 uint16_t b)
2170{
2171 uint16_t res = a - b;
2172 if (res > a) {
2173 res = 0;
2174 env->vxsat = 0x1;
2175 }
2176 return res;
2177}
2178
2179static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2180 uint32_t b)
2181{
2182 uint32_t res = a - b;
2183 if (res > a) {
2184 res = 0;
2185 env->vxsat = 0x1;
2186 }
2187 return res;
2188}
2189
2190static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2191 uint64_t b)
2192{
2193 uint64_t res = a - b;
2194 if (res > a) {
2195 res = 0;
2196 env->vxsat = 0x1;
2197 }
2198 return res;
2199}
2200
2201RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2202RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2203RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2204RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
09106eed 2205GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2206GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2207GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2208GEN_VEXT_VV_RM(vssubu_vv_d, 8)
2209
2210RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2211RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2212RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2213RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
09106eed 2214GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2215GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2216GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2217GEN_VEXT_VX_RM(vssubu_vx_d, 8)
2218
2219static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2220{
2221 int8_t res = a - b;
2222 if ((res ^ a) & (a ^ b) & INT8_MIN) {
65606f21 2223 res = a >= 0 ? INT8_MAX : INT8_MIN;
2224 env->vxsat = 0x1;
2225 }
2226 return res;
2227}
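/*
 * Illustrative sketch (hypothetical helper, not used by the generated
 * functions): for subtraction the overflow condition changes shape: it needs
 * the operands to have opposite signs and the result to take b's sign, hence
 * the (res ^ a) & (a ^ b) test above.
 */
static inline int8_t ssub8_example(CPURISCVState *env)
{
    return ssub8(env, 0, -100, 100);    /* saturates to INT8_MIN, sets vxsat */
}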
2228
2229static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
2230 int16_t b)
2231{
2232 int16_t res = a - b;
2233 if ((res ^ a) & (a ^ b) & INT16_MIN) {
65606f21 2234 res = a >= 0 ? INT16_MAX : INT16_MIN;
2235 env->vxsat = 0x1;
2236 }
2237 return res;
2238}
2239
2240static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
2241 int32_t b)
2242{
2243 int32_t res = a - b;
2244 if ((res ^ a) & (a ^ b) & INT32_MIN) {
65606f21 2245 res = a >= 0 ? INT32_MAX : INT32_MIN;
2246 env->vxsat = 0x1;
2247 }
2248 return res;
2249}
2250
2251static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
2252 int64_t b)
2253{
2254 int64_t res = a - b;
2255 if ((res ^ a) & (a ^ b) & INT64_MIN) {
65606f21 2256 res = a >= 0 ? INT64_MAX : INT64_MIN;
2257 env->vxsat = 0x1;
2258 }
2259 return res;
2260}
2261
2262RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2263RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2264RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2265RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
09106eed 2266GEN_VEXT_VV_RM(vssub_vv_b, 1)
2267GEN_VEXT_VV_RM(vssub_vv_h, 2)
2268GEN_VEXT_VV_RM(vssub_vv_w, 4)
2269GEN_VEXT_VV_RM(vssub_vv_d, 8)
2270
2271RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2272RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2273RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2274RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
09106eed 2275GEN_VEXT_VX_RM(vssub_vx_b, 1)
2276GEN_VEXT_VX_RM(vssub_vx_h, 2)
2277GEN_VEXT_VX_RM(vssub_vx_w, 4)
2278GEN_VEXT_VX_RM(vssub_vx_d, 8)
2279
2280/* Vector Single-Width Averaging Add and Subtract */
2281static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2282{
2283 uint8_t d = extract64(v, shift, 1);
2284 uint8_t d1;
2285 uint64_t D1, D2;
2286
2287 if (shift == 0 || shift > 64) {
2288 return 0;
2289 }
2290
2291 d1 = extract64(v, shift - 1, 1);
2292 D1 = extract64(v, 0, shift);
2293 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2294 return d1;
2295 } else if (vxrm == 1) { /* round-to-nearest-even */
2296 if (shift > 1) {
2297 D2 = extract64(v, 0, shift - 1);
2298 return d1 & ((D2 != 0) | d);
2299 } else {
2300 return d1 & d;
2301 }
2302 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2303 return !d & (D1 != 0);
2304 }
2305 return 0; /* round-down (truncate) */
2306}
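/*
 * Illustrative sketch (hypothetical helper, not used by the generated
 * functions): shifting v = 0b1011 (11) right by 2 divides by 4 (2.75).
 * rnu and rne both round up to 3, rdn truncates to 2, and rod forces the
 * result to the odd value 3 because discarded bits are non-zero.
 */
static inline uint8_t get_round_example(int vxrm)
{
    uint64_t v = 0xb;
    uint8_t shift = 2;

    return (v >> shift) + get_round(vxrm, v, shift);
}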
2307
2308static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
2309 int32_t b)
2310{
2311 int64_t res = (int64_t)a + b;
2312 uint8_t round = get_round(vxrm, res, 1);
2313
2314 return (res >> 1) + round;
2315}
2316
2317static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
2318 int64_t b)
2319{
2320 int64_t res = a + b;
2321 uint8_t round = get_round(vxrm, res, 1);
2322 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2323
2324 /* With signed overflow, bit 64 is inverse of bit 63. */
2325 return ((res >> 1) ^ over) + round;
2326}
2327
2328RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2329RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2330RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2331RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
09106eed 2332GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2333GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2334GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2335GEN_VEXT_VV_RM(vaadd_vv_d, 8)
2336
2337RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2338RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2339RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2340RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
09106eed 2341GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2342GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2343GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2344GEN_VEXT_VX_RM(vaadd_vx_d, 8)
b7aee481 2345
2346static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2347 uint32_t a, uint32_t b)
2348{
2349 uint64_t res = (uint64_t)a + b;
2350 uint8_t round = get_round(vxrm, res, 1);
2351
2352 return (res >> 1) + round;
2353}
2354
2355static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2356 uint64_t a, uint64_t b)
2357{
2358 uint64_t res = a + b;
2359 uint8_t round = get_round(vxrm, res, 1);
2360 uint64_t over = (uint64_t)(res < a) << 63;
2361
2362 return ((res >> 1) | over) + round;
2363}
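/*
 * Illustrative sketch (hypothetical helper, not used by the generated
 * functions): the unsigned averaging sum needs 65 bits, and "res < a"
 * recovers the lost carry so it can be OR-ed back in above the shifted
 * result. For example avg(UINT64_MAX, 3) with round-down (vxrm = 2) is
 * (2 ** 64 + 2) >> 1 = 2 ** 63 + 1.
 */
static inline uint64_t aaddu64_example(CPURISCVState *env)
{
    return aaddu64(env, 2, UINT64_MAX, 3);
}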
2364
2365RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2366RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2367RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2368RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
09106eed 2369GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2370GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2371GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2372GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
2373
2374RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2375RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2376RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2377RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
09106eed 2378GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2379GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2380GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2381GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
8b99a110 2382
2383static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2384 int32_t b)
2385{
2386 int64_t res = (int64_t)a - b;
2387 uint8_t round = get_round(vxrm, res, 1);
2388
2389 return (res >> 1) + round;
2390}
2391
2392static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
2393 int64_t b)
2394{
2395 int64_t res = (int64_t)a - b;
2396 uint8_t round = get_round(vxrm, res, 1);
2397 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2398
2399 /* With signed overflow, bit 64 is inverse of bit 63. */
2400 return ((res >> 1) ^ over) + round;
2401}
2402
2403RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2404RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2405RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2406RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
09106eed 2407GEN_VEXT_VV_RM(vasub_vv_b, 1)
2408GEN_VEXT_VV_RM(vasub_vv_h, 2)
2409GEN_VEXT_VV_RM(vasub_vv_w, 4)
2410GEN_VEXT_VV_RM(vasub_vv_d, 8)
2411
2412RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2413RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2414RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2415RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
09106eed 2416GEN_VEXT_VX_RM(vasub_vx_b, 1)
2417GEN_VEXT_VX_RM(vasub_vx_h, 2)
2418GEN_VEXT_VX_RM(vasub_vx_w, 4)
2419GEN_VEXT_VX_RM(vasub_vx_d, 8)
9f0ff9e5 2420
2421static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2422 uint32_t a, uint32_t b)
2423{
2424 int64_t res = (int64_t)a - b;
2425 uint8_t round = get_round(vxrm, res, 1);
2426
2427 return (res >> 1) + round;
2428}
2429
2430static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2431 uint64_t a, uint64_t b)
2432{
2433 uint64_t res = (uint64_t)a - b;
2434 uint8_t round = get_round(vxrm, res, 1);
2435 uint64_t over = (uint64_t)(res > a) << 63;
2436
2437 return ((res >> 1) | over) + round;
2438}
2439
2440RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2441RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2442RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2443RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
09106eed 2444GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2445GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2446GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2447GEN_VEXT_VV_RM(vasubu_vv_d, 8)
2448
2449RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2450RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2451RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2452RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
09106eed 2453GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2454GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2455GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2456GEN_VEXT_VX_RM(vasubu_vx_d, 8)
8b99a110 2457
2458/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2459static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2460{
2461 uint8_t round;
2462 int16_t res;
2463
2464 res = (int16_t)a * (int16_t)b;
2465 round = get_round(vxrm, res, 7);
c45eff30 2466 res = (res >> 7) + round;
2467
2468 if (res > INT8_MAX) {
2469 env->vxsat = 0x1;
2470 return INT8_MAX;
2471 } else if (res < INT8_MIN) {
2472 env->vxsat = 0x1;
2473 return INT8_MIN;
2474 } else {
2475 return res;
2476 }
2477}
2478
2479static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2480{
2481 uint8_t round;
2482 int32_t res;
2483
2484 res = (int32_t)a * (int32_t)b;
2485 round = get_round(vxrm, res, 15);
c45eff30 2486 res = (res >> 15) + round;
2487
2488 if (res > INT16_MAX) {
2489 env->vxsat = 0x1;
2490 return INT16_MAX;
2491 } else if (res < INT16_MIN) {
2492 env->vxsat = 0x1;
2493 return INT16_MIN;
2494 } else {
2495 return res;
2496 }
2497}
2498
2499static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2500{
2501 uint8_t round;
2502 int64_t res;
2503
2504 res = (int64_t)a * (int64_t)b;
2505 round = get_round(vxrm, res, 31);
c45eff30 2506 res = (res >> 31) + round;
2507
2508 if (res > INT32_MAX) {
2509 env->vxsat = 0x1;
2510 return INT32_MAX;
2511 } else if (res < INT32_MIN) {
2512 env->vxsat = 0x1;
2513 return INT32_MIN;
2514 } else {
2515 return res;
2516 }
2517}
2518
2519static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2520{
2521 uint8_t round;
2522 uint64_t hi_64, lo_64;
2523 int64_t res;
2524
2525 if (a == INT64_MIN && b == INT64_MIN) {
2526 env->vxsat = 1;
2527 return INT64_MAX;
2528 }
2529
2530 muls64(&lo_64, &hi_64, a, b);
2531 round = get_round(vxrm, lo_64, 63);
2532 /*
2533 * Cannot overflow, as there are always
2534 * 2 sign bits after multiply.
2535 */
2536 res = (hi_64 << 1) | (lo_64 >> 63);
2537 if (round) {
2538 if (res == INT64_MAX) {
2539 env->vxsat = 1;
2540 } else {
2541 res += 1;
2542 }
2543 }
2544 return res;
2545}
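/*
 * Illustrative sketch (hypothetical helper, not used by the generated
 * functions): vsmul treats the operands as signed fixed-point fractions, so
 * the double-width product is shifted right by SEW - 1 bits with rounding.
 * 0.5 * 0.5 (encoded as 64) gives (64 * 64) >> 7 = 32, i.e. 0.25; the only
 * product that can still overflow after the shift is (-1.0) * (-1.0), which
 * the helpers above saturate to the positive maximum.
 */
static inline int8_t vsmul8_example(CPURISCVState *env)
{
    return vsmul8(env, 0, 64, 64);    /* == 32 */
}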
2546
2547RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2548RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2549RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2550RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
09106eed 2551GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2552GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2553GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2554GEN_VEXT_VV_RM(vsmul_vv_d, 8)
2555
2556RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2557RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2558RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2559RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
09106eed 2560GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2561GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2562GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2563GEN_VEXT_VX_RM(vsmul_vx_d, 8)
0a1eaf00 2564
2565/* Vector Single-Width Scaling Shift Instructions */
2566static inline uint8_t
2567vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2568{
2569 uint8_t round, shift = b & 0x7;
2570 uint8_t res;
2571
2572 round = get_round(vxrm, a, shift);
c45eff30 2573 res = (a >> shift) + round;
2574 return res;
2575}
2576static inline uint16_t
2577vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2578{
2579 uint8_t round, shift = b & 0xf;
2580
2581 round = get_round(vxrm, a, shift);
66997c42 2582 return (a >> shift) + round;
2583}
2584static inline uint32_t
2585vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2586{
2587 uint8_t round, shift = b & 0x1f;
2588
2589 round = get_round(vxrm, a, shift);
66997c42 2590 return (a >> shift) + round;
2591}
2592static inline uint64_t
2593vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2594{
2595 uint8_t round, shift = b & 0x3f;
2596
2597 round = get_round(vxrm, a, shift);
66997c42 2598 return (a >> shift) + round;
2599}
2600RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2601RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2602RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2603RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
09106eed 2604GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2605GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2606GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2607GEN_VEXT_VV_RM(vssrl_vv_d, 8)
2608
2609RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2610RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2611RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2612RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
09106eed 2613GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2614GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2615GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2616GEN_VEXT_VX_RM(vssrl_vx_d, 8)
2617
2618static inline int8_t
2619vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2620{
2621 uint8_t round, shift = b & 0x7;
2622
2623 round = get_round(vxrm, a, shift);
66997c42 2624 return (a >> shift) + round;
2625}
2626static inline int16_t
2627vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2628{
2629 uint8_t round, shift = b & 0xf;
2630
2631 round = get_round(vxrm, a, shift);
66997c42 2632 return (a >> shift) + round;
2633}
2634static inline int32_t
2635vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2636{
2637 uint8_t round, shift = b & 0x1f;
2638
2639 round = get_round(vxrm, a, shift);
66997c42 2640 return (a >> shift) + round;
2641}
2642static inline int64_t
2643vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2644{
2645 uint8_t round, shift = b & 0x3f;
2646
2647 round = get_round(vxrm, a, shift);
66997c42 2648 return (a >> shift) + round;
04a61406 2649}
9ff3d287 2650
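/*
 * Illustrative sketch (hypothetical helper, not used by the generated
 * functions): vssra is an arithmetic right shift with the current rounding
 * mode applied to the discarded bits. With round-to-nearest-up, -5 >> 1
 * first truncates to -3 and the rounding increment brings it to -2.
 */
static inline int8_t vssra8_example(CPURISCVState *env)
{
    return vssra8(env, 0, -5, 1);    /* == -2 */
}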
2651RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2652RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2653RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2654RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
09106eed 2655GEN_VEXT_VV_RM(vssra_vv_b, 1)
2656GEN_VEXT_VV_RM(vssra_vv_h, 2)
2657GEN_VEXT_VV_RM(vssra_vv_w, 4)
2658GEN_VEXT_VV_RM(vssra_vv_d, 8)
2659
2660RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2661RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2662RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2663RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
09106eed 2664GEN_VEXT_VX_RM(vssra_vx_b, 1)
2665GEN_VEXT_VX_RM(vssra_vx_h, 2)
2666GEN_VEXT_VX_RM(vssra_vx_w, 4)
2667GEN_VEXT_VX_RM(vssra_vx_d, 8)
2668
2669/* Vector Narrowing Fixed-Point Clip Instructions */
2670static inline int8_t
2671vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2672{
2673 uint8_t round, shift = b & 0xf;
2674 int16_t res;
2675
2676 round = get_round(vxrm, a, shift);
c45eff30 2677 res = (a >> shift) + round;
2678 if (res > INT8_MAX) {
2679 env->vxsat = 0x1;
2680 return INT8_MAX;
2681 } else if (res < INT8_MIN) {
2682 env->vxsat = 0x1;
2683 return INT8_MIN;
2684 } else {
2685 return res;
2686 }
2687}
2688
2689static inline int16_t
2690vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2691{
2692 uint8_t round, shift = b & 0x1f;
2693 int32_t res;
2694
2695 round = get_round(vxrm, a, shift);
c45eff30 2696 res = (a >> shift) + round;
2697 if (res > INT16_MAX) {
2698 env->vxsat = 0x1;
2699 return INT16_MAX;
2700 } else if (res < INT16_MIN) {
2701 env->vxsat = 0x1;
2702 return INT16_MIN;
2703 } else {
2704 return res;
2705 }
2706}
2707
2708static inline int32_t
2709vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2710{
2711 uint8_t round, shift = b & 0x3f;
2712 int64_t res;
2713
2714 round = get_round(vxrm, a, shift);
c45eff30 2715 res = (a >> shift) + round;
2716 if (res > INT32_MAX) {
2717 env->vxsat = 0x1;
2718 return INT32_MAX;
2719 } else if (res < INT32_MIN) {
2720 env->vxsat = 0x1;
2721 return INT32_MIN;
2722 } else {
2723 return res;
2724 }
2725}
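/*
 * Illustrative sketch (hypothetical helper, not used by the generated
 * functions): vnclip shifts a double-width source right and then saturates
 * it into the narrow destination, so a shifted value that no longer fits
 * clamps to the destination range and sets vxsat.
 */
static inline int8_t vnclip8_example(CPURISCVState *env)
{
    return vnclip8(env, 2, 0x1234, 4);    /* 0x123 > INT8_MAX, clamps to 127 */
}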
2726
2727RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2728RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2729RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
09106eed 2730GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2731GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2732GEN_VEXT_VV_RM(vnclip_wv_w, 4)
2733
2734RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2735RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2736RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
09106eed 2737GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2738GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2739GEN_VEXT_VX_RM(vnclip_wx_w, 4)
2740
2741static inline uint8_t
2742vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2743{
2744 uint8_t round, shift = b & 0xf;
2745 uint16_t res;
2746
2747 round = get_round(vxrm, a, shift);
c45eff30 2748 res = (a >> shift) + round;
2749 if (res > UINT8_MAX) {
2750 env->vxsat = 0x1;
2751 return UINT8_MAX;
2752 } else {
2753 return res;
2754 }
2755}
2756
2757static inline uint16_t
2758vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2759{
2760 uint8_t round, shift = b & 0x1f;
2761 uint32_t res;
2762
2763 round = get_round(vxrm, a, shift);
c45eff30 2764 res = (a >> shift) + round;
2765 if (res > UINT16_MAX) {
2766 env->vxsat = 0x1;
2767 return UINT16_MAX;
2768 } else {
2769 return res;
2770 }
2771}
2772
2773static inline uint32_t
2774vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2775{
2776 uint8_t round, shift = b & 0x3f;
a70b3a73 2777 uint64_t res;
2778
2779 round = get_round(vxrm, a, shift);
c45eff30 2780 res = (a >> shift) + round;
2781 if (res > UINT32_MAX) {
2782 env->vxsat = 0x1;
2783 return UINT32_MAX;
2784 } else {
2785 return res;
2786 }
2787}
2788
2789RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2790RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2791RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
09106eed 2792GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
2793GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
2794GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
9ff3d287 2795
2796RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2797RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2798RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
09106eed 2799GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
2800GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
2801GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
2802
2803/*
 * Vector Floating-Point Arithmetic Instructions
2805 */
2806/* Vector Single-Width Floating-Point Add/Subtract Instructions */
2807#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2808static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2809 CPURISCVState *env) \
2810{ \
2811 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2812 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2813 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
2814}
2815
5eacf7d8 2816#define GEN_VEXT_VV_ENV(NAME, ESZ) \
2817void HELPER(NAME)(void *vd, void *v0, void *vs1, \
2818 void *vs2, CPURISCVState *env, \
2819 uint32_t desc) \
2820{ \
2821 uint32_t vm = vext_vm(desc); \
2822 uint32_t vl = env->vl; \
5eacf7d8 2823 uint32_t total_elems = \
2824 vext_get_total_elems(env, desc, ESZ); \
2825 uint32_t vta = vext_vta(desc); \
5b448f44 2826 uint32_t vma = vext_vma(desc); \
2827 uint32_t i; \
2828 \
f714361e 2829 for (i = env->vstart; i < vl; i++) { \
f9298de5 2830 if (!vm && !vext_elem_mask(v0, i)) { \
2831 /* set masked-off elements to 1s */ \
2832 vext_set_elems_1s(vd, vma, i * ESZ, \
2833 (i + 1) * ESZ); \
2834 continue; \
2835 } \
2836 do_##NAME(vd, vs1, vs2, i, env); \
2837 } \
f714361e 2838 env->vstart = 0; \
5eacf7d8 2839 /* set tail elements to 1s */ \
2840 vext_set_elems_1s(vd, vta, vl * ESZ, \
2841 total_elems * ESZ); \
2842}
2843
2844RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2845RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2846RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
5eacf7d8 2847GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
2848GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
2849GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
2850
2851#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2852static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2853 CPURISCVState *env) \
2854{ \
2855 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2856 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2857}
2858
5eacf7d8 2859#define GEN_VEXT_VF(NAME, ESZ) \
2860void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
2861 void *vs2, CPURISCVState *env, \
2862 uint32_t desc) \
2863{ \
2864 uint32_t vm = vext_vm(desc); \
2865 uint32_t vl = env->vl; \
5eacf7d8 2866 uint32_t total_elems = \
c45eff30 2867 vext_get_total_elems(env, desc, ESZ); \
5eacf7d8 2868 uint32_t vta = vext_vta(desc); \
5b448f44 2869 uint32_t vma = vext_vma(desc); \
2870 uint32_t i; \
2871 \
f714361e 2872 for (i = env->vstart; i < vl; i++) { \
f9298de5 2873 if (!vm && !vext_elem_mask(v0, i)) { \
2874 /* set masked-off elements to 1s */ \
2875 vext_set_elems_1s(vd, vma, i * ESZ, \
2876 (i + 1) * ESZ); \
2877 continue; \
2878 } \
2879 do_##NAME(vd, s1, vs2, i, env); \
2880 } \
f714361e 2881 env->vstart = 0; \
5eacf7d8 2882 /* set tail elements to 1s */ \
2883 vext_set_elems_1s(vd, vta, vl * ESZ, \
2884 total_elems * ESZ); \
2885}
2886
2887RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2888RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2889RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
5eacf7d8 2890GEN_VEXT_VF(vfadd_vf_h, 2)
2891GEN_VEXT_VF(vfadd_vf_w, 4)
2892GEN_VEXT_VF(vfadd_vf_d, 8)
2893
2894RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2895RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2896RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
5eacf7d8 2897GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
2898GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
2899GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
2900RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2901RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2902RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
5eacf7d8 2903GEN_VEXT_VF(vfsub_vf_h, 2)
2904GEN_VEXT_VF(vfsub_vf_w, 4)
2905GEN_VEXT_VF(vfsub_vf_d, 8)
2906
2907static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2908{
2909 return float16_sub(b, a, s);
2910}
2911
2912static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2913{
2914 return float32_sub(b, a, s);
2915}
2916
2917static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2918{
2919 return float64_sub(b, a, s);
2920}
2921
2922RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2923RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2924RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
5eacf7d8 2925GEN_VEXT_VF(vfrsub_vf_h, 2)
2926GEN_VEXT_VF(vfrsub_vf_w, 4)
2927GEN_VEXT_VF(vfrsub_vf_d, 8)
2928
2929/* Vector Widening Floating-Point Add/Subtract Instructions */
2930static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2931{
2932 return float32_add(float16_to_float32(a, true, s),
c45eff30 2933 float16_to_float32(b, true, s), s);
2934}
2935
2936static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2937{
2938 return float64_add(float32_to_float64(a, s),
c45eff30 2939 float32_to_float64(b, s), s);
2940
2941}
2942
2943RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2944RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
5eacf7d8 2945GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
2946GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
2947RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2948RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
5eacf7d8 2949GEN_VEXT_VF(vfwadd_vf_h, 4)
2950GEN_VEXT_VF(vfwadd_vf_w, 8)
2951
2952static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2953{
2954 return float32_sub(float16_to_float32(a, true, s),
c45eff30 2955 float16_to_float32(b, true, s), s);
2956}
2957
2958static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2959{
2960 return float64_sub(float32_to_float64(a, s),
c45eff30 2961 float32_to_float64(b, s), s);
2962
2963}
2964
2965RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2966RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
5eacf7d8 2967GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
2968GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
2969RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2970RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
5eacf7d8 2971GEN_VEXT_VF(vfwsub_vf_h, 4)
2972GEN_VEXT_VF(vfwsub_vf_w, 8)
2973
2974static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2975{
2976 return float32_add(a, float16_to_float32(b, true, s), s);
2977}
2978
2979static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2980{
2981 return float64_add(a, float32_to_float64(b, s), s);
2982}
2983
2984RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2985RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
5eacf7d8 2986GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
2987GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
2988RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2989RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
5eacf7d8 2990GEN_VEXT_VF(vfwadd_wf_h, 4)
2991GEN_VEXT_VF(vfwadd_wf_w, 8)
2992
2993static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2994{
2995 return float32_sub(a, float16_to_float32(b, true, s), s);
2996}
2997
2998static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2999{
3000 return float64_sub(a, float32_to_float64(b, s), s);
3001}
3002
3003RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3004RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
5eacf7d8 3005GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3006GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
3007RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3008RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
5eacf7d8 3009GEN_VEXT_VF(vfwsub_wf_h, 4)
3010GEN_VEXT_VF(vfwsub_wf_w, 8)
3011
3012/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3013RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3014RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3015RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
5eacf7d8 3016GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3017GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3018GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
3019RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3020RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3021RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
5eacf7d8 3022GEN_VEXT_VF(vfmul_vf_h, 2)
3023GEN_VEXT_VF(vfmul_vf_w, 4)
3024GEN_VEXT_VF(vfmul_vf_d, 8)
0e0057cb
LZ
3025
3026RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3027RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3028RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
5eacf7d8 3029GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3030GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3031GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
0e0057cb
LZ
3032RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3033RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3034RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
5eacf7d8 3035GEN_VEXT_VF(vfdiv_vf_h, 2)
3036GEN_VEXT_VF(vfdiv_vf_w, 4)
3037GEN_VEXT_VF(vfdiv_vf_d, 8)
0e0057cb
LZ
3038
3039static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3040{
3041 return float16_div(b, a, s);
3042}
3043
3044static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3045{
3046 return float32_div(b, a, s);
3047}
3048
3049static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3050{
3051 return float64_div(b, a, s);
3052}
3053
3054RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3055RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3056RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
5eacf7d8 3057GEN_VEXT_VF(vfrdiv_vf_h, 2)
3058GEN_VEXT_VF(vfrdiv_vf_w, 4)
3059GEN_VEXT_VF(vfrdiv_vf_d, 8)
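/*
 * The *_rdiv wrappers swap the operand order, so vfrdiv.vf computes
 * scalar / vector-element rather than vector-element / scalar,
 * i.e. vd[i] = f[rs1] / vs2[i].
 */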
f7c7b7cd
LZ
3060
3061/* Vector Widening Floating-Point Multiply */
3062static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3063{
3064 return float32_mul(float16_to_float32(a, true, s),
c45eff30 3065 float16_to_float32(b, true, s), s);
f7c7b7cd
LZ
3066}
3067
3068static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3069{
3070 return float64_mul(float32_to_float64(a, s),
c45eff30 3071 float32_to_float64(b, s), s);
f7c7b7cd
LZ
3072
3073}
3074RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3075RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
5eacf7d8 3076GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3077GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
f7c7b7cd
LZ
3078RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3079RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
5eacf7d8 3080GEN_VEXT_VF(vfwmul_vf_h, 4)
3081GEN_VEXT_VF(vfwmul_vf_w, 8)
4aa5a8fe
LZ
3082
3083/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3084#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3085static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
c45eff30 3086 CPURISCVState *env) \
4aa5a8fe
LZ
3087{ \
3088 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3089 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3090 TD d = *((TD *)vd + HD(i)); \
3091 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3092}
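/*
 * Note the argument order: OP is invoked as OP(vs2[i], vs1[i], vd[i]), so
 * in the helpers below "a" is the vs2 element, "b" is the vs1 element (or
 * the scalar for the _vf forms) and "d" is the current destination element
 * acting as the accumulator/addend.
 */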
3093
3094static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3095{
3096 return float16_muladd(a, b, d, 0, s);
3097}
3098
3099static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3100{
3101 return float32_muladd(a, b, d, 0, s);
3102}
3103
3104static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3105{
3106 return float64_muladd(a, b, d, 0, s);
3107}
3108
3109RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3110RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3111RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
5eacf7d8 3112GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3113GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3114GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
4aa5a8fe
LZ
3115
3116#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3117static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
c45eff30 3118 CPURISCVState *env) \
4aa5a8fe
LZ
3119{ \
3120 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3121 TD d = *((TD *)vd + HD(i)); \
3122 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3123}
3124
3125RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3126RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3127RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
5eacf7d8 3128GEN_VEXT_VF(vfmacc_vf_h, 2)
3129GEN_VEXT_VF(vfmacc_vf_w, 4)
3130GEN_VEXT_VF(vfmacc_vf_d, 8)
4aa5a8fe
LZ
3131
3132static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3133{
c45eff30
WL
3134 return float16_muladd(a, b, d, float_muladd_negate_c |
3135 float_muladd_negate_product, s);
4aa5a8fe
LZ
3136}
3137
3138static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3139{
c45eff30
WL
3140 return float32_muladd(a, b, d, float_muladd_negate_c |
3141 float_muladd_negate_product, s);
4aa5a8fe
LZ
3142}
3143
3144static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3145{
c45eff30
WL
3146 return float64_muladd(a, b, d, float_muladd_negate_c |
3147 float_muladd_negate_product, s);
4aa5a8fe
LZ
3148}
3149
3150RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3151RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3152RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
5eacf7d8 3153GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3154GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3155GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
4aa5a8fe
LZ
3156RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3157RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3158RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
5eacf7d8 3159GEN_VEXT_VF(vfnmacc_vf_h, 2)
3160GEN_VEXT_VF(vfnmacc_vf_w, 4)
3161GEN_VEXT_VF(vfnmacc_vf_d, 8)
4aa5a8fe
LZ
3162
3163static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3164{
3165 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3166}
3167
3168static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3169{
3170 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3171}
3172
3173static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3174{
3175 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3176}
3177
3178RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3179RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3180RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
5eacf7d8 3181GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3182GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3183GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
4aa5a8fe
LZ
3184RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3185RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3186RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
5eacf7d8 3187GEN_VEXT_VF(vfmsac_vf_h, 2)
3188GEN_VEXT_VF(vfmsac_vf_w, 4)
3189GEN_VEXT_VF(vfmsac_vf_d, 8)
4aa5a8fe
LZ
3190
3191static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3192{
3193 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3194}
3195
3196static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3197{
3198 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3199}
3200
3201static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3202{
3203 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3204}
3205
3206RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3207RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3208RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
5eacf7d8 3209GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3210GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3211GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
4aa5a8fe
LZ
3212RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3213RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3214RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
5eacf7d8 3215GEN_VEXT_VF(vfnmsac_vf_h, 2)
3216GEN_VEXT_VF(vfnmsac_vf_w, 4)
3217GEN_VEXT_VF(vfnmsac_vf_d, 8)
4aa5a8fe
LZ
3218
3219static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3220{
3221 return float16_muladd(d, b, a, 0, s);
3222}
3223
3224static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3225{
3226 return float32_muladd(d, b, a, 0, s);
3227}
3228
3229static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3230{
3231 return float64_muladd(d, b, a, 0, s);
3232}
3233
3234RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3235RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3236RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
5eacf7d8 3237GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3238GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3239GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
4aa5a8fe
LZ
3240RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3241RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3242RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
5eacf7d8 3243GEN_VEXT_VF(vfmadd_vf_h, 2)
3244GEN_VEXT_VF(vfmadd_vf_w, 4)
3245GEN_VEXT_VF(vfmadd_vf_d, 8)
4aa5a8fe
LZ
3246
3247static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3248{
c45eff30
WL
3249 return float16_muladd(d, b, a, float_muladd_negate_c |
3250 float_muladd_negate_product, s);
4aa5a8fe
LZ
3251}
3252
3253static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3254{
c45eff30
WL
3255 return float32_muladd(d, b, a, float_muladd_negate_c |
3256 float_muladd_negate_product, s);
4aa5a8fe
LZ
3257}
3258
3259static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3260{
c45eff30
WL
3261 return float64_muladd(d, b, a, float_muladd_negate_c |
3262 float_muladd_negate_product, s);
4aa5a8fe
LZ
3263}
3264
3265RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3266RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3267RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
5eacf7d8 3268GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3269GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3270GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
4aa5a8fe
LZ
3271RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3272RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3273RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
5eacf7d8 3274GEN_VEXT_VF(vfnmadd_vf_h, 2)
3275GEN_VEXT_VF(vfnmadd_vf_w, 4)
3276GEN_VEXT_VF(vfnmadd_vf_d, 8)
4aa5a8fe
LZ
3277
3278static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3279{
3280 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3281}
3282
3283static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3284{
3285 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3286}
3287
3288static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3289{
3290 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3291}
3292
3293RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3294RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3295RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
5eacf7d8 3296GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3297GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3298GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
4aa5a8fe
LZ
3299RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3300RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3301RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
5eacf7d8 3302GEN_VEXT_VF(vfmsub_vf_h, 2)
3303GEN_VEXT_VF(vfmsub_vf_w, 4)
3304GEN_VEXT_VF(vfmsub_vf_d, 8)
4aa5a8fe
LZ
3305
3306static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3307{
3308 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3309}
3310
3311static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3312{
3313 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3314}
3315
3316static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3317{
3318 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3319}
3320
3321RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3322RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3323RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
5eacf7d8 3324GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3325GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3326GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
4aa5a8fe
LZ
3327RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3328RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3329RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
5eacf7d8 3330GEN_VEXT_VF(vfnmsub_vf_h, 2)
3331GEN_VEXT_VF(vfnmsub_vf_w, 4)
3332GEN_VEXT_VF(vfnmsub_vf_d, 8)
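/*
 * Summary of the single-width FMA flavours above in terms of the softfloat
 * muladd flags:
 *   vfmacc:  vd =   (vs1 * vs2) + vd   (no flags)
 *   vfnmacc: vd = - (vs1 * vs2) - vd   (negate_product | negate_c)
 *   vfmsac:  vd =   (vs1 * vs2) - vd   (negate_c)
 *   vfnmsac: vd = - (vs1 * vs2) + vd   (negate_product)
 * vfmadd/vfnmadd/vfmsub/vfnmsub apply the same four sign patterns but with
 * vd as the multiplicand and vs2 as the addend.
 */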
0dd50959
LZ
3333
3334/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3335static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3336{
3337 return float32_muladd(float16_to_float32(a, true, s),
c45eff30 3338 float16_to_float32(b, true, s), d, 0, s);
0dd50959
LZ
3339}
3340
3341static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3342{
3343 return float64_muladd(float32_to_float64(a, s),
c45eff30 3344 float32_to_float64(b, s), d, 0, s);
0dd50959
LZ
3345}
3346
3347RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3348RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
5eacf7d8 3349GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3350GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
0dd50959
LZ
3351RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3352RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
5eacf7d8 3353GEN_VEXT_VF(vfwmacc_vf_h, 4)
3354GEN_VEXT_VF(vfwmacc_vf_w, 8)
0dd50959 3355
adf772b0
WL
3356static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3357{
3358 return float32_muladd(bfloat16_to_float32(a, s),
3359 bfloat16_to_float32(b, s), d, 0, s);
3360}
3361
3362RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
3363GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
3364RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
3365GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3366
0dd50959
LZ
3367static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3368{
3369 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3370 float16_to_float32(b, true, s), d,
3371 float_muladd_negate_c | float_muladd_negate_product,
3372 s);
0dd50959
LZ
3373}
3374
3375static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3376{
c45eff30
WL
3377 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
3378 d, float_muladd_negate_c |
3379 float_muladd_negate_product, s);
0dd50959
LZ
3380}
3381
3382RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3383RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
5eacf7d8 3384GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3385GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
0dd50959
LZ
3386RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3387RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
5eacf7d8 3388GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3389GEN_VEXT_VF(vfwnmacc_vf_w, 8)
0dd50959
LZ
3390
3391static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3392{
3393 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3394 float16_to_float32(b, true, s), d,
3395 float_muladd_negate_c, s);
0dd50959
LZ
3396}
3397
3398static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3399{
3400 return float64_muladd(float32_to_float64(a, s),
c45eff30
WL
3401 float32_to_float64(b, s), d,
3402 float_muladd_negate_c, s);
0dd50959
LZ
3403}
3404
3405RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3406RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
5eacf7d8 3407GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3408GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
0dd50959
LZ
3409RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3410RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
5eacf7d8 3411GEN_VEXT_VF(vfwmsac_vf_h, 4)
3412GEN_VEXT_VF(vfwmsac_vf_w, 8)
0dd50959
LZ
3413
3414static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3415{
3416 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3417 float16_to_float32(b, true, s), d,
3418 float_muladd_negate_product, s);
0dd50959
LZ
3419}
3420
3421static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3422{
3423 return float64_muladd(float32_to_float64(a, s),
c45eff30
WL
3424 float32_to_float64(b, s), d,
3425 float_muladd_negate_product, s);
0dd50959
LZ
3426}
3427
3428RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3429RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
5eacf7d8 3430GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3431GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
0dd50959
LZ
3432RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3433RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
5eacf7d8 3434GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3435GEN_VEXT_VF(vfwnmsac_vf_w, 8)
d9e4ce72
LZ
3436
3437/* Vector Floating-Point Square-Root Instruction */
c45eff30 3438#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
d9e4ce72 3439static void do_##NAME(void *vd, void *vs2, int i, \
c45eff30 3440 CPURISCVState *env) \
d9e4ce72
LZ
3441{ \
3442 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3443 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3444}
3445
5eacf7d8 3446#define GEN_VEXT_V_ENV(NAME, ESZ) \
d9e4ce72 3447void HELPER(NAME)(void *vd, void *v0, void *vs2, \
c45eff30 3448 CPURISCVState *env, uint32_t desc) \
d9e4ce72 3449{ \
d9e4ce72
LZ
3450 uint32_t vm = vext_vm(desc); \
3451 uint32_t vl = env->vl; \
5eacf7d8 3452 uint32_t total_elems = \
3453 vext_get_total_elems(env, desc, ESZ); \
3454 uint32_t vta = vext_vta(desc); \
5b448f44 3455 uint32_t vma = vext_vma(desc); \
d9e4ce72
LZ
3456 uint32_t i; \
3457 \
3458 if (vl == 0) { \
3459 return; \
3460 } \
f714361e 3461 for (i = env->vstart; i < vl; i++) { \
f9298de5 3462 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3463 /* set masked-off elements to 1s */ \
3464 vext_set_elems_1s(vd, vma, i * ESZ, \
3465 (i + 1) * ESZ); \
d9e4ce72
LZ
3466 continue; \
3467 } \
3468 do_##NAME(vd, vs2, i, env); \
3469 } \
f714361e 3470 env->vstart = 0; \
5eacf7d8 3471 vext_set_elems_1s(vd, vta, vl * ESZ, \
3472 total_elems * ESZ); \
d9e4ce72
LZ
3473}
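/*
 * As with the binary helpers: masked-off body elements are overwritten with
 * all-1s only when the mask policy is agnostic (vma), and the tail past vl
 * is overwritten with all-1s only when the tail policy is agnostic (vta);
 * otherwise both are left undisturbed.
 */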
3474
3475RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3476RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3477RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
5eacf7d8 3478GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3479GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3480GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
230b53dd 3481
e848a1e5
FC
3482/*
3483 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3484 *
3485 * Adapted from riscv-v-spec recip.c:
3486 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3487 */
3488static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3489{
3490 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3491 uint64_t exp = extract64(f, frac_size, exp_size);
3492 uint64_t frac = extract64(f, 0, frac_size);
3493
3494 const uint8_t lookup_table[] = {
3495 52, 51, 50, 48, 47, 46, 44, 43,
3496 42, 41, 40, 39, 38, 36, 35, 34,
3497 33, 32, 31, 30, 30, 29, 28, 27,
3498 26, 25, 24, 23, 23, 22, 21, 20,
3499 19, 19, 18, 17, 16, 16, 15, 14,
3500 14, 13, 12, 12, 11, 10, 10, 9,
3501 9, 8, 7, 7, 6, 6, 5, 4,
3502 4, 3, 3, 2, 2, 1, 1, 0,
3503 127, 125, 123, 121, 119, 118, 116, 114,
3504 113, 111, 109, 108, 106, 105, 103, 102,
3505 100, 99, 97, 96, 95, 93, 92, 91,
3506 90, 88, 87, 86, 85, 84, 83, 82,
3507 80, 79, 78, 77, 76, 75, 74, 73,
3508 72, 71, 70, 70, 69, 68, 67, 66,
3509 65, 64, 63, 63, 62, 61, 60, 59,
3510 59, 58, 57, 56, 56, 55, 54, 53
3511 };
3512 const int precision = 7;
3513
3514 if (exp == 0 && frac != 0) { /* subnormal */
3515 /* Normalize the subnormal. */
3516 while (extract64(frac, frac_size - 1, 1) == 0) {
3517 exp--;
3518 frac <<= 1;
3519 }
3520
3521 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3522 }
3523
3524 int idx = ((exp & 1) << (precision - 1)) |
c45eff30 3525 (frac >> (frac_size - precision + 1));
e848a1e5 3526 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
c45eff30 3527 (frac_size - precision);
e848a1e5
FC
3528 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3529
3530 uint64_t val = 0;
3531 val = deposit64(val, 0, frac_size, out_frac);
3532 val = deposit64(val, frac_size, exp_size, out_exp);
3533 val = deposit64(val, frac_size + exp_size, 1, sign);
3534 return val;
3535}
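/*
 * In other words: after normalizing a subnormal input, the 7-bit estimate
 * is looked up with the low exponent bit followed by the top 6 fraction
 * bits (a 128-entry table), and the output exponent works out to roughly
 * (3 * bias - 1 - exp) / 2, i.e. the exponent of 1/sqrt(x).
 */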
3536
3537static float16 frsqrt7_h(float16 f, float_status *s)
3538{
3539 int exp_size = 5, frac_size = 10;
3540 bool sign = float16_is_neg(f);
3541
3542 /*
3543 * frsqrt7(sNaN) = canonical NaN
3544 * frsqrt7(-inf) = canonical NaN
3545 * frsqrt7(-normal) = canonical NaN
3546 * frsqrt7(-subnormal) = canonical NaN
3547 */
3548 if (float16_is_signaling_nan(f, s) ||
c45eff30
WL
3549 (float16_is_infinity(f) && sign) ||
3550 (float16_is_normal(f) && sign) ||
3551 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
e848a1e5
FC
3552 s->float_exception_flags |= float_flag_invalid;
3553 return float16_default_nan(s);
3554 }
3555
3556 /* frsqrt7(qNaN) = canonical NaN */
3557 if (float16_is_quiet_nan(f, s)) {
3558 return float16_default_nan(s);
3559 }
3560
3561 /* frsqrt7(+-0) = +-inf */
3562 if (float16_is_zero(f)) {
3563 s->float_exception_flags |= float_flag_divbyzero;
3564 return float16_set_sign(float16_infinity, sign);
3565 }
3566
3567 /* frsqrt7(+inf) = +0 */
3568 if (float16_is_infinity(f) && !sign) {
3569 return float16_set_sign(float16_zero, sign);
3570 }
3571
3572 /* +normal, +subnormal */
3573 uint64_t val = frsqrt7(f, exp_size, frac_size);
3574 return make_float16(val);
3575}
3576
3577static float32 frsqrt7_s(float32 f, float_status *s)
3578{
3579 int exp_size = 8, frac_size = 23;
3580 bool sign = float32_is_neg(f);
3581
3582 /*
3583 * frsqrt7(sNaN) = canonical NaN
3584 * frsqrt7(-inf) = canonical NaN
3585 * frsqrt7(-normal) = canonical NaN
3586 * frsqrt7(-subnormal) = canonical NaN
3587 */
3588 if (float32_is_signaling_nan(f, s) ||
c45eff30
WL
3589 (float32_is_infinity(f) && sign) ||
3590 (float32_is_normal(f) && sign) ||
3591 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
e848a1e5
FC
3592 s->float_exception_flags |= float_flag_invalid;
3593 return float32_default_nan(s);
3594 }
3595
3596 /* frsqrt7(qNaN) = canonical NaN */
3597 if (float32_is_quiet_nan(f, s)) {
3598 return float32_default_nan(s);
3599 }
3600
3601 /* frsqrt7(+-0) = +-inf */
3602 if (float32_is_zero(f)) {
3603 s->float_exception_flags |= float_flag_divbyzero;
3604 return float32_set_sign(float32_infinity, sign);
3605 }
3606
3607 /* frsqrt7(+inf) = +0 */
3608 if (float32_is_infinity(f) && !sign) {
3609 return float32_set_sign(float32_zero, sign);
3610 }
3611
3612 /* +normal, +subnormal */
3613 uint64_t val = frsqrt7(f, exp_size, frac_size);
3614 return make_float32(val);
3615}
3616
3617static float64 frsqrt7_d(float64 f, float_status *s)
3618{
3619 int exp_size = 11, frac_size = 52;
3620 bool sign = float64_is_neg(f);
3621
3622 /*
3623 * frsqrt7(sNaN) = canonical NaN
3624 * frsqrt7(-inf) = canonical NaN
3625 * frsqrt7(-normal) = canonical NaN
3626 * frsqrt7(-subnormal) = canonical NaN
3627 */
3628 if (float64_is_signaling_nan(f, s) ||
c45eff30
WL
3629 (float64_is_infinity(f) && sign) ||
3630 (float64_is_normal(f) && sign) ||
3631 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
e848a1e5
FC
3632 s->float_exception_flags |= float_flag_invalid;
3633 return float64_default_nan(s);
3634 }
3635
3636 /* frsqrt7(qNaN) = canonical NaN */
3637 if (float64_is_quiet_nan(f, s)) {
3638 return float64_default_nan(s);
3639 }
3640
3641 /* frsqrt7(+-0) = +-inf */
3642 if (float64_is_zero(f)) {
3643 s->float_exception_flags |= float_flag_divbyzero;
3644 return float64_set_sign(float64_infinity, sign);
3645 }
3646
3647 /* frsqrt7(+inf) = +0 */
3648 if (float64_is_infinity(f) && !sign) {
3649 return float64_set_sign(float64_zero, sign);
3650 }
3651
3652 /* +normal, +subnormal */
3653 uint64_t val = frsqrt7(f, exp_size, frac_size);
3654 return make_float64(val);
3655}
3656
3657RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3658RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3659RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
5eacf7d8 3660GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3661GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3662GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
e848a1e5 3663
55c35407
FC
3664/*
3665 * Vector Floating-Point Reciprocal Estimate Instruction
3666 *
3667 * Adapted from riscv-v-spec recip.c:
3668 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3669 */
3670static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3671 float_status *s)
3672{
3673 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3674 uint64_t exp = extract64(f, frac_size, exp_size);
3675 uint64_t frac = extract64(f, 0, frac_size);
3676
3677 const uint8_t lookup_table[] = {
3678 127, 125, 123, 121, 119, 117, 116, 114,
3679 112, 110, 109, 107, 105, 104, 102, 100,
3680 99, 97, 96, 94, 93, 91, 90, 88,
3681 87, 85, 84, 83, 81, 80, 79, 77,
3682 76, 75, 74, 72, 71, 70, 69, 68,
3683 66, 65, 64, 63, 62, 61, 60, 59,
3684 58, 57, 56, 55, 54, 53, 52, 51,
3685 50, 49, 48, 47, 46, 45, 44, 43,
3686 42, 41, 40, 40, 39, 38, 37, 36,
3687 35, 35, 34, 33, 32, 31, 31, 30,
3688 29, 28, 28, 27, 26, 25, 25, 24,
3689 23, 23, 22, 21, 21, 20, 19, 19,
3690 18, 17, 17, 16, 15, 15, 14, 14,
3691 13, 12, 12, 11, 11, 10, 9, 9,
3692 8, 8, 7, 7, 6, 5, 5, 4,
3693 4, 3, 3, 2, 2, 1, 1, 0
3694 };
3695 const int precision = 7;
3696
3697 if (exp == 0 && frac != 0) { /* subnormal */
3698 /* Normalize the subnormal. */
3699 while (extract64(frac, frac_size - 1, 1) == 0) {
3700 exp--;
3701 frac <<= 1;
3702 }
3703
3704 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3705
3706 if (exp != 0 && exp != UINT64_MAX) {
3707 /*
3708 * Overflow to inf or max value of same sign,
3709 * depending on sign and rounding mode.
3710 */
3711 s->float_exception_flags |= (float_flag_inexact |
3712 float_flag_overflow);
3713
3714 if ((s->float_rounding_mode == float_round_to_zero) ||
3715 ((s->float_rounding_mode == float_round_down) && !sign) ||
3716 ((s->float_rounding_mode == float_round_up) && sign)) {
3717 /* Return the largest-magnitude finite value of the same sign. */
3718 return (sign << (exp_size + frac_size)) |
c45eff30 3719 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
55c35407
FC
3720 } else {
3721 /* Return +-inf. */
3722 return (sign << (exp_size + frac_size)) |
c45eff30 3723 MAKE_64BIT_MASK(frac_size, exp_size);
55c35407
FC
3724 }
3725 }
3726 }
3727
3728 int idx = frac >> (frac_size - precision);
3729 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
c45eff30 3730 (frac_size - precision);
55c35407
FC
3731 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3732
3733 if (out_exp == 0 || out_exp == UINT64_MAX) {
3734 /*
3735 * The result is subnormal, but don't raise the underflow exception,
3736 * because there's no additional loss of precision.
3737 */
3738 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3739 if (out_exp == UINT64_MAX) {
3740 out_frac >>= 1;
3741 out_exp = 0;
3742 }
3743 }
3744
3745 uint64_t val = 0;
3746 val = deposit64(val, 0, frac_size, out_frac);
3747 val = deposit64(val, frac_size, exp_size, out_exp);
3748 val = deposit64(val, frac_size + exp_size, 1, sign);
3749 return val;
3750}
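/*
 * The reciprocal estimate follows the same scheme: the top 7 fraction bits
 * index a 128-entry table and the output exponent is effectively
 * 2 * bias - 1 - exp.  The extra code above handles the two cases the table
 * cannot express directly: inputs so small that 1/x overflows (clamped to
 * +-inf or the largest finite value depending on the rounding mode) and
 * results that end up subnormal.
 */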
3751
3752static float16 frec7_h(float16 f, float_status *s)
3753{
3754 int exp_size = 5, frac_size = 10;
3755 bool sign = float16_is_neg(f);
3756
3757 /* frec7(+-inf) = +-0 */
3758 if (float16_is_infinity(f)) {
3759 return float16_set_sign(float16_zero, sign);
3760 }
3761
3762 /* frec7(+-0) = +-inf */
3763 if (float16_is_zero(f)) {
3764 s->float_exception_flags |= float_flag_divbyzero;
3765 return float16_set_sign(float16_infinity, sign);
3766 }
3767
3768 /* frec7(sNaN) = canonical NaN */
3769 if (float16_is_signaling_nan(f, s)) {
3770 s->float_exception_flags |= float_flag_invalid;
3771 return float16_default_nan(s);
3772 }
3773
3774 /* frec7(qNaN) = canonical NaN */
3775 if (float16_is_quiet_nan(f, s)) {
3776 return float16_default_nan(s);
3777 }
3778
3779 /* +-normal, +-subnormal */
3780 uint64_t val = frec7(f, exp_size, frac_size, s);
3781 return make_float16(val);
3782}
3783
3784static float32 frec7_s(float32 f, float_status *s)
3785{
3786 int exp_size = 8, frac_size = 23;
3787 bool sign = float32_is_neg(f);
3788
3789 /* frec7(+-inf) = +-0 */
3790 if (float32_is_infinity(f)) {
3791 return float32_set_sign(float32_zero, sign);
3792 }
3793
3794 /* frec7(+-0) = +-inf */
3795 if (float32_is_zero(f)) {
3796 s->float_exception_flags |= float_flag_divbyzero;
3797 return float32_set_sign(float32_infinity, sign);
3798 }
3799
3800 /* frec7(sNaN) = canonical NaN */
3801 if (float32_is_signaling_nan(f, s)) {
3802 s->float_exception_flags |= float_flag_invalid;
3803 return float32_default_nan(s);
3804 }
3805
3806 /* frec7(qNaN) = canonical NaN */
3807 if (float32_is_quiet_nan(f, s)) {
3808 return float32_default_nan(s);
3809 }
3810
3811 /* +-normal, +-subnormal */
3812 uint64_t val = frec7(f, exp_size, frac_size, s);
3813 return make_float32(val);
3814}
3815
3816static float64 frec7_d(float64 f, float_status *s)
3817{
3818 int exp_size = 11, frac_size = 52;
3819 bool sign = float64_is_neg(f);
3820
3821 /* frec7(+-inf) = +-0 */
3822 if (float64_is_infinity(f)) {
3823 return float64_set_sign(float64_zero, sign);
3824 }
3825
3826 /* frec7(+-0) = +-inf */
3827 if (float64_is_zero(f)) {
3828 s->float_exception_flags |= float_flag_divbyzero;
3829 return float64_set_sign(float64_infinity, sign);
3830 }
3831
3832 /* frec7(sNaN) = canonical NaN */
3833 if (float64_is_signaling_nan(f, s)) {
3834 s->float_exception_flags |= float_flag_invalid;
3835 return float64_default_nan(s);
3836 }
3837
3838 /* frec7(qNaN) = canonical NaN */
3839 if (float64_is_quiet_nan(f, s)) {
3840 return float64_default_nan(s);
3841 }
3842
3843 /* +-normal, +-subnormal */
3844 uint64_t val = frec7(f, exp_size, frac_size, s);
3845 return make_float64(val);
3846}
3847
3848RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3849RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3850RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
5eacf7d8 3851GEN_VEXT_V_ENV(vfrec7_v_h, 2)
3852GEN_VEXT_V_ENV(vfrec7_v_w, 4)
3853GEN_VEXT_V_ENV(vfrec7_v_d, 8)
55c35407 3854
230b53dd 3855/* Vector Floating-Point MIN/MAX Instructions */
49c5611a
FC
3856RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3857RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3858RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
5eacf7d8 3859GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
3860GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
3861GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
49c5611a
FC
3862RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3863RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3864RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
5eacf7d8 3865GEN_VEXT_VF(vfmin_vf_h, 2)
3866GEN_VEXT_VF(vfmin_vf_w, 4)
3867GEN_VEXT_VF(vfmin_vf_d, 8)
230b53dd 3868
49c5611a
FC
3869RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3870RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3871RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
5eacf7d8 3872GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
3873GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
3874GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
49c5611a
FC
3875RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3876RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3877RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
5eacf7d8 3878GEN_VEXT_VF(vfmax_vf_h, 2)
3879GEN_VEXT_VF(vfmax_vf_w, 4)
3880GEN_VEXT_VF(vfmax_vf_d, 8)
1d426b81
LZ
3881
3882/* Vector Floating-Point Sign-Injection Instructions */
3883static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3884{
3885 return deposit64(b, 0, 15, a);
3886}
3887
3888static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3889{
3890 return deposit64(b, 0, 31, a);
3891}
3892
3893static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3894{
3895 return deposit64(b, 0, 63, a);
3896}
3897
3898RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3899RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3900RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
5eacf7d8 3901GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
3902GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
3903GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
1d426b81
LZ
3904RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3905RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3906RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
5eacf7d8 3907GEN_VEXT_VF(vfsgnj_vf_h, 2)
3908GEN_VEXT_VF(vfsgnj_vf_w, 4)
3909GEN_VEXT_VF(vfsgnj_vf_d, 8)
1d426b81
LZ
3910
3911static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3912{
3913 return deposit64(~b, 0, 15, a);
3914}
3915
3916static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3917{
3918 return deposit64(~b, 0, 31, a);
3919}
3920
3921static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3922{
3923 return deposit64(~b, 0, 63, a);
3924}
3925
3926RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3927RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3928RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
5eacf7d8 3929GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
3930GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
3931GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
1d426b81
LZ
3932RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3933RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3934RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
5eacf7d8 3935GEN_VEXT_VF(vfsgnjn_vf_h, 2)
3936GEN_VEXT_VF(vfsgnjn_vf_w, 4)
3937GEN_VEXT_VF(vfsgnjn_vf_d, 8)
1d426b81
LZ
3938
3939static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3940{
3941 return deposit64(b ^ a, 0, 15, a);
3942}
3943
3944static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3945{
3946 return deposit64(b ^ a, 0, 31, a);
3947}
3948
3949static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3950{
3951 return deposit64(b ^ a, 0, 63, a);
3952}
3953
3954RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3955RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3956RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
5eacf7d8 3957GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
3958GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
3959GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
1d426b81
LZ
3960RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3961RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3962RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
5eacf7d8 3963GEN_VEXT_VF(vfsgnjx_vf_h, 2)
3964GEN_VEXT_VF(vfsgnjx_vf_w, 4)
3965GEN_VEXT_VF(vfsgnjx_vf_d, 8)
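/*
 * For all three sign-injection variants the magnitude comes from vs2
 * (argument "a" is deposited into the low bits) while the sign bit is taken
 * from vs1/rs1 directly (vfsgnj), inverted (vfsgnjn), or XORed with the
 * sign of vs2 (vfsgnjx).
 */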
2a68e9e5
LZ
3966
3967/* Vector Floating-Point Compare Instructions */
3968#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
3969void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
3970 CPURISCVState *env, uint32_t desc) \
3971{ \
2a68e9e5
LZ
3972 uint32_t vm = vext_vm(desc); \
3973 uint32_t vl = env->vl; \
86247c51 3974 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
5eacf7d8 3975 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 3976 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
3977 uint32_t i; \
3978 \
f714361e 3979 for (i = env->vstart; i < vl; i++) { \
2a68e9e5
LZ
3980 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
3981 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 3982 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3983 /* set masked-off elements to 1s */ \
3984 if (vma) { \
3985 vext_set_elem_mask(vd, i, 1); \
3986 } \
2a68e9e5
LZ
3987 continue; \
3988 } \
f9298de5 3989 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
3990 DO_OP(s2, s1, &env->fp_status)); \
3991 } \
f714361e 3992 env->vstart = 0; \
3b57254d
WL
3993 /*
3994 * mask destination register is always tail-agnostic
3995 * set tail elements to 1s
3996 */ \
5eacf7d8 3997 if (vta_all_1s) { \
3998 for (; i < total_elems; i++) { \
3999 vext_set_elem_mask(vd, i, 1); \
4000 } \
4001 } \
2a68e9e5
LZ
4002}
4003
2a68e9e5
LZ
4004GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4005GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4006GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4007
4008#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4009void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4010 CPURISCVState *env, uint32_t desc) \
4011{ \
2a68e9e5
LZ
4012 uint32_t vm = vext_vm(desc); \
4013 uint32_t vl = env->vl; \
86247c51 4014 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
5eacf7d8 4015 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 4016 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
4017 uint32_t i; \
4018 \
f714361e 4019 for (i = env->vstart; i < vl; i++) { \
2a68e9e5 4020 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4021 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4022 /* set masked-off elements to 1s */ \
4023 if (vma) { \
4024 vext_set_elem_mask(vd, i, 1); \
4025 } \
2a68e9e5
LZ
4026 continue; \
4027 } \
f9298de5 4028 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4029 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4030 } \
f714361e 4031 env->vstart = 0; \
3b57254d
WL
4032 /*
4033 * mask destination register is always tail-agnostic
4034 * set tail elements to 1s
4035 */ \
5eacf7d8 4036 if (vta_all_1s) { \
4037 for (; i < total_elems; i++) { \
4038 vext_set_elem_mask(vd, i, 1); \
4039 } \
4040 } \
2a68e9e5
LZ
4041}
4042
4043GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4044GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4045GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4046
4047static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4048{
4049 FloatRelation compare = float16_compare_quiet(a, b, s);
4050 return compare != float_relation_equal;
4051}
4052
4053static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4054{
4055 FloatRelation compare = float32_compare_quiet(a, b, s);
4056 return compare != float_relation_equal;
4057}
4058
4059static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4060{
4061 FloatRelation compare = float64_compare_quiet(a, b, s);
4062 return compare != float_relation_equal;
4063}
4064
4065GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4066GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4067GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4068GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4069GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4070GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
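/*
 * vmfne uses the quiet compare and treats any result other than "equal"
 * (including unordered, i.e. NaN operands) as true, which matches the
 * IEEE 754 semantics of != without raising invalid for quiet NaNs.
 */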
4071
2a68e9e5
LZ
4072GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4073GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4074GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4075GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4076GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4077GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4078
2a68e9e5
LZ
4079GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4080GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4081GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4082GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4083GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4084GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4085
4086static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4087{
4088 FloatRelation compare = float16_compare(a, b, s);
4089 return compare == float_relation_greater;
4090}
4091
4092static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4093{
4094 FloatRelation compare = float32_compare(a, b, s);
4095 return compare == float_relation_greater;
4096}
4097
4098static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4099{
4100 FloatRelation compare = float64_compare(a, b, s);
4101 return compare == float_relation_greater;
4102}
4103
4104GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4105GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4106GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4107
4108static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4109{
4110 FloatRelation compare = float16_compare(a, b, s);
4111 return compare == float_relation_greater ||
4112 compare == float_relation_equal;
4113}
4114
4115static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4116{
4117 FloatRelation compare = float32_compare(a, b, s);
4118 return compare == float_relation_greater ||
4119 compare == float_relation_equal;
4120}
4121
4122static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4123{
4124 FloatRelation compare = float64_compare(a, b, s);
4125 return compare == float_relation_greater ||
4126 compare == float_relation_equal;
4127}
4128
4129GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4130GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4131GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
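/*
 * Unlike vmfeq/vmfne, the ordering predicates (vmflt/vmfle above and
 * vmfgt/vmfge here) use the signalling compare functions, so an unordered
 * comparison against NaN returns false and raises the invalid flag, as
 * required for IEEE 754 ordered relations.
 */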
4132
121ddbb3 4133/* Vector Floating-Point Classify Instruction */
121ddbb3
LZ
4134target_ulong fclass_h(uint64_t frs1)
4135{
4136 float16 f = frs1;
4137 bool sign = float16_is_neg(f);
4138
4139 if (float16_is_infinity(f)) {
4140 return sign ? 1 << 0 : 1 << 7;
4141 } else if (float16_is_zero(f)) {
4142 return sign ? 1 << 3 : 1 << 4;
4143 } else if (float16_is_zero_or_denormal(f)) {
4144 return sign ? 1 << 2 : 1 << 5;
4145 } else if (float16_is_any_nan(f)) {
4146 float_status s = { }; /* for snan_bit_is_one */
4147 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4148 } else {
4149 return sign ? 1 << 1 : 1 << 6;
4150 }
4151}
4152
4153target_ulong fclass_s(uint64_t frs1)
4154{
4155 float32 f = frs1;
4156 bool sign = float32_is_neg(f);
4157
4158 if (float32_is_infinity(f)) {
4159 return sign ? 1 << 0 : 1 << 7;
4160 } else if (float32_is_zero(f)) {
4161 return sign ? 1 << 3 : 1 << 4;
4162 } else if (float32_is_zero_or_denormal(f)) {
4163 return sign ? 1 << 2 : 1 << 5;
4164 } else if (float32_is_any_nan(f)) {
4165 float_status s = { }; /* for snan_bit_is_one */
4166 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4167 } else {
4168 return sign ? 1 << 1 : 1 << 6;
4169 }
4170}
4171
4172target_ulong fclass_d(uint64_t frs1)
4173{
4174 float64 f = frs1;
4175 bool sign = float64_is_neg(f);
4176
4177 if (float64_is_infinity(f)) {
4178 return sign ? 1 << 0 : 1 << 7;
4179 } else if (float64_is_zero(f)) {
4180 return sign ? 1 << 3 : 1 << 4;
4181 } else if (float64_is_zero_or_denormal(f)) {
4182 return sign ? 1 << 2 : 1 << 5;
4183 } else if (float64_is_any_nan(f)) {
4184 float_status s = { }; /* for snan_bit_is_one */
4185 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4186 } else {
4187 return sign ? 1 << 1 : 1 << 6;
4188 }
4189}
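/*
 * The returned class mask uses the standard F/D/Zfh fclass encoding:
 *   bit 0: -inf      bit 1: -normal     bit 2: -subnormal  bit 3: -0
 *   bit 4: +0        bit 5: +subnormal  bit 6: +normal     bit 7: +inf
 *   bit 8: signalling NaN               bit 9: quiet NaN
 */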
4190
4191RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4192RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4193RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
5eacf7d8 4194GEN_VEXT_V(vfclass_v_h, 2)
4195GEN_VEXT_V(vfclass_v_w, 4)
4196GEN_VEXT_V(vfclass_v_d, 8)
64ab5846
LZ
4197
4198/* Vector Floating-Point Merge Instruction */
5eacf7d8 4199
3479a814 4200#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
64ab5846
LZ
4201void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4202 CPURISCVState *env, uint32_t desc) \
4203{ \
64ab5846
LZ
4204 uint32_t vm = vext_vm(desc); \
4205 uint32_t vl = env->vl; \
5eacf7d8 4206 uint32_t esz = sizeof(ETYPE); \
4207 uint32_t total_elems = \
4208 vext_get_total_elems(env, desc, esz); \
4209 uint32_t vta = vext_vta(desc); \
64ab5846
LZ
4210 uint32_t i; \
4211 \
f714361e 4212 for (i = env->vstart; i < vl; i++) { \
64ab5846 4213 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
c45eff30
WL
4214 *((ETYPE *)vd + H(i)) = \
4215 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
64ab5846 4216 } \
f714361e 4217 env->vstart = 0; \
5eacf7d8 4218 /* set tail elements to 1s */ \
4219 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
64ab5846
LZ
4220}
4221
3479a814
FC
4222GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4223GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4224GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
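/*
 * vfmerge.vfm selects per element between the scalar and the vector
 * operand: vd[i] = v0.mask[i] ? f[rs1] : vs2[i].  In the ISA encoding,
 * vfmv.v.f is the same operation with vm=1, i.e. every element takes the
 * scalar value.
 */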
92100973
LZ
4225
4226/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4227/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4228RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4229RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4230RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
5eacf7d8 4231GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4232GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4233GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
92100973
LZ
4234
4235/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4236RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4237RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4238RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
5eacf7d8 4239GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4240GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4241GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
92100973
LZ
4242
4243/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4244RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4245RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4246RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
5eacf7d8 4247GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4248GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4249GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
92100973
LZ
4250
4251/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4252RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4253RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4254RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
5eacf7d8 4255GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4256GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4257GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4514b7b1
LZ
4258
4259/* Widening Floating-Point/Integer Type-Convert Instructions */
4260/* (TD, T2, TX2) */
3ce4c09d 4261#define WOP_UU_B uint16_t, uint8_t, uint8_t
4514b7b1
LZ
4262#define WOP_UU_H uint32_t, uint16_t, uint16_t
4263#define WOP_UU_W uint64_t, uint32_t, uint32_t
3b57254d
WL
4264/*
4265 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
4266 */
4514b7b1
LZ
4267RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4268RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
5eacf7d8 4269GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4270GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4514b7b1
LZ
4271
4272/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4273RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4274RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
5eacf7d8 4275GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4276GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4514b7b1 4277
246f8796
WL
4278/*
4279 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
4280 */
3ce4c09d 4281RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4514b7b1
LZ
4282RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4283RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
5eacf7d8 4284GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4285GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4286GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4514b7b1
LZ
4287
4288/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3ce4c09d 4289RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4514b7b1
LZ
4290RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4291RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
5eacf7d8 4292GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4293GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4294GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4514b7b1
LZ
4295
4296/*
246f8796 4297 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4514b7b1
LZ
4298 */
4299static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4300{
4301 return float16_to_float32(a, true, s);
4302}
4303
4304RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4305RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
5eacf7d8 4306GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4307GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
878d406e 4308
87b27bfc
WL
4309RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
4310GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
4311
878d406e
LZ
4312/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4313/* (TD, T2, TX2) */
ff679b58 4314#define NOP_UU_B uint8_t, uint16_t, uint32_t
878d406e
LZ
4315#define NOP_UU_H uint16_t, uint32_t, uint32_t
4316#define NOP_UU_W uint32_t, uint64_t, uint64_t
4317/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
ff679b58
FC
4318RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4319RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4320RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
5eacf7d8 4321GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4322GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4323GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
878d406e
LZ
4324
4325/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
ff679b58
FC
4326RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4327RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4328RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
5eacf7d8 4329GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4330GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4331GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
878d406e 4332
246f8796
WL
4333/*
4334 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
4335 */
ff679b58
FC
4336RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4337RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
5eacf7d8 4338GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4339GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
878d406e
LZ
4340
4341/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
ff679b58
FC
4342RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4343RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
5eacf7d8 4344GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4345GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
878d406e
LZ
4346
4347/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4348static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4349{
4350 return float32_to_float16(a, true, s);
4351}
4352
ff679b58
FC
4353RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4354RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
5eacf7d8 4355GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4356GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
fe5c9ab1 4357
87b27bfc
WL
4358RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
4359GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
4360
fe5c9ab1 4361/*
3b57254d 4362 * Vector Reduction Operations
fe5c9ab1
LZ
4363 */
4364/* Vector Single-Width Integer Reduction Instructions */
3479a814 4365#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
fe5c9ab1 4366void HELPER(NAME)(void *vd, void *v0, void *vs1, \
c45eff30
WL
4367 void *vs2, CPURISCVState *env, \
4368 uint32_t desc) \
fe5c9ab1 4369{ \
fe5c9ab1
LZ
4370 uint32_t vm = vext_vm(desc); \
4371 uint32_t vl = env->vl; \
df4f52a7 4372 uint32_t esz = sizeof(TD); \
4373 uint32_t vlenb = simd_maxsz(desc); \
4374 uint32_t vta = vext_vta(desc); \
fe5c9ab1 4375 uint32_t i; \
fe5c9ab1
LZ
4376 TD s1 = *((TD *)vs1 + HD(0)); \
4377 \
f714361e 4378 for (i = env->vstart; i < vl; i++) { \
fe5c9ab1 4379 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4380 if (!vm && !vext_elem_mask(v0, i)) { \
fe5c9ab1
LZ
4381 continue; \
4382 } \
4383 s1 = OP(s1, (TD)s2); \
4384 } \
4385 *((TD *)vd + HD(0)) = s1; \
f714361e 4386 env->vstart = 0; \
df4f52a7 4387 /* set tail elements to 1s */ \
4388 vext_set_elems_1s(vd, vta, esz, vlenb); \
fe5c9ab1
LZ
4389}
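/*
 * Reductions fold all active elements of vs2 into the scalar seeded from
 * vs1[0] and write the single result to vd[0]; inactive (masked-off)
 * elements are simply skipped, and everything in vd past element 0 is
 * treated as tail and set to 1s when vta is in effect.
 */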
4390
4391/* vd[0] = sum(vs1[0], vs2[*]) */
3479a814
FC
4392GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4393GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4394GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4395GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
fe5c9ab1
LZ
4396
4397/* vd[0] = maxu(vs1[0], vs2[*]) */
3479a814
FC
4398GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4399GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4400GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4401GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4402
4403/* vd[0] = max(vs1[0], vs2[*]) */
3479a814
FC
4404GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4405GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4406GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4407GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4408
4409/* vd[0] = minu(vs1[0], vs2[*]) */
3479a814
FC
4410GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4411GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4412GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4413GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4414
4415/* vd[0] = min(vs1[0], vs2[*]) */
3479a814
FC
4416GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4417GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4418GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4419GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4420
4421/* vd[0] = and(vs1[0], vs2[*]) */
3479a814
FC
4422GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4423GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4424GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4425GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
fe5c9ab1
LZ
4426
4427/* vd[0] = or(vs1[0], vs2[*]) */
3479a814
FC
4428GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4429GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4430GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4431GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
fe5c9ab1
LZ
4432
4433/* vd[0] = xor(vs1[0], vs2[*]) */
3479a814
FC
4434GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4435GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4436GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4437GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
bba71820
LZ
4438
4439/* Vector Widening Integer Reduction Instructions */
4440/* Signed sum reduction into double-width accumulator */
3479a814
FC
4441GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4442GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4443GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
bba71820
LZ
4444
4445/* Unsigned sum reduction into double-width accumulator */
3479a814
FC
4446GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4447GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4448GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
523547f1
LZ
4449
4450/* Vector Single-Width Floating-Point Reduction Instructions */
3479a814 4451#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
523547f1
LZ
4452void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4453 void *vs2, CPURISCVState *env, \
4454 uint32_t desc) \
4455{ \
523547f1
LZ
4456 uint32_t vm = vext_vm(desc); \
4457 uint32_t vl = env->vl; \
df4f52a7 4458 uint32_t esz = sizeof(TD); \
4459 uint32_t vlenb = simd_maxsz(desc); \
4460 uint32_t vta = vext_vta(desc); \
523547f1 4461 uint32_t i; \
523547f1
LZ
4462 TD s1 = *((TD *)vs1 + HD(0)); \
4463 \
f714361e 4464 for (i = env->vstart; i < vl; i++) { \
523547f1 4465 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4466 if (!vm && !vext_elem_mask(v0, i)) { \
523547f1
LZ
4467 continue; \
4468 } \
4469 s1 = OP(s1, (TD)s2, &env->fp_status); \
4470 } \
4471 *((TD *)vd + HD(0)) = s1; \
f714361e 4472 env->vstart = 0; \
df4f52a7 4473 /* set tail elements to 1s */ \
4474 vext_set_elems_1s(vd, vta, esz, vlenb); \
523547f1
LZ
4475}
4476
4477/* Unordered sum */
a3ab69f9
YL
4478GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4479GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4480GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4481
4482/* Ordered sum */
4483GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4484GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4485GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
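/*
 * Note that the unordered and ordered sums share the same sequential
 * implementation here; the spec merely permits vfredusum to reassociate the
 * additions, it does not require it, so evaluating in element order is a
 * valid (and bit-exact with vfredosum) choice.
 */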
523547f1
LZ
4486
4487/* Maximum value */
246f8796
WL
4488GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
4489 float16_maximum_number)
4490GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
4491 float32_maximum_number)
4492GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
4493 float64_maximum_number)
523547f1
LZ
4494
4495/* Minimum value */
246f8796
WL
4496GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
4497 float16_minimum_number)
4498GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
4499 float32_minimum_number)
4500GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
4501 float64_minimum_number)
696b0c26 4502
5bda21c0
YL
4503/* Vector Widening Floating-Point Add Instructions */
4504static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
696b0c26 4505{
5bda21c0 4506 return float32_add(a, float16_to_float32(b, true, s), s);
696b0c26
LZ
4507}
4508
5bda21c0 4509static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
696b0c26 4510{
5bda21c0 4511 return float64_add(a, float32_to_float64(b, s), s);
696b0c26 4512}
c21f34ae 4513
5bda21c0 4514/* Vector Widening Floating-Point Reduction Instructions */
a3ab69f9
YL
4515/* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4516GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4517GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4518GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4519GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
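
/*
 * Note on the widening reductions above: the accumulator type TD is already
 * 2*SEW wide, so fwadd16/fwadd32 first promote the SEW-wide element (the
 * `true` argument to float16_to_float32 selects the IEEE half-precision
 * format) and then add in the wider format. The promotion is exact, so
 * rounding happens once per element, in the 2*SEW format.
 */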

/*
 * Vector Mask Operations
 */
/* Vector Mask-Register Logical Instructions */
#define GEN_VEXT_MASK_VV(NAME, OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;      \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);          \
    uint32_t i;                                           \
    int a, b;                                             \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        a = vext_elem_mask(vs1, i);                       \
        b = vext_elem_mask(vs2, i);                       \
        vext_set_elem_mask(vd, i, OP(b, a));              \
    }                                                     \
    env->vstart = 0;                                      \
    /*                                                    \
     * mask destination register is always tail-agnostic; \
     * set tail elements to 1s                            \
     */                                                   \
    if (vta_all_1s) {                                     \
        for (; i < total_elems; i++) {                    \
            vext_set_elem_mask(vd, i, 1);                 \
        }                                                 \
    }                                                     \
}

#define DO_NAND(N, M)  (!(N & M))
#define DO_ANDNOT(N, M)  (N & !M)
#define DO_NOR(N, M)  (!(N | M))
#define DO_ORNOT(N, M)  (N | !M)
#define DO_XNOR(N, M)  (!(N ^ M))

GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
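
/*
 * Operand order note: the macro applies OP(b, a) with a = vs1[i] and
 * b = vs2[i], so e.g. vmandn_mm computes DO_ANDNOT(vs2, vs1), i.e.
 * vd.mask[i] = vs2.mask[i] & !vs1.mask[i]. The operands are single mask
 * bits (0 or 1), which is why logical '!' suffices as the complement.
 */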

/* Vector count population in mask vcpop */
target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
                             uint32_t desc)
{
    target_ulong cnt = 0;
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                cnt++;
            }
        }
    }
    env->vstart = 0;
    return cnt;
}

/* vfirst find-first-set mask bit */
target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
                              uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                return i;
            }
        }
    }
    env->vstart = 0;
    return -1LL;
}
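
/*
 * Both helpers above return their result as a scalar: vcpop_m counts the
 * set mask bits among active elements, and vfirst_m returns the index of
 * the first such bit, or -1 (all-ones in the destination register) when
 * none is set before vl.
 */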

enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};

static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    uint32_t vma = vext_vma(desc);
    int i;
    bool first_mask_bit = false;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            if (vma) {
                vext_set_elem_mask(vd, i, 1);
            }
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
    /*
     * mask destination register is always tail-agnostic;
     * set tail elements to 1s
     */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}
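
/*
 * Worked example for the three set-mask variants implemented by vmsetm
 * (unmasked, vl = 5, element 2 holds the first set bit of vs2):
 *
 *     vs2:            0 0 1 0 1
 *     BEFORE_FIRST:   1 1 0 0 0   (vmsbf.m: set before first)
 *     INCLUDE_FIRST:  1 1 1 0 0   (vmsif.m: set including first)
 *     ONLY_FIRST:     0 0 1 0 0   (vmsof.m: set only first)
 */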

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
                  uint32_t desc)                                          \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t sum = 0;                                                     \
    int i;                                                                \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = sum;                                      \
        if (vext_elem_mask(vs2, i)) {                                     \
            sum++;                                                        \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
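
/*
 * Example (unmasked, vl = 8): viota.m writes, for each element, the running
 * count of set bits in vs2 strictly before that element:
 *
 *     vs2 mask:  1 0 0 1 0 0 0 1
 *     vd:        0 1 1 1 2 2 2 2
 */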

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    int i;                                                                \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)

/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
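
/*
 * Because i_min = MAX(vstart, OFFSET), elements below OFFSET are left
 * untouched, matching the "vd[i+rs1] = vs2[i]" comment above: for
 * OFFSET = 2 and vl = 6, vd[0..1] keep their old values and the active
 * elements of vd[2..5] receive vs2[0..3].
 */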

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong i_max, i_min, i;                                         \
                                                                          \
    i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl);                         \
    i_max = MAX(i_min, env->vstart);                                      \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
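
/*
 * The slidedown body is split at i_max: for i < i_max the source element
 * vs2[i + rs1] still lies inside the register group (i + rs1 < vlmax) and
 * is copied, while active elements from i_max up to vl would read past the
 * source group and are therefore written as zero.
 */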

#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                    \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,         \
                                 void *vs2, CPURISCVState *env,           \
                                 uint32_t desc)                           \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,       \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
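
/*
 * Note the vrgatherei16 instantiations: the index type TS1 is always
 * uint16_t (HS1 = H2) regardless of the data SEW, while the data type TS2
 * follows SEW. Indices >= vlmax write zero, as for plain vrgather.
 */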

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
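
/*
 * Example (vl = 6): with vs1 mask = 1 0 1 0 0 1 and vs2 = {a, b, c, d, e, f},
 * the loop packs vd[0] = a, vd[1] = c, vd[2] = f. Destination elements from
 * num up to vl are not written by the loop, and elements from vl onward get
 * the usual tail treatment.
 */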

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}
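
/*
 * vmvr_v copies the remaining (maxsz - startb) bytes of the register group
 * in one memcpy; startb is derived from vstart, so a move that was
 * interrupted and restarted resumes at the correct byte offset instead of
 * re-copying from the beginning.
 */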

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
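
/*
 * The instantiation parameters encode the extension ratio: e.g. vzext_vf2_h
 * zero-extends each uint8_t source element into a uint16_t destination
 * element, and vsext_vf4_w sign-extends int8_t into int32_t. The extension
 * itself is just the implicit conversion in the assignment from DTYPE to
 * ETYPE.
 */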