/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/page-protection.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include "vector_internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
    uint16_t sew = 8 << vsew;
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);
    uint16_t vlen = cpu->cfg.vlenb << 3;
    int8_t lmul;

    if (vlmul & 4) {
        /*
         * Fractional LMUL, check:
         *
         * VLEN * LMUL >= SEW
         * VLEN >> (8 - lmul) >= sew
         * (vlenb << 3) >> (8 - lmul) >= sew
         */
        if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    /* lmul encoded as in DisasContext::lmul */
    lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
    vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}
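
/*
 * Illustrative example (not from the code above, just an assumed
 * configuration): with VLEN = 128 and SEW = 64, LMUL = 1/2 encodes as
 * vlmul = 6, so the fractional-LMUL check computes
 * (vlenb << 3) >> (8 - 6) = 128 >> 2 = 32 < 64 and sets vill;
 * LMUL = 1 instead is legal and gives VLMAX = VLEN * LMUL / SEW = 2.
 */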

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
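
/*
 * Worked example (illustrative, assumed values): vlenb = 16 (VLEN = 128),
 * LMUL = 2 (vext_lmul(desc) = 1) and a 4-byte element (log2_esz = 2) give
 * scale = 1 - 2 = -1, so VLMAX = 16 >> 1 = 8 elements.
 */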

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & ~env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoint before real load operation.
 *
 * In system mode, the TLB API probe_access is enough for watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in TLB
 * and page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);
    int mmu_index = riscv_env_mmu_index(env, false);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 mmu_index, ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     mmu_index, ra);
    }
}
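
/*
 * For example (illustrative): with 4 KiB pages, an access of 16 bytes that
 * starts 8 bytes before a page boundary is split by probe_pages() into one
 * probe for the first 8 bytes and a second probe for the remaining 8 bytes
 * on the following page.
 */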

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/* elements operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
static void NAME(CPURISCVState *env, abi_ptr addr, \
                 uint32_t idx, void *vd, uintptr_t retaddr) \
{ \
    ETYPE *cur = ((ETYPE *)vd + H(idx)); \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
} \

GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
static void NAME(CPURISCVState *env, abi_ptr addr, \
                 uint32_t idx, void *vd, uintptr_t retaddr) \
{ \
    ETYPE data = *((ETYPE *)vd + H(idx)); \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
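
/*
 * For reference, GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) above expands
 * (roughly) to:
 *
 *   static void lde_b(CPURISCVState *env, abi_ptr addr,
 *                     uint32_t idx, void *vd, uintptr_t retaddr)
 *   {
 *       int8_t *cur = ((int8_t *)vd + H1(idx));
 *       *cur = cpu_ldsb_data_ra(env, addr, retaddr);
 *   }
 */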

static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
                                   uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t vta = vext_vta(desc);
    int k;

    if (vta == 0) {
        return;
    }

    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
}

/*
 * stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env);

    for (i = env->vstart; i < env->vl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}
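
/*
 * Illustrative address computation (assumed operands): for vlse32.v with
 * stride = 12, element i of field k is accessed at
 * base + 12 * i + (k << 2) and written to flat destination element
 * index i + k * max_elems, as in the loop above.
 */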

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  target_ulong stride, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
                     ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  target_ulong stride, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
                     ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 * unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;

    VSTART_CHECK_EARLY_EXIT(env);

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
}

/*
 * A masked unit-stride load or store operation is a special case of
 * stride, with stride = NF * sizeof(ETYPE)
 */
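
/*
 * For example (illustrative), a masked segment access with NF = 2 and
 * 16-bit elements is handled by vext_ldst_stride() with
 * stride = 2 * sizeof(int16_t) = 4 bytes, as computed in the helpers below.
 */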

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
                         CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
                     ctzl(sizeof(ETYPE)), GETPC()); \
} \
 \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_us(vd, base, env, desc, LOAD_FN, \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
}

GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
                         CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
                     ctzl(sizeof(ETYPE)), GETPC()); \
} \
 \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_us(vd, base, env, desc, STORE_FN, \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
}

GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 * unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC());
}
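
/*
 * Illustrative: vlm.v/vsm.v operate on ceil(vl / 8) bytes, so with vl = 17
 * the helpers above compute evl = (17 + 7) >> 3 = 3 and move exactly
 * 3 mask bytes.
 */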

/*
 * index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
static target_ulong NAME(target_ulong base, \
                         uint32_t idx, void *vs2) \
{ \
    return (base + *((ETYPE *)vs2 + H(idx))); \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
                    STORE_FN, ctzl(sizeof(ETYPE)), \
                    GETPC()); \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain;
    int mmu_index = riscv_env_mmu_index(env, false);

    VSTART_CHECK_EARLY_EXIT(env);

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index);
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (!page_check_range(addr, offset, PAGE_READ)) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}
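
/*
 * Illustrative fault-only-first behaviour: if element 0 is accessible but
 * the probe for element 3 finds no valid mapping, vl is truncated to 3
 * above and only elements 0..2 are loaded; a fault on element 0 itself is
 * still raised normally by probe_pages().
 */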

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
              ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/*
 * load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
    uint32_t max_elems = vlenb >> log2_esz;

    if (env->vstart >= ((vlenb * nf) >> log2_esz)) {
        env->vstart = 0;
        return;
    }

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store rest of elements of current segment pointed by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd,
                      ra);
        }
        k++;
    }

    /* load/store elements for rest of segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}
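
/*
 * Illustrative vstart handling (assumed configuration): for vl2re8.v with
 * vlenb = 16 (nf = 2, max_elems = 16) and env->vstart = 20, the code above
 * resumes at segment k = 1 with off = 4, finishing elements 4..15 of the
 * second register before clearing vstart.
 */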

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
                    ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
void HELPER(NAME)(void *vd, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_whole(vd, base, env, desc, STORE_FN, \
                    ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)

/*
 * Vector Integer Arithmetic Instructions
 */

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)


RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = \
        vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = vext_elem_mask(v0, i); \
 \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = vext_elem_mask(v0, i); \
 \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
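
/*
 * Example (illustrative) for 8-bit elements: N = 0xff, M = 0x01, C = 0
 * wraps to 0x00, so DO_MADC reports a carry because (uint8_t)(N + M) < N;
 * with C = 1 the expression (uint8_t)(N + M + 1) <= N also catches the
 * carry contributed by the incoming carry bit.
 */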

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = !vm && vext_elem_mask(v0, i); \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
    } \
    env->vstart = 0; \
    /* \
     * mask destination register is always tail-agnostic; \
     * set tail elements to 1s \
     */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = !vm && vext_elem_mask(v0, i); \
        vext_set_elem_mask(vd, i, \
                DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
    } \
    env->vstart = 0; \
    /* \
     * mask destination register is always tail-agnostic; \
     * set tail elements to 1s \
     */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(TS1); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
        TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/*
 * generate the helpers for shift instructions with one vector and one scalar
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(TD); \
    uint32_t total_elems = \
        vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, \
                              (i + 1) * esz); \
            continue; \
        } \
        TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            if (vma) { \
                vext_set_elem_mask(vd, i, 1); \
            } \
            continue; \
        } \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
    } \
    env->vstart = 0; \
    /* \
     * mask destination register is always tail-agnostic; \
     * set tail elements to 1s \
     */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            if (vma) { \
                vext_set_elem_mask(vd, i, 1); \
            } \
            continue; \
        } \
        vext_set_elem_mask(vd, i, \
                DO_OP(s2, (ETYPE)(target_long)s1)); \
    } \
    env->vstart = 0; \
    /* \
     * mask destination register is always tail-agnostic; \
     * set tail elements to 1s \
     */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1)
GEN_VEXT_VX(vminu_vx_h, 2)
GEN_VEXT_VX(vminu_vx_w, 4)
GEN_VEXT_VX(vminu_vx_d, 8)
GEN_VEXT_VX(vmin_vx_b, 1)
GEN_VEXT_VX(vmin_vx_h, 2)
GEN_VEXT_VX(vmin_vx_w, 4)
GEN_VEXT_VX(vmin_vx_d, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8)
GEN_VEXT_VX(vmax_vx_b, 1)
GEN_VEXT_VX(vmax_vx_h, 2)
GEN_VEXT_VX(vmax_vx_w, 4)
GEN_VEXT_VX(vmax_vx_d, 8)
958b85f3
LZ
1415
1416/* Vector Single-Width Integer Multiply Instructions */
1417#define DO_MUL(N, M) (N * M)
1418RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1419RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1420RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1421RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
f1eed927 1422GEN_VEXT_VV(vmul_vv_b, 1)
1423GEN_VEXT_VV(vmul_vv_h, 2)
1424GEN_VEXT_VV(vmul_vv_w, 4)
1425GEN_VEXT_VV(vmul_vv_d, 8)
958b85f3
LZ
1426
1427static int8_t do_mulh_b(int8_t s2, int8_t s1)
1428{
1429 return (int16_t)s2 * (int16_t)s1 >> 8;
1430}
1431
1432static int16_t do_mulh_h(int16_t s2, int16_t s1)
1433{
1434 return (int32_t)s2 * (int32_t)s1 >> 16;
1435}
1436
1437static int32_t do_mulh_w(int32_t s2, int32_t s1)
1438{
1439 return (int64_t)s2 * (int64_t)s1 >> 32;
1440}
1441
1442static int64_t do_mulh_d(int64_t s2, int64_t s1)
1443{
1444 uint64_t hi_64, lo_64;
1445
1446 muls64(&lo_64, &hi_64, s1, s2);
1447 return hi_64;
1448}
1449
1450static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1451{
1452 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1453}
1454
1455static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1456{
1457 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1458}
1459
1460static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1461{
1462 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1463}
1464
1465static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1466{
1467 uint64_t hi_64, lo_64;
1468
1469 mulu64(&lo_64, &hi_64, s2, s1);
1470 return hi_64;
1471}
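/*
 * For SEW < 64 the high half of the product is produced by widening
 * both operands, multiplying, and shifting right by SEW.  E.g. for
 * vmulhu.vv with SEW=8: 0xff * 0xff = 0xfe01, so do_mulhu_b returns
 * 0xfe.  Only the 64-bit helpers need the double-width muls64()/
 * mulu64() primitives, which hand back the high half directly.
 */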
1472
1473static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1474{
1475 return (int16_t)s2 * (uint16_t)s1 >> 8;
1476}
1477
1478static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1479{
1480 return (int32_t)s2 * (uint32_t)s1 >> 16;
1481}
1482
1483static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1484{
1485 return (int64_t)s2 * (uint64_t)s1 >> 32;
1486}
1487
 1488/*
 1489 * Signed-by-unsigned high multiply (vmulhsu):
 1490 *
 1491 * Let A = signed operand,
 1492 *     B = unsigned operand,
 1493 *     P = mulu64(A, B), the unsigned 128-bit product.
 1494 *
 1495 * mulu64() sees A as the unsigned value
 1496 *     A_u = A             if A >= 0
 1497 *     A_u = A + 2 ** 64   if A < 0
 1498 * so
 1499 *     P = A_u * B = A * B + (A < 0 ? 2 ** 64 * B : 0)
 1500 * and the desired signed product is
 1501 *     SP = A * B = P - (A < 0 ? 2 ** 64 * B : 0).
 1502 * Subtracting 2 ** 64 * B only affects the upper 64 bits of P,
 1503 * hence
 1504 *     HI_P -= (A < 0 ? B : 0)
 1505 */
1506
1507static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1508{
1509 uint64_t hi_64, lo_64;
1510
1511 mulu64(&lo_64, &hi_64, s2, s1);
1512
1513 hi_64 -= s2 < 0 ? s1 : 0;
1514 return hi_64;
1515}
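/*
 * Worked example of the correction above, scaled to 8 bits for
 * readability: A = -1 (0xff as unsigned), B = 0xff.  The unsigned
 * product is 0xff * 0xff = 0xfe01, but the signed-by-unsigned product
 * is -1 * 255 = -255 = 0xff01, so the high byte must drop by B:
 * 0xfe - 0xff = 0xff (mod 2 ** 8).
 */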
1516
1517RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1518RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1519RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1520RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1521RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1522RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1523RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1524RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1525RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1526RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1527RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1528RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
f1eed927 1529GEN_VEXT_VV(vmulh_vv_b, 1)
1530GEN_VEXT_VV(vmulh_vv_h, 2)
1531GEN_VEXT_VV(vmulh_vv_w, 4)
1532GEN_VEXT_VV(vmulh_vv_d, 8)
1533GEN_VEXT_VV(vmulhu_vv_b, 1)
1534GEN_VEXT_VV(vmulhu_vv_h, 2)
1535GEN_VEXT_VV(vmulhu_vv_w, 4)
1536GEN_VEXT_VV(vmulhu_vv_d, 8)
1537GEN_VEXT_VV(vmulhsu_vv_b, 1)
1538GEN_VEXT_VV(vmulhsu_vv_h, 2)
1539GEN_VEXT_VV(vmulhsu_vv_w, 4)
1540GEN_VEXT_VV(vmulhsu_vv_d, 8)
958b85f3
LZ
1541
1542RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1543RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1544RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1545RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1546RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1547RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1548RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1549RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1550RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1551RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1552RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1553RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1554RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1555RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1556RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1557RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
5c19fc15 1558GEN_VEXT_VX(vmul_vx_b, 1)
1559GEN_VEXT_VX(vmul_vx_h, 2)
1560GEN_VEXT_VX(vmul_vx_w, 4)
1561GEN_VEXT_VX(vmul_vx_d, 8)
1562GEN_VEXT_VX(vmulh_vx_b, 1)
1563GEN_VEXT_VX(vmulh_vx_h, 2)
1564GEN_VEXT_VX(vmulh_vx_w, 4)
1565GEN_VEXT_VX(vmulh_vx_d, 8)
1566GEN_VEXT_VX(vmulhu_vx_b, 1)
1567GEN_VEXT_VX(vmulhu_vx_h, 2)
1568GEN_VEXT_VX(vmulhu_vx_w, 4)
1569GEN_VEXT_VX(vmulhu_vx_d, 8)
1570GEN_VEXT_VX(vmulhsu_vx_b, 1)
1571GEN_VEXT_VX(vmulhsu_vx_h, 2)
1572GEN_VEXT_VX(vmulhsu_vx_w, 4)
1573GEN_VEXT_VX(vmulhsu_vx_d, 8)
85e6658c
LZ
1574
1575/* Vector Integer Divide Instructions */
1576#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1577#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
c45eff30 1578#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \
85e6658c 1579 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
c45eff30 1580#define DO_REM(N, M) (unlikely(M == 0) ? N : \
85e6658c
LZ
1581 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
1582
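/*
 * These macros follow the RISC-V integer division rules: dividing by
 * zero yields all ones (-1) for div[u] and leaves the dividend
 * unchanged for rem[u]; signed overflow (INT_MIN / -1) yields INT_MIN
 * for vdiv and 0 for vrem.  (N == -N) is true only for 0 and the
 * type's minimum value, so together with (M == -1) it guards the
 * overflow case; the harmless match on N == 0 already gives the
 * required 0 / -1 and 0 % -1 results.
 */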
1583RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1584RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1585RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1586RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1587RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1588RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1589RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1590RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1591RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1592RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1593RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1594RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1595RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1596RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1597RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1598RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
f1eed927 1599GEN_VEXT_VV(vdivu_vv_b, 1)
1600GEN_VEXT_VV(vdivu_vv_h, 2)
1601GEN_VEXT_VV(vdivu_vv_w, 4)
1602GEN_VEXT_VV(vdivu_vv_d, 8)
1603GEN_VEXT_VV(vdiv_vv_b, 1)
1604GEN_VEXT_VV(vdiv_vv_h, 2)
1605GEN_VEXT_VV(vdiv_vv_w, 4)
1606GEN_VEXT_VV(vdiv_vv_d, 8)
1607GEN_VEXT_VV(vremu_vv_b, 1)
1608GEN_VEXT_VV(vremu_vv_h, 2)
1609GEN_VEXT_VV(vremu_vv_w, 4)
1610GEN_VEXT_VV(vremu_vv_d, 8)
1611GEN_VEXT_VV(vrem_vv_b, 1)
1612GEN_VEXT_VV(vrem_vv_h, 2)
1613GEN_VEXT_VV(vrem_vv_w, 4)
1614GEN_VEXT_VV(vrem_vv_d, 8)
85e6658c
LZ
1615
1616RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1617RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1618RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1619RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1620RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1621RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1622RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1623RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1624RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1625RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1626RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1627RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1628RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1629RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1630RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1631RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
5c19fc15 1632GEN_VEXT_VX(vdivu_vx_b, 1)
1633GEN_VEXT_VX(vdivu_vx_h, 2)
1634GEN_VEXT_VX(vdivu_vx_w, 4)
1635GEN_VEXT_VX(vdivu_vx_d, 8)
1636GEN_VEXT_VX(vdiv_vx_b, 1)
1637GEN_VEXT_VX(vdiv_vx_h, 2)
1638GEN_VEXT_VX(vdiv_vx_w, 4)
1639GEN_VEXT_VX(vdiv_vx_d, 8)
1640GEN_VEXT_VX(vremu_vx_b, 1)
1641GEN_VEXT_VX(vremu_vx_h, 2)
1642GEN_VEXT_VX(vremu_vx_w, 4)
1643GEN_VEXT_VX(vremu_vx_d, 8)
1644GEN_VEXT_VX(vrem_vx_b, 1)
1645GEN_VEXT_VX(vrem_vx_h, 2)
1646GEN_VEXT_VX(vrem_vx_w, 4)
1647GEN_VEXT_VX(vrem_vx_d, 8)
97b1cba3
LZ
1648
1649/* Vector Widening Integer Multiply Instructions */
1650RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1651RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1652RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1653RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1654RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1655RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1656RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1657RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1658RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
f1eed927 1659GEN_VEXT_VV(vwmul_vv_b, 2)
1660GEN_VEXT_VV(vwmul_vv_h, 4)
1661GEN_VEXT_VV(vwmul_vv_w, 8)
1662GEN_VEXT_VV(vwmulu_vv_b, 2)
1663GEN_VEXT_VV(vwmulu_vv_h, 4)
1664GEN_VEXT_VV(vwmulu_vv_w, 8)
1665GEN_VEXT_VV(vwmulsu_vv_b, 2)
1666GEN_VEXT_VV(vwmulsu_vv_h, 4)
1667GEN_VEXT_VV(vwmulsu_vv_w, 8)
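/*
 * The widening multiplies reuse DO_MUL: the WOP_* type lists promote
 * both source elements to the 2*SEW destination type before the
 * multiplication, so the full-width product is kept.  For vwmulsu the
 * vs2 operand is sign-extended and vs1 zero-extended, matching
 * signed(vs2) * unsigned(vs1) in the spec.
 */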
97b1cba3
LZ
1668
1669RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1670RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1671RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1672RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1673RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1674RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1675RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1676RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1677RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
5c19fc15 1678GEN_VEXT_VX(vwmul_vx_b, 2)
1679GEN_VEXT_VX(vwmul_vx_h, 4)
1680GEN_VEXT_VX(vwmul_vx_w, 8)
1681GEN_VEXT_VX(vwmulu_vx_b, 2)
1682GEN_VEXT_VX(vwmulu_vx_h, 4)
1683GEN_VEXT_VX(vwmulu_vx_w, 8)
1684GEN_VEXT_VX(vwmulsu_vx_b, 2)
1685GEN_VEXT_VX(vwmulsu_vx_h, 4)
1686GEN_VEXT_VX(vwmulsu_vx_w, 8)
54df813a
LZ
1687
1688/* Vector Single-Width Integer Multiply-Add Instructions */
c45eff30 1689#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
54df813a
LZ
1690static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1691{ \
1692 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1693 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1694 TD d = *((TD *)vd + HD(i)); \
1695 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1696}
1697
1698#define DO_MACC(N, M, D) (M * N + D)
1699#define DO_NMSAC(N, M, D) (-(M * N) + D)
1700#define DO_MADD(N, M, D) (M * D + N)
1701#define DO_NMSUB(N, M, D) (-(M * D) + N)
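/*
 * OPIVV3/OPIVX3 invoke OP(s2, s1, d), so N = vs2, M = vs1 (or the
 * scalar rs1) and D = vd.  DO_MACC/DO_NMSAC therefore compute
 * vd = +/-(vs1 * vs2) + vd, while DO_MADD/DO_NMSUB multiply into the
 * destination: vd = +/-(vs1 * vd) + vs2, matching vmacc/vnmsac and
 * vmadd/vnmsub respectively.
 */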
1702RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1703RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1704RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1705RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1706RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1707RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1708RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1709RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1710RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1711RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1712RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1713RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1714RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1715RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1716RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1717RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
f1eed927 1718GEN_VEXT_VV(vmacc_vv_b, 1)
1719GEN_VEXT_VV(vmacc_vv_h, 2)
1720GEN_VEXT_VV(vmacc_vv_w, 4)
1721GEN_VEXT_VV(vmacc_vv_d, 8)
1722GEN_VEXT_VV(vnmsac_vv_b, 1)
1723GEN_VEXT_VV(vnmsac_vv_h, 2)
1724GEN_VEXT_VV(vnmsac_vv_w, 4)
1725GEN_VEXT_VV(vnmsac_vv_d, 8)
1726GEN_VEXT_VV(vmadd_vv_b, 1)
1727GEN_VEXT_VV(vmadd_vv_h, 2)
1728GEN_VEXT_VV(vmadd_vv_w, 4)
1729GEN_VEXT_VV(vmadd_vv_d, 8)
1730GEN_VEXT_VV(vnmsub_vv_b, 1)
1731GEN_VEXT_VV(vnmsub_vv_h, 2)
1732GEN_VEXT_VV(vnmsub_vv_w, 4)
1733GEN_VEXT_VV(vnmsub_vv_d, 8)
54df813a
LZ
1734
1735#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1736static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1737{ \
1738 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1739 TD d = *((TD *)vd + HD(i)); \
1740 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1741}
1742
1743RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1744RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1745RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1746RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1747RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1748RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1749RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1750RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1751RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1752RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1753RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1754RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1755RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1756RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1757RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1758RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
5c19fc15 1759GEN_VEXT_VX(vmacc_vx_b, 1)
1760GEN_VEXT_VX(vmacc_vx_h, 2)
1761GEN_VEXT_VX(vmacc_vx_w, 4)
1762GEN_VEXT_VX(vmacc_vx_d, 8)
1763GEN_VEXT_VX(vnmsac_vx_b, 1)
1764GEN_VEXT_VX(vnmsac_vx_h, 2)
1765GEN_VEXT_VX(vnmsac_vx_w, 4)
1766GEN_VEXT_VX(vnmsac_vx_d, 8)
1767GEN_VEXT_VX(vmadd_vx_b, 1)
1768GEN_VEXT_VX(vmadd_vx_h, 2)
1769GEN_VEXT_VX(vmadd_vx_w, 4)
1770GEN_VEXT_VX(vmadd_vx_d, 8)
1771GEN_VEXT_VX(vnmsub_vx_b, 1)
1772GEN_VEXT_VX(vnmsub_vx_h, 2)
1773GEN_VEXT_VX(vnmsub_vx_w, 4)
1774GEN_VEXT_VX(vnmsub_vx_d, 8)
2b587b33
LZ
1775
1776/* Vector Widening Integer Multiply-Add Instructions */
1777RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1778RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1779RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1780RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1781RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1782RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1783RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1784RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1785RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
f1eed927 1786GEN_VEXT_VV(vwmaccu_vv_b, 2)
1787GEN_VEXT_VV(vwmaccu_vv_h, 4)
1788GEN_VEXT_VV(vwmaccu_vv_w, 8)
1789GEN_VEXT_VV(vwmacc_vv_b, 2)
1790GEN_VEXT_VV(vwmacc_vv_h, 4)
1791GEN_VEXT_VV(vwmacc_vv_w, 8)
1792GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1793GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1794GEN_VEXT_VV(vwmaccsu_vv_w, 8)
2b587b33
LZ
1795
1796RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1797RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1798RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1799RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1800RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1801RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1802RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1803RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1804RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1805RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1806RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1807RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
5c19fc15 1808GEN_VEXT_VX(vwmaccu_vx_b, 2)
1809GEN_VEXT_VX(vwmaccu_vx_h, 4)
1810GEN_VEXT_VX(vwmaccu_vx_w, 8)
1811GEN_VEXT_VX(vwmacc_vx_b, 2)
1812GEN_VEXT_VX(vwmacc_vx_h, 4)
1813GEN_VEXT_VX(vwmacc_vx_w, 8)
1814GEN_VEXT_VX(vwmaccsu_vx_b, 2)
1815GEN_VEXT_VX(vwmaccsu_vx_h, 4)
1816GEN_VEXT_VX(vwmaccsu_vx_w, 8)
1817GEN_VEXT_VX(vwmaccus_vx_b, 2)
1818GEN_VEXT_VX(vwmaccus_vx_h, 4)
1819GEN_VEXT_VX(vwmaccus_vx_w, 8)
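/*
 * vwmaccsu multiplies signed vs1 (or rs1) by unsigned vs2, while the
 * scalar-only vwmaccus swaps the roles: unsigned rs1 times signed
 * vs2.  That is why the former uses the WOP_SSU_* type lists and the
 * latter WOP_SUS_*.
 */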
f020a7a1
LZ
1820
1821/* Vector Integer Merge and Move Instructions */
3479a814 1822#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
f020a7a1
LZ
1823void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1824 uint32_t desc) \
1825{ \
1826 uint32_t vl = env->vl; \
89a32de2 1827 uint32_t esz = sizeof(ETYPE); \
1828 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1829 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
1830 uint32_t i; \
1831 \
df4252b2
DHB
1832 VSTART_CHECK_EARLY_EXIT(env); \
1833 \
f714361e 1834 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
1835 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1836 *((ETYPE *)vd + H(i)) = s1; \
1837 } \
f714361e 1838 env->vstart = 0; \
89a32de2 1839 /* set tail elements to 1s */ \
1840 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
1841}
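/*
 * Tail-agnostic (vta) and mask-agnostic (vma) handling throughout
 * this file writes all 1s into the skipped element positions when the
 * corresponding policy is agnostic, which is one of the behaviours
 * the V spec permits; vext_set_elems_1s() implements that choice.
 */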
1842
3479a814
FC
1843GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1844GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1845GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1846GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
f020a7a1 1847
3479a814 1848#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
f020a7a1
LZ
1849void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1850 uint32_t desc) \
1851{ \
1852 uint32_t vl = env->vl; \
89a32de2 1853 uint32_t esz = sizeof(ETYPE); \
1854 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1855 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
1856 uint32_t i; \
1857 \
df4252b2
DHB
1858 VSTART_CHECK_EARLY_EXIT(env); \
1859 \
f714361e 1860 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
1861 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
1862 } \
f714361e 1863 env->vstart = 0; \
89a32de2 1864 /* set tail elements to 1s */ \
1865 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
1866}
1867
3479a814
FC
1868GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
1869GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1870GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1871GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
f020a7a1 1872
3479a814 1873#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
f020a7a1
LZ
1874void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1875 CPURISCVState *env, uint32_t desc) \
1876{ \
f020a7a1 1877 uint32_t vl = env->vl; \
89a32de2 1878 uint32_t esz = sizeof(ETYPE); \
1879 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1880 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
1881 uint32_t i; \
1882 \
df4252b2
DHB
1883 VSTART_CHECK_EARLY_EXIT(env); \
1884 \
f714361e 1885 for (i = env->vstart; i < vl; i++) { \
f9298de5 1886 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
f020a7a1
LZ
1887 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
1888 } \
f714361e 1889 env->vstart = 0; \
89a32de2 1890 /* set tail elements to 1s */ \
1891 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
1892}
1893
3479a814
FC
1894GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
1895GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1896GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1897GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
f020a7a1 1898
3479a814 1899#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
f020a7a1
LZ
1900void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1901 void *vs2, CPURISCVState *env, uint32_t desc) \
1902{ \
f020a7a1 1903 uint32_t vl = env->vl; \
89a32de2 1904 uint32_t esz = sizeof(ETYPE); \
1905 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1906 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
1907 uint32_t i; \
1908 \
df4252b2
DHB
1909 VSTART_CHECK_EARLY_EXIT(env); \
1910 \
f714361e 1911 for (i = env->vstart; i < vl; i++) { \
f020a7a1 1912 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1913 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
f020a7a1
LZ
1914 (ETYPE)(target_long)s1); \
1915 *((ETYPE *)vd + H(i)) = d; \
1916 } \
f714361e 1917 env->vstart = 0; \
89a32de2 1918 /* set tail elements to 1s */ \
1919 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
1920}
1921
3479a814
FC
1922GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
1923GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1924GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1925GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
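/*
 * vmerge.v[vx]m selects per element on mask register v0: where the
 * mask bit is set the result comes from vs1 (or the scalar rs1),
 * otherwise from vs2.  The same encodings with vm=1 are the unmasked
 * vmv.v.* moves handled above.
 */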
eb2650e3
LZ
1926
1927/*
3b57254d 1928 * Vector Fixed-Point Arithmetic Instructions
eb2650e3
LZ
1929 */
1930
1931/* Vector Single-Width Saturating Add and Subtract */
1932
1933/*
 1934 * As the fixed-point instructions share rounding-mode and saturation
 1935 * handling, define the common fixed-point macros here.
1936 */
1937typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1938 CPURISCVState *env, int vxrm);
1939
1940#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1941static inline void \
1942do_##NAME(void *vd, void *vs1, void *vs2, int i, \
1943 CPURISCVState *env, int vxrm) \
1944{ \
1945 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1946 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1947 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
1948}
1949
1950static inline void
1951vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1952 CPURISCVState *env,
f9298de5 1953 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 1954 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 1955{
df4252b2
DHB
1956 VSTART_CHECK_EARLY_EXIT(env);
1957
f714361e 1958 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 1959 if (!vm && !vext_elem_mask(v0, i)) {
72e17a9f
YTC
1960 /* set masked-off elements to 1s */
1961 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
eb2650e3
LZ
1962 continue;
1963 }
1964 fn(vd, vs1, vs2, i, env, vxrm);
1965 }
f714361e 1966 env->vstart = 0;
eb2650e3
LZ
1967}
1968
1969static inline void
1970vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1971 CPURISCVState *env,
8a085fb2 1972 uint32_t desc,
09106eed 1973 opivv2_rm_fn *fn, uint32_t esz)
eb2650e3 1974{
eb2650e3
LZ
1975 uint32_t vm = vext_vm(desc);
1976 uint32_t vl = env->vl;
09106eed 1977 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
1978 uint32_t vta = vext_vta(desc);
72e17a9f 1979 uint32_t vma = vext_vma(desc);
eb2650e3
LZ
1980
1981 switch (env->vxrm) {
1982 case 0: /* rnu */
1983 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1984 env, vl, vm, 0, fn, vma, esz);
eb2650e3
LZ
1985 break;
1986 case 1: /* rne */
1987 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1988 env, vl, vm, 1, fn, vma, esz);
eb2650e3
LZ
1989 break;
1990 case 2: /* rdn */
1991 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1992 env, vl, vm, 2, fn, vma, esz);
eb2650e3
LZ
1993 break;
1994 default: /* rod */
1995 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1996 env, vl, vm, 3, fn, vma, esz);
eb2650e3
LZ
1997 break;
1998 }
09106eed 1999 /* set tail elements to 1s */
2000 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2001}
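/*
 * Dispatching on env->vxrm here passes the rounding mode to
 * vext_vv_rm_1() as a literal, so the element loop runs with a
 * constant vxrm instead of re-reading it from the CPU state for
 * every element.
 */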
2002
2003/* generate helpers for fixed point instructions with OPIVV format */
09106eed 2004#define GEN_VEXT_VV_RM(NAME, ESZ) \
eb2650e3
LZ
2005void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2006 CPURISCVState *env, uint32_t desc) \
2007{ \
8a085fb2 2008 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
09106eed 2009 do_##NAME, ESZ); \
eb2650e3
LZ
2010}
2011
246f8796
WL
2012static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
2013 uint8_t b)
eb2650e3
LZ
2014{
2015 uint8_t res = a + b;
2016 if (res < a) {
2017 res = UINT8_MAX;
2018 env->vxsat = 0x1;
2019 }
2020 return res;
2021}
2022
2023static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2024 uint16_t b)
2025{
2026 uint16_t res = a + b;
2027 if (res < a) {
2028 res = UINT16_MAX;
2029 env->vxsat = 0x1;
2030 }
2031 return res;
2032}
2033
2034static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2035 uint32_t b)
2036{
2037 uint32_t res = a + b;
2038 if (res < a) {
2039 res = UINT32_MAX;
2040 env->vxsat = 0x1;
2041 }
2042 return res;
2043}
2044
2045static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2046 uint64_t b)
2047{
2048 uint64_t res = a + b;
2049 if (res < a) {
2050 res = UINT64_MAX;
2051 env->vxsat = 0x1;
2052 }
2053 return res;
2054}
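/*
 * Unsigned saturating add: the addition wraps modulo 2 ** SEW, so
 * overflow shows up as res < a.  E.g. for SEW=8, 0xf0 + 0x20 wraps to
 * 0x10 < 0xf0, so the result saturates to UINT8_MAX and vxsat is set.
 */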
2055
2056RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2057RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2058RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2059RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
09106eed 2060GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2061GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2062GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2063GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
eb2650e3
LZ
2064
2065typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2066 CPURISCVState *env, int vxrm);
2067
2068#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2069static inline void \
2070do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2071 CPURISCVState *env, int vxrm) \
2072{ \
2073 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2074 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2075}
2076
2077static inline void
2078vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2079 CPURISCVState *env,
f9298de5 2080 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 2081 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 2082{
df4252b2
DHB
2083 VSTART_CHECK_EARLY_EXIT(env);
2084
f714361e 2085 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2086 if (!vm && !vext_elem_mask(v0, i)) {
72e17a9f
YTC
2087 /* set masked-off elements to 1s */
2088 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
eb2650e3
LZ
2089 continue;
2090 }
2091 fn(vd, s1, vs2, i, env, vxrm);
2092 }
f714361e 2093 env->vstart = 0;
eb2650e3
LZ
2094}
2095
2096static inline void
2097vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2098 CPURISCVState *env,
8a085fb2 2099 uint32_t desc,
09106eed 2100 opivx2_rm_fn *fn, uint32_t esz)
eb2650e3 2101{
eb2650e3
LZ
2102 uint32_t vm = vext_vm(desc);
2103 uint32_t vl = env->vl;
09106eed 2104 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2105 uint32_t vta = vext_vta(desc);
72e17a9f 2106 uint32_t vma = vext_vma(desc);
eb2650e3
LZ
2107
2108 switch (env->vxrm) {
2109 case 0: /* rnu */
2110 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2111 env, vl, vm, 0, fn, vma, esz);
eb2650e3
LZ
2112 break;
2113 case 1: /* rne */
2114 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2115 env, vl, vm, 1, fn, vma, esz);
eb2650e3
LZ
2116 break;
2117 case 2: /* rdn */
2118 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2119 env, vl, vm, 2, fn, vma, esz);
eb2650e3
LZ
2120 break;
2121 default: /* rod */
2122 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2123 env, vl, vm, 3, fn, vma, esz);
eb2650e3
LZ
2124 break;
2125 }
09106eed 2126 /* set tail elements to 1s */
2127 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2128}
2129
2130/* generate helpers for fixed point instructions with OPIVX format */
09106eed 2131#define GEN_VEXT_VX_RM(NAME, ESZ) \
eb2650e3 2132void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
c45eff30
WL
2133 void *vs2, CPURISCVState *env, \
2134 uint32_t desc) \
eb2650e3 2135{ \
8a085fb2 2136 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
09106eed 2137 do_##NAME, ESZ); \
eb2650e3
LZ
2138}
2139
2140RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2141RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2142RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2143RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
09106eed 2144GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2145GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2146GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2147GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
eb2650e3
LZ
2148
2149static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2150{
2151 int8_t res = a + b;
2152 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2153 res = a > 0 ? INT8_MAX : INT8_MIN;
2154 env->vxsat = 0x1;
2155 }
2156 return res;
2157}
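/*
 * Signed saturating add: (res ^ a) & (res ^ b) has its sign bit set
 * only when a and b share a sign that differs from the result's,
 * i.e. on overflow.  The clamp direction follows the operands' sign;
 * a cannot be 0 when overflow occurs, so testing a > 0 is enough.
 */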
2158
246f8796
WL
2159static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
2160 int16_t b)
eb2650e3
LZ
2161{
2162 int16_t res = a + b;
2163 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2164 res = a > 0 ? INT16_MAX : INT16_MIN;
2165 env->vxsat = 0x1;
2166 }
2167 return res;
2168}
2169
246f8796
WL
2170static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
2171 int32_t b)
eb2650e3
LZ
2172{
2173 int32_t res = a + b;
2174 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2175 res = a > 0 ? INT32_MAX : INT32_MIN;
2176 env->vxsat = 0x1;
2177 }
2178 return res;
2179}
2180
246f8796
WL
2181static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
2182 int64_t b)
eb2650e3
LZ
2183{
2184 int64_t res = a + b;
2185 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2186 res = a > 0 ? INT64_MAX : INT64_MIN;
2187 env->vxsat = 0x1;
2188 }
2189 return res;
2190}
2191
2192RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2193RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2194RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2195RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
09106eed 2196GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2197GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2198GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2199GEN_VEXT_VV_RM(vsadd_vv_d, 8)
eb2650e3
LZ
2200
2201RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2202RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2203RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2204RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
09106eed 2205GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2206GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2207GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2208GEN_VEXT_VX_RM(vsadd_vx_d, 8)
eb2650e3 2209
246f8796
WL
2210static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2211 uint8_t b)
eb2650e3
LZ
2212{
2213 uint8_t res = a - b;
2214 if (res > a) {
2215 res = 0;
2216 env->vxsat = 0x1;
2217 }
2218 return res;
2219}
2220
2221static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2222 uint16_t b)
2223{
2224 uint16_t res = a - b;
2225 if (res > a) {
2226 res = 0;
2227 env->vxsat = 0x1;
2228 }
2229 return res;
2230}
2231
2232static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2233 uint32_t b)
2234{
2235 uint32_t res = a - b;
2236 if (res > a) {
2237 res = 0;
2238 env->vxsat = 0x1;
2239 }
2240 return res;
2241}
2242
2243static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2244 uint64_t b)
2245{
2246 uint64_t res = a - b;
2247 if (res > a) {
2248 res = 0;
2249 env->vxsat = 0x1;
2250 }
2251 return res;
2252}
2253
2254RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2255RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2256RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2257RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
09106eed 2258GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2259GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2260GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2261GEN_VEXT_VV_RM(vssubu_vv_d, 8)
eb2650e3
LZ
2262
2263RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2264RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2265RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2266RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
09106eed 2267GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2268GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2269GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2270GEN_VEXT_VX_RM(vssubu_vx_d, 8)
eb2650e3
LZ
2271
2272static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2273{
2274 int8_t res = a - b;
2275 if ((res ^ a) & (a ^ b) & INT8_MIN) {
65606f21 2276 res = a >= 0 ? INT8_MAX : INT8_MIN;
eb2650e3
LZ
2277 env->vxsat = 0x1;
2278 }
2279 return res;
2280}
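/*
 * Signed saturating subtract: a - b overflows only when a and b have
 * opposite signs and the result's sign differs from a's, hence
 * (res ^ a) & (a ^ b).  Note the a >= 0 test (not a > 0):
 * 0 - INT_MIN overflows towards positive and must clamp to INT_MAX.
 */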
2281
246f8796
WL
2282static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
2283 int16_t b)
eb2650e3
LZ
2284{
2285 int16_t res = a - b;
2286 if ((res ^ a) & (a ^ b) & INT16_MIN) {
65606f21 2287 res = a >= 0 ? INT16_MAX : INT16_MIN;
eb2650e3
LZ
2288 env->vxsat = 0x1;
2289 }
2290 return res;
2291}
2292
246f8796
WL
2293static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
2294 int32_t b)
eb2650e3
LZ
2295{
2296 int32_t res = a - b;
2297 if ((res ^ a) & (a ^ b) & INT32_MIN) {
65606f21 2298 res = a >= 0 ? INT32_MAX : INT32_MIN;
eb2650e3
LZ
2299 env->vxsat = 0x1;
2300 }
2301 return res;
2302}
2303
246f8796
WL
2304static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
2305 int64_t b)
eb2650e3
LZ
2306{
2307 int64_t res = a - b;
2308 if ((res ^ a) & (a ^ b) & INT64_MIN) {
65606f21 2309 res = a >= 0 ? INT64_MAX : INT64_MIN;
eb2650e3
LZ
2310 env->vxsat = 0x1;
2311 }
2312 return res;
2313}
2314
2315RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2316RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2317RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2318RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
09106eed 2319GEN_VEXT_VV_RM(vssub_vv_b, 1)
2320GEN_VEXT_VV_RM(vssub_vv_h, 2)
2321GEN_VEXT_VV_RM(vssub_vv_w, 4)
2322GEN_VEXT_VV_RM(vssub_vv_d, 8)
eb2650e3
LZ
2323
2324RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2325RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2326RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2327RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
09106eed 2328GEN_VEXT_VX_RM(vssub_vx_b, 1)
2329GEN_VEXT_VX_RM(vssub_vx_h, 2)
2330GEN_VEXT_VX_RM(vssub_vx_w, 4)
2331GEN_VEXT_VX_RM(vssub_vx_d, 8)
b7aee481
LZ
2332
2333/* Vector Single-Width Averaging Add and Subtract */
2334static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2335{
2336 uint8_t d = extract64(v, shift, 1);
2337 uint8_t d1;
2338 uint64_t D1, D2;
2339
2340 if (shift == 0 || shift > 64) {
2341 return 0;
2342 }
2343
2344 d1 = extract64(v, shift - 1, 1);
2345 D1 = extract64(v, 0, shift);
2346 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2347 return d1;
2348 } else if (vxrm == 1) { /* round-to-nearest-even */
2349 if (shift > 1) {
2350 D2 = extract64(v, 0, shift - 1);
2351 return d1 & ((D2 != 0) | d);
2352 } else {
2353 return d1 & d;
2354 }
2355 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2356 return !d & (D1 != 0);
2357 }
2358 return 0; /* round-down (truncate) */
2359}
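/*
 * get_round() returns the increment to add after shifting v right by
 * 'shift'.  Here d is the bit that becomes the new LSB, d1 the most
 * significant discarded bit, D1 all discarded bits and D2 the
 * discarded bits below d1.  Example with v = 0b0110, shift = 2
 * (0b10 is discarded, 6/4 = 1.5):
 *   rnu: d1 = 1                          -> +1, result 2
 *   rne: d1 & ((D2 != 0) | d) = 1        -> +1, ties to even 2
 *   rdn: always 0                        -> truncate to 1
 *   rod: !d & (D1 != 0) = 0              -> LSB already odd, stays 1
 */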
2360
246f8796
WL
2361static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
2362 int32_t b)
b7aee481
LZ
2363{
2364 int64_t res = (int64_t)a + b;
2365 uint8_t round = get_round(vxrm, res, 1);
2366
2367 return (res >> 1) + round;
2368}
2369
246f8796
WL
2370static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
2371 int64_t b)
b7aee481
LZ
2372{
2373 int64_t res = a + b;
2374 uint8_t round = get_round(vxrm, res, 1);
2375 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2376
2377 /* With signed overflow, bit 64 is inverse of bit 63. */
2378 return ((res >> 1) ^ over) + round;
2379}
2380
2381RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2382RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2383RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2384RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
09106eed 2385GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2386GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2387GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2388GEN_VEXT_VV_RM(vaadd_vv_d, 8)
b7aee481
LZ
2389
2390RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2391RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2392RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2393RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
09106eed 2394GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2395GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2396GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2397GEN_VEXT_VX_RM(vaadd_vx_d, 8)
b7aee481 2398
8b99a110
FC
2399static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2400 uint32_t a, uint32_t b)
2401{
2402 uint64_t res = (uint64_t)a + b;
2403 uint8_t round = get_round(vxrm, res, 1);
2404
2405 return (res >> 1) + round;
2406}
2407
2408static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2409 uint64_t a, uint64_t b)
2410{
2411 uint64_t res = a + b;
2412 uint8_t round = get_round(vxrm, res, 1);
2413 uint64_t over = (uint64_t)(res < a) << 63;
2414
2415 return ((res >> 1) | over) + round;
2416}
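/*
 * For the unsigned average the lost bit 64 is simply the carry out of
 * the 64-bit addition (res < a), OR-ed back in as bit 63 of the
 * halved result.  E.g. 2 ** 63 + 2 ** 63 wraps to 0 and the carry
 * restores the expected average 2 ** 63.
 */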
2417
2418RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2419RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2420RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2421RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
09106eed 2422GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2423GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2424GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2425GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
8b99a110
FC
2426
2427RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2428RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2429RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2430RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
09106eed 2431GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2432GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2433GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2434GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
8b99a110 2435
246f8796
WL
2436static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2437 int32_t b)
b7aee481
LZ
2438{
2439 int64_t res = (int64_t)a - b;
2440 uint8_t round = get_round(vxrm, res, 1);
2441
2442 return (res >> 1) + round;
2443}
2444
246f8796
WL
2445static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
2446 int64_t b)
b7aee481
LZ
2447{
2448 int64_t res = (int64_t)a - b;
2449 uint8_t round = get_round(vxrm, res, 1);
2450 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2451
2452 /* With signed overflow, bit 64 is inverse of bit 63. */
2453 return ((res >> 1) ^ over) + round;
2454}
2455
2456RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2457RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2458RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2459RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
09106eed 2460GEN_VEXT_VV_RM(vasub_vv_b, 1)
2461GEN_VEXT_VV_RM(vasub_vv_h, 2)
2462GEN_VEXT_VV_RM(vasub_vv_w, 4)
2463GEN_VEXT_VV_RM(vasub_vv_d, 8)
b7aee481
LZ
2464
2465RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2466RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2467RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2468RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
09106eed 2469GEN_VEXT_VX_RM(vasub_vx_b, 1)
2470GEN_VEXT_VX_RM(vasub_vx_h, 2)
2471GEN_VEXT_VX_RM(vasub_vx_w, 4)
2472GEN_VEXT_VX_RM(vasub_vx_d, 8)
9f0ff9e5 2473
8b99a110
FC
2474static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2475 uint32_t a, uint32_t b)
2476{
2477 int64_t res = (int64_t)a - b;
2478 uint8_t round = get_round(vxrm, res, 1);
2479
2480 return (res >> 1) + round;
2481}
2482
2483static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2484 uint64_t a, uint64_t b)
2485{
2486 uint64_t res = (uint64_t)a - b;
2487 uint8_t round = get_round(vxrm, res, 1);
2488 uint64_t over = (uint64_t)(res > a) << 63;
2489
2490 return ((res >> 1) | over) + round;
2491}
2492
2493RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2494RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2495RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2496RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
09106eed 2497GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2498GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2499GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2500GEN_VEXT_VV_RM(vasubu_vv_d, 8)
8b99a110
FC
2501
2502RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2503RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2504RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2505RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
09106eed 2506GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2507GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2508GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2509GEN_VEXT_VX_RM(vasubu_vx_d, 8)
8b99a110 2510
9f0ff9e5
LZ
2511/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2512static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2513{
2514 uint8_t round;
2515 int16_t res;
2516
2517 res = (int16_t)a * (int16_t)b;
2518 round = get_round(vxrm, res, 7);
c45eff30 2519 res = (res >> 7) + round;
9f0ff9e5
LZ
2520
2521 if (res > INT8_MAX) {
2522 env->vxsat = 0x1;
2523 return INT8_MAX;
2524 } else if (res < INT8_MIN) {
2525 env->vxsat = 0x1;
2526 return INT8_MIN;
2527 } else {
2528 return res;
2529 }
2530}
2531
2532static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2533{
2534 uint8_t round;
2535 int32_t res;
2536
2537 res = (int32_t)a * (int32_t)b;
2538 round = get_round(vxrm, res, 15);
c45eff30 2539 res = (res >> 15) + round;
9f0ff9e5
LZ
2540
2541 if (res > INT16_MAX) {
2542 env->vxsat = 0x1;
2543 return INT16_MAX;
2544 } else if (res < INT16_MIN) {
2545 env->vxsat = 0x1;
2546 return INT16_MIN;
2547 } else {
2548 return res;
2549 }
2550}
2551
2552static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2553{
2554 uint8_t round;
2555 int64_t res;
2556
2557 res = (int64_t)a * (int64_t)b;
2558 round = get_round(vxrm, res, 31);
c45eff30 2559 res = (res >> 31) + round;
9f0ff9e5
LZ
2560
2561 if (res > INT32_MAX) {
2562 env->vxsat = 0x1;
2563 return INT32_MAX;
2564 } else if (res < INT32_MIN) {
2565 env->vxsat = 0x1;
2566 return INT32_MIN;
2567 } else {
2568 return res;
2569 }
2570}
2571
2572static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2573{
2574 uint8_t round;
2575 uint64_t hi_64, lo_64;
2576 int64_t res;
2577
2578 if (a == INT64_MIN && b == INT64_MIN) {
2579 env->vxsat = 1;
2580 return INT64_MAX;
2581 }
2582
2583 muls64(&lo_64, &hi_64, a, b);
2584 round = get_round(vxrm, lo_64, 63);
2585 /*
2586 * Cannot overflow, as there are always
2587 * 2 sign bits after multiply.
2588 */
2589 res = (hi_64 << 1) | (lo_64 >> 63);
2590 if (round) {
2591 if (res == INT64_MAX) {
2592 env->vxsat = 1;
2593 } else {
2594 res += 1;
2595 }
2596 }
2597 return res;
2598}
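/*
 * vsmul is a signed fixed-point multiply: the double-width product is
 * shifted right by SEW-1 with rounding and then clipped to SEW bits.
 * The only operand pair whose scaled product leaves the SEW-bit range
 * is INT_MIN * INT_MIN ((-1.0) * (-1.0)), handled by the explicit
 * check in vsmul64 and by the range checks in the narrower helpers.
 * The (hi_64 << 1) | (lo_64 >> 63) reassembly is the 128-bit product
 * shifted right by 63.
 */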
2599
2600RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2601RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2602RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2603RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
09106eed 2604GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2605GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2606GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2607GEN_VEXT_VV_RM(vsmul_vv_d, 8)
9f0ff9e5
LZ
2608
2609RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2610RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2611RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2612RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
09106eed 2613GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2614GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2615GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2616GEN_VEXT_VX_RM(vsmul_vx_d, 8)
0a1eaf00 2617
04a61406
LZ
2618/* Vector Single-Width Scaling Shift Instructions */
2619static inline uint8_t
2620vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2621{
2622 uint8_t round, shift = b & 0x7;
2623 uint8_t res;
2624
2625 round = get_round(vxrm, a, shift);
c45eff30 2626 res = (a >> shift) + round;
04a61406
LZ
2627 return res;
2628}
2629static inline uint16_t
2630vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2631{
2632 uint8_t round, shift = b & 0xf;
04a61406
LZ
2633
2634 round = get_round(vxrm, a, shift);
66997c42 2635 return (a >> shift) + round;
04a61406
LZ
2636}
2637static inline uint32_t
2638vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2639{
2640 uint8_t round, shift = b & 0x1f;
04a61406
LZ
2641
2642 round = get_round(vxrm, a, shift);
66997c42 2643 return (a >> shift) + round;
04a61406
LZ
2644}
2645static inline uint64_t
2646vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2647{
2648 uint8_t round, shift = b & 0x3f;
04a61406
LZ
2649
2650 round = get_round(vxrm, a, shift);
66997c42 2651 return (a >> shift) + round;
04a61406
LZ
2652}
2653RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2654RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2655RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2656RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
09106eed 2657GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2658GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2659GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2660GEN_VEXT_VV_RM(vssrl_vv_d, 8)
04a61406
LZ
2661
2662RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2663RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2664RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2665RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
09106eed 2666GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2667GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2668GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2669GEN_VEXT_VX_RM(vssrl_vx_d, 8)
04a61406
LZ
2670
2671static inline int8_t
2672vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2673{
2674 uint8_t round, shift = b & 0x7;
04a61406
LZ
2675
2676 round = get_round(vxrm, a, shift);
66997c42 2677 return (a >> shift) + round;
04a61406
LZ
2678}
2679static inline int16_t
2680vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2681{
2682 uint8_t round, shift = b & 0xf;
04a61406
LZ
2683
2684 round = get_round(vxrm, a, shift);
66997c42 2685 return (a >> shift) + round;
04a61406
LZ
2686}
2687static inline int32_t
2688vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2689{
2690 uint8_t round, shift = b & 0x1f;
04a61406
LZ
2691
2692 round = get_round(vxrm, a, shift);
66997c42 2693 return (a >> shift) + round;
04a61406
LZ
2694}
2695static inline int64_t
2696vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2697{
2698 uint8_t round, shift = b & 0x3f;
04a61406
LZ
2699
2700 round = get_round(vxrm, a, shift);
66997c42 2701 return (a >> shift) + round;
04a61406 2702}
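/*
 * vssrl/vssra are ordinary logical/arithmetic right shifts with the
 * get_round() increment added afterwards; the shift amount is taken
 * modulo SEW (b & 0x7 for bytes, and so on), as for the normal vector
 * shifts.  E.g. vssra with SEW=8 and vxrm=rnu: -7 >> 1 is -4, the
 * discarded bit is 1, so the result is -4 + 1 = -3 (round-half-up of
 * -3.5).
 */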
9ff3d287 2703
04a61406
LZ
2704RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2705RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2706RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2707RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
09106eed 2708GEN_VEXT_VV_RM(vssra_vv_b, 1)
2709GEN_VEXT_VV_RM(vssra_vv_h, 2)
2710GEN_VEXT_VV_RM(vssra_vv_w, 4)
2711GEN_VEXT_VV_RM(vssra_vv_d, 8)
04a61406
LZ
2712
2713RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2714RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2715RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2716RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
09106eed 2717GEN_VEXT_VX_RM(vssra_vx_b, 1)
2718GEN_VEXT_VX_RM(vssra_vx_h, 2)
2719GEN_VEXT_VX_RM(vssra_vx_w, 4)
2720GEN_VEXT_VX_RM(vssra_vx_d, 8)
9ff3d287
LZ
2721
2722/* Vector Narrowing Fixed-Point Clip Instructions */
2723static inline int8_t
2724vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2725{
2726 uint8_t round, shift = b & 0xf;
2727 int16_t res;
2728
2729 round = get_round(vxrm, a, shift);
c45eff30 2730 res = (a >> shift) + round;
9ff3d287
LZ
2731 if (res > INT8_MAX) {
2732 env->vxsat = 0x1;
2733 return INT8_MAX;
2734 } else if (res < INT8_MIN) {
2735 env->vxsat = 0x1;
2736 return INT8_MIN;
2737 } else {
2738 return res;
2739 }
2740}
2741
2742static inline int16_t
2743vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2744{
2745 uint8_t round, shift = b & 0x1f;
2746 int32_t res;
2747
2748 round = get_round(vxrm, a, shift);
c45eff30 2749 res = (a >> shift) + round;
9ff3d287
LZ
2750 if (res > INT16_MAX) {
2751 env->vxsat = 0x1;
2752 return INT16_MAX;
2753 } else if (res < INT16_MIN) {
2754 env->vxsat = 0x1;
2755 return INT16_MIN;
2756 } else {
2757 return res;
2758 }
2759}
2760
2761static inline int32_t
2762vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2763{
2764 uint8_t round, shift = b & 0x3f;
2765 int64_t res;
2766
2767 round = get_round(vxrm, a, shift);
c45eff30 2768 res = (a >> shift) + round;
9ff3d287
LZ
2769 if (res > INT32_MAX) {
2770 env->vxsat = 0x1;
2771 return INT32_MAX;
2772 } else if (res < INT32_MIN) {
2773 env->vxsat = 0x1;
2774 return INT32_MIN;
2775 } else {
2776 return res;
2777 }
2778}
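/*
 * The narrowing clips take a 2*SEW-wide source element, scale it
 * right by the shift amount (taken modulo 2*SEW) with rounding, then
 * saturate it into SEW bits, setting vxsat when clipping happens.
 * E.g. vnclip with SEW=8: source 0x0123 shifted right by 1 gives 145
 * (146 after rnu rounding), which exceeds INT8_MAX and clips to 0x7f.
 */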
2779
a70b3a73
FC
2780RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2781RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2782RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
09106eed 2783GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2784GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2785GEN_VEXT_VV_RM(vnclip_wv_w, 4)
a70b3a73
FC
2786
2787RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2788RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2789RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
09106eed 2790GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2791GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2792GEN_VEXT_VX_RM(vnclip_wx_w, 4)
9ff3d287
LZ
2793
2794static inline uint8_t
2795vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2796{
2797 uint8_t round, shift = b & 0xf;
2798 uint16_t res;
2799
2800 round = get_round(vxrm, a, shift);
c45eff30 2801 res = (a >> shift) + round;
9ff3d287
LZ
2802 if (res > UINT8_MAX) {
2803 env->vxsat = 0x1;
2804 return UINT8_MAX;
2805 } else {
2806 return res;
2807 }
2808}
2809
2810static inline uint16_t
2811vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2812{
2813 uint8_t round, shift = b & 0x1f;
2814 uint32_t res;
2815
2816 round = get_round(vxrm, a, shift);
c45eff30 2817 res = (a >> shift) + round;
9ff3d287
LZ
2818 if (res > UINT16_MAX) {
2819 env->vxsat = 0x1;
2820 return UINT16_MAX;
2821 } else {
2822 return res;
2823 }
2824}
2825
2826static inline uint32_t
2827vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2828{
2829 uint8_t round, shift = b & 0x3f;
a70b3a73 2830 uint64_t res;
9ff3d287
LZ
2831
2832 round = get_round(vxrm, a, shift);
c45eff30 2833 res = (a >> shift) + round;
9ff3d287
LZ
2834 if (res > UINT32_MAX) {
2835 env->vxsat = 0x1;
2836 return UINT32_MAX;
2837 } else {
2838 return res;
2839 }
2840}
2841
a70b3a73
FC
2842RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2843RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2844RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
09106eed 2845GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
2846GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
2847GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
9ff3d287 2848
a70b3a73
FC
2849RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2850RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2851RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
09106eed 2852GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
2853GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
2854GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
ce2a0343
LZ
2855
2856/*
3b57254d 2857 * Vector Floating-Point Arithmetic Instructions
ce2a0343
LZ
2858 */
2859/* Vector Single-Width Floating-Point Add/Subtract Instructions */
2860#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2861static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2862 CPURISCVState *env) \
2863{ \
2864 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2865 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2866 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
2867}
2868
5eacf7d8 2869#define GEN_VEXT_VV_ENV(NAME, ESZ) \
ce2a0343
LZ
2870void HELPER(NAME)(void *vd, void *v0, void *vs1, \
2871 void *vs2, CPURISCVState *env, \
2872 uint32_t desc) \
2873{ \
ce2a0343
LZ
2874 uint32_t vm = vext_vm(desc); \
2875 uint32_t vl = env->vl; \
5eacf7d8 2876 uint32_t total_elems = \
2877 vext_get_total_elems(env, desc, ESZ); \
2878 uint32_t vta = vext_vta(desc); \
5b448f44 2879 uint32_t vma = vext_vma(desc); \
ce2a0343
LZ
2880 uint32_t i; \
2881 \
df4252b2
DHB
2882 VSTART_CHECK_EARLY_EXIT(env); \
2883 \
f714361e 2884 for (i = env->vstart; i < vl; i++) { \
f9298de5 2885 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
2886 /* set masked-off elements to 1s */ \
2887 vext_set_elems_1s(vd, vma, i * ESZ, \
2888 (i + 1) * ESZ); \
ce2a0343
LZ
2889 continue; \
2890 } \
2891 do_##NAME(vd, vs1, vs2, i, env); \
2892 } \
f714361e 2893 env->vstart = 0; \
5eacf7d8 2894 /* set tail elements to 1s */ \
2895 vext_set_elems_1s(vd, vta, vl * ESZ, \
2896 total_elems * ESZ); \
ce2a0343
LZ
2897}
2898
2899RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2900RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2901RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
5eacf7d8 2902GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
2903GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
2904GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
ce2a0343
LZ
2905
2906#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2907static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2908 CPURISCVState *env) \
2909{ \
2910 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2911 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2912}
2913
5eacf7d8 2914#define GEN_VEXT_VF(NAME, ESZ) \
ce2a0343
LZ
2915void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
2916 void *vs2, CPURISCVState *env, \
2917 uint32_t desc) \
2918{ \
ce2a0343
LZ
2919 uint32_t vm = vext_vm(desc); \
2920 uint32_t vl = env->vl; \
5eacf7d8 2921 uint32_t total_elems = \
c45eff30 2922 vext_get_total_elems(env, desc, ESZ); \
5eacf7d8 2923 uint32_t vta = vext_vta(desc); \
5b448f44 2924 uint32_t vma = vext_vma(desc); \
ce2a0343
LZ
2925 uint32_t i; \
2926 \
df4252b2
DHB
2927 VSTART_CHECK_EARLY_EXIT(env); \
2928 \
f714361e 2929 for (i = env->vstart; i < vl; i++) { \
f9298de5 2930 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
2931 /* set masked-off elements to 1s */ \
2932 vext_set_elems_1s(vd, vma, i * ESZ, \
2933 (i + 1) * ESZ); \
ce2a0343
LZ
2934 continue; \
2935 } \
2936 do_##NAME(vd, s1, vs2, i, env); \
2937 } \
f714361e 2938 env->vstart = 0; \
5eacf7d8 2939 /* set tail elements to 1s */ \
2940 vext_set_elems_1s(vd, vta, vl * ESZ, \
2941 total_elems * ESZ); \
ce2a0343
LZ
2942}
2943
2944RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2945RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2946RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
5eacf7d8 2947GEN_VEXT_VF(vfadd_vf_h, 2)
2948GEN_VEXT_VF(vfadd_vf_w, 4)
2949GEN_VEXT_VF(vfadd_vf_d, 8)
ce2a0343
LZ
2950
2951RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2952RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2953RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
5eacf7d8 2954GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
2955GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
2956GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
ce2a0343
LZ
2957RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2958RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2959RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
5eacf7d8 2960GEN_VEXT_VF(vfsub_vf_h, 2)
2961GEN_VEXT_VF(vfsub_vf_w, 4)
2962GEN_VEXT_VF(vfsub_vf_d, 8)
ce2a0343
LZ
2963
2964static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2965{
2966 return float16_sub(b, a, s);
2967}
2968
2969static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2970{
2971 return float32_sub(b, a, s);
2972}
2973
2974static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2975{
2976 return float64_sub(b, a, s);
2977}
2978
2979RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2980RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2981RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
5eacf7d8 2982GEN_VEXT_VF(vfrsub_vf_h, 2)
2983GEN_VEXT_VF(vfrsub_vf_w, 4)
2984GEN_VEXT_VF(vfrsub_vf_d, 8)
eeffab2e
LZ
2985
2986/* Vector Widening Floating-Point Add/Subtract Instructions */
2987static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2988{
2989 return float32_add(float16_to_float32(a, true, s),
c45eff30 2990 float16_to_float32(b, true, s), s);
eeffab2e
LZ
2991}
2992
2993static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2994{
2995 return float64_add(float32_to_float64(a, s),
c45eff30 2996 float32_to_float64(b, s), s);
eeffab2e
LZ
2997
2998}
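/*
 * The widening FP helpers promote both operands to the 2*SEW format
 * and perform a single wide operation; the promotion itself is exact,
 * so only one rounding step occurs.  The 'true' argument to
 * float16_to_float32() selects IEEE half-precision semantics.
 */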
2999
3000RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3001RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
5eacf7d8 3002GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3003GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
eeffab2e
LZ
3004RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3005RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
5eacf7d8 3006GEN_VEXT_VF(vfwadd_vf_h, 4)
3007GEN_VEXT_VF(vfwadd_vf_w, 8)
eeffab2e
LZ
3008
3009static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3010{
3011 return float32_sub(float16_to_float32(a, true, s),
c45eff30 3012 float16_to_float32(b, true, s), s);
eeffab2e
LZ
3013}
3014
3015static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3016{
3017 return float64_sub(float32_to_float64(a, s),
c45eff30 3018 float32_to_float64(b, s), s);
eeffab2e
LZ
3019
3020}
3021
3022RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3023RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
5eacf7d8 3024GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3025GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
eeffab2e
LZ
3026RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3027RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
5eacf7d8 3028GEN_VEXT_VF(vfwsub_vf_h, 4)
3029GEN_VEXT_VF(vfwsub_vf_w, 8)
eeffab2e
LZ
3030
3031static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3032{
3033 return float32_add(a, float16_to_float32(b, true, s), s);
3034}
3035
3036static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3037{
3038 return float64_add(a, float32_to_float64(b, s), s);
3039}
3040
3041RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3042RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
5eacf7d8 3043GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3044GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
eeffab2e
LZ
3045RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3046RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
5eacf7d8 3047GEN_VEXT_VF(vfwadd_wf_h, 4)
3048GEN_VEXT_VF(vfwadd_wf_w, 8)
eeffab2e
LZ
3049
3050static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3051{
3052 return float32_sub(a, float16_to_float32(b, true, s), s);
3053}
3054
3055static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3056{
3057 return float64_sub(a, float32_to_float64(b, s), s);
3058}
3059
3060RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3061RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
5eacf7d8 3062GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3063GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
eeffab2e
LZ
3064RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3065RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
5eacf7d8 3066GEN_VEXT_VF(vfwsub_wf_h, 4)
3067GEN_VEXT_VF(vfwsub_wf_w, 8)
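/*
 * Note on the operand forms above (a reading of the code, not new behaviour):
 * the .vv/.vf widening ops (WOP_UUU_*) convert both SEW-wide inputs to 2*SEW
 * before the add/sub, while the .wv/.wf forms (WOP_WUUU_*) take a first
 * operand that is already 2*SEW wide, so only the second operand is widened
 * (see vfwaddw16/vfwsubw16 versus vfwadd16/vfwsub16).
 */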
0e0057cb
LZ
3068
3069/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3070RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3071RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3072RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
5eacf7d8 3073GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3074GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3075GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
0e0057cb
LZ
3076RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3077RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3078RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
5eacf7d8 3079GEN_VEXT_VF(vfmul_vf_h, 2)
3080GEN_VEXT_VF(vfmul_vf_w, 4)
3081GEN_VEXT_VF(vfmul_vf_d, 8)
0e0057cb
LZ
3082
3083RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3084RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3085RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
5eacf7d8 3086GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3087GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3088GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
0e0057cb
LZ
3089RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3090RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3091RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
5eacf7d8 3092GEN_VEXT_VF(vfdiv_vf_h, 2)
3093GEN_VEXT_VF(vfdiv_vf_w, 4)
3094GEN_VEXT_VF(vfdiv_vf_d, 8)
0e0057cb
LZ
3095
3096static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3097{
3098 return float16_div(b, a, s);
3099}
3100
3101static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3102{
3103 return float32_div(b, a, s);
3104}
3105
3106static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3107{
3108 return float64_div(b, a, s);
3109}
3110
3111RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3112RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3113RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
5eacf7d8 3114GEN_VEXT_VF(vfrdiv_vf_h, 2)
3115GEN_VEXT_VF(vfrdiv_vf_w, 4)
3116GEN_VEXT_VF(vfrdiv_vf_d, 8)
f7c7b7cd
LZ
3117
3118/* Vector Widening Floating-Point Multiply */
3119static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3120{
3121 return float32_mul(float16_to_float32(a, true, s),
c45eff30 3122 float16_to_float32(b, true, s), s);
f7c7b7cd
LZ
3123}
3124
3125static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3126{
3127 return float64_mul(float32_to_float64(a, s),
c45eff30 3128 float32_to_float64(b, s), s);
f7c7b7cd
LZ
3129
3130}
3131RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3132RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
5eacf7d8 3133GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3134GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
f7c7b7cd
LZ
3135RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3136RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
5eacf7d8 3137GEN_VEXT_VF(vfwmul_vf_h, 4)
3138GEN_VEXT_VF(vfwmul_vf_w, 8)
4aa5a8fe
LZ
3139
3140/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3141#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3142static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
c45eff30 3143 CPURISCVState *env) \
4aa5a8fe
LZ
3144{ \
3145 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3146 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3147 TD d = *((TD *)vd + HD(i)); \
3148 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3149}
3150
3151static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3152{
3153 return float16_muladd(a, b, d, 0, s);
3154}
3155
3156static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3157{
3158 return float32_muladd(a, b, d, 0, s);
3159}
3160
3161static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3162{
3163 return float64_muladd(a, b, d, 0, s);
3164}
3165
3166RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3167RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3168RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
5eacf7d8 3169GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3170GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3171GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
4aa5a8fe
LZ
3172
3173#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3174static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
c45eff30 3175 CPURISCVState *env) \
4aa5a8fe
LZ
3176{ \
3177 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3178 TD d = *((TD *)vd + HD(i)); \
3179 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3180}
3181
3182RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3183RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3184RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
5eacf7d8 3185GEN_VEXT_VF(vfmacc_vf_h, 2)
3186GEN_VEXT_VF(vfmacc_vf_w, 4)
3187GEN_VEXT_VF(vfmacc_vf_d, 8)
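/*
 * For reference, a sketch of what the RVVCALL(OPFVV3, vfmacc_vv_h, ...) line
 * above expands to, assuming OP_UUU_H supplies uint16_t for all five type
 * parameters and H2 is the host-endian element index macro, both defined
 * earlier in this file (illustrative only, not compiled):
 *
 *   static void do_vfmacc_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                              CPURISCVState *env)
 *   {
 *       uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *       uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *       uint16_t d = *((uint16_t *)vd + H2(i));
 *       *((uint16_t *)vd + H2(i)) = fmacc16(s2, s1, d, &env->fp_status);
 *   }
 */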
4aa5a8fe
LZ
3188
3189static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3190{
c45eff30
WL
3191 return float16_muladd(a, b, d, float_muladd_negate_c |
3192 float_muladd_negate_product, s);
4aa5a8fe
LZ
3193}
3194
3195static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3196{
c45eff30
WL
3197 return float32_muladd(a, b, d, float_muladd_negate_c |
3198 float_muladd_negate_product, s);
4aa5a8fe
LZ
3199}
3200
3201static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3202{
c45eff30
WL
3203 return float64_muladd(a, b, d, float_muladd_negate_c |
3204 float_muladd_negate_product, s);
4aa5a8fe
LZ
3205}
3206
3207RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3208RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3209RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
5eacf7d8 3210GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3211GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3212GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
4aa5a8fe
LZ
3213RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3214RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3215RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
5eacf7d8 3216GEN_VEXT_VF(vfnmacc_vf_h, 2)
3217GEN_VEXT_VF(vfnmacc_vf_w, 4)
3218GEN_VEXT_VF(vfnmacc_vf_d, 8)
4aa5a8fe
LZ
3219
3220static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3221{
3222 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3223}
3224
3225static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3226{
3227 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3228}
3229
3230static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3231{
3232 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3233}
3234
3235RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3236RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3237RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
5eacf7d8 3238GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3239GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3240GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
4aa5a8fe
LZ
3241RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3242RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3243RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
5eacf7d8 3244GEN_VEXT_VF(vfmsac_vf_h, 2)
3245GEN_VEXT_VF(vfmsac_vf_w, 4)
3246GEN_VEXT_VF(vfmsac_vf_d, 8)
4aa5a8fe
LZ
3247
3248static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3249{
3250 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3251}
3252
3253static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3254{
3255 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3256}
3257
3258static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3259{
3260 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3261}
3262
3263RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3264RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3265RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
5eacf7d8 3266GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3267GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3268GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
4aa5a8fe
LZ
3269RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3270RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3271RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
5eacf7d8 3272GEN_VEXT_VF(vfnmsac_vf_h, 2)
3273GEN_VEXT_VF(vfnmsac_vf_w, 4)
3274GEN_VEXT_VF(vfnmsac_vf_d, 8)
4aa5a8fe
LZ
3275
3276static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3277{
3278 return float16_muladd(d, b, a, 0, s);
3279}
3280
3281static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3282{
3283 return float32_muladd(d, b, a, 0, s);
3284}
3285
3286static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3287{
3288 return float64_muladd(d, b, a, 0, s);
3289}
3290
3291RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3292RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3293RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
5eacf7d8 3294GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3295GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3296GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
4aa5a8fe
LZ
3297RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3298RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3299RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
5eacf7d8 3300GEN_VEXT_VF(vfmadd_vf_h, 2)
3301GEN_VEXT_VF(vfmadd_vf_w, 4)
3302GEN_VEXT_VF(vfmadd_vf_d, 8)
4aa5a8fe
LZ
3303
3304static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3305{
c45eff30
WL
3306 return float16_muladd(d, b, a, float_muladd_negate_c |
3307 float_muladd_negate_product, s);
4aa5a8fe
LZ
3308}
3309
3310static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3311{
c45eff30
WL
3312 return float32_muladd(d, b, a, float_muladd_negate_c |
3313 float_muladd_negate_product, s);
4aa5a8fe
LZ
3314}
3315
3316static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3317{
c45eff30
WL
3318 return float64_muladd(d, b, a, float_muladd_negate_c |
3319 float_muladd_negate_product, s);
4aa5a8fe
LZ
3320}
3321
3322RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3323RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3324RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
5eacf7d8 3325GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3326GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3327GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
4aa5a8fe
LZ
3328RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3329RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3330RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
5eacf7d8 3331GEN_VEXT_VF(vfnmadd_vf_h, 2)
3332GEN_VEXT_VF(vfnmadd_vf_w, 4)
3333GEN_VEXT_VF(vfnmadd_vf_d, 8)
4aa5a8fe
LZ
3334
3335static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3336{
3337 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3338}
3339
3340static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3341{
3342 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3343}
3344
3345static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3346{
3347 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3348}
3349
3350RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3351RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3352RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
5eacf7d8 3353GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3354GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3355GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
4aa5a8fe
LZ
3356RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3357RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3358RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
5eacf7d8 3359GEN_VEXT_VF(vfmsub_vf_h, 2)
3360GEN_VEXT_VF(vfmsub_vf_w, 4)
3361GEN_VEXT_VF(vfmsub_vf_d, 8)
4aa5a8fe
LZ
3362
3363static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3364{
3365 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3366}
3367
3368static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3369{
3370 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3371}
3372
3373static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3374{
3375 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3376}
3377
3378RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3379RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3380RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
5eacf7d8 3381GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3382GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3383GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
4aa5a8fe
LZ
3384RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3385RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3386RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
5eacf7d8 3387GEN_VEXT_VF(vfnmsub_vf_h, 2)
3388GEN_VEXT_VF(vfnmsub_vf_w, 4)
3389GEN_VEXT_VF(vfnmsub_vf_d, 8)
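/*
 * Summary of the single-width fused multiply-add variants above, written as
 * the value stored to vd[i] (vs1 is the scalar rs1 operand for the _vf forms),
 * together with the float_muladd flags each wrapper passes:
 *
 *   vfmacc:   vd = +(vs1 * vs2) + vd     (no flags)
 *   vfnmacc:  vd = -(vs1 * vs2) - vd     (negate_product | negate_c)
 *   vfmsac:   vd = +(vs1 * vs2) - vd     (negate_c)
 *   vfnmsac:  vd = -(vs1 * vs2) + vd     (negate_product)
 *   vfmadd:   vd = +(vs1 * vd) + vs2     (no flags)
 *   vfnmadd:  vd = -(vs1 * vd) - vs2     (negate_product | negate_c)
 *   vfmsub:   vd = +(vs1 * vd) - vs2     (negate_c)
 *   vfnmsub:  vd = -(vs1 * vd) + vs2     (negate_product)
 */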
0dd50959
LZ
3390
3391/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3392static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3393{
3394 return float32_muladd(float16_to_float32(a, true, s),
c45eff30 3395 float16_to_float32(b, true, s), d, 0, s);
0dd50959
LZ
3396}
3397
3398static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3399{
3400 return float64_muladd(float32_to_float64(a, s),
c45eff30 3401 float32_to_float64(b, s), d, 0, s);
0dd50959
LZ
3402}
3403
3404RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3405RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
5eacf7d8 3406GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3407GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
0dd50959
LZ
3408RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3409RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
5eacf7d8 3410GEN_VEXT_VF(vfwmacc_vf_h, 4)
3411GEN_VEXT_VF(vfwmacc_vf_w, 8)
0dd50959 3412
adf772b0
WL
3413static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3414{
3415 return float32_muladd(bfloat16_to_float32(a, s),
3416 bfloat16_to_float32(b, s), d, 0, s);
3417}
3418
3419RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
3420GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
837570ce 3421RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
adf772b0
WL
3422GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3423
0dd50959
LZ
3424static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3425{
3426 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3427 float16_to_float32(b, true, s), d,
3428 float_muladd_negate_c | float_muladd_negate_product,
3429 s);
0dd50959
LZ
3430}
3431
3432static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3433{
c45eff30
WL
3434 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
3435 d, float_muladd_negate_c |
3436 float_muladd_negate_product, s);
0dd50959
LZ
3437}
3438
3439RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3440RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
5eacf7d8 3441GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3442GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
0dd50959
LZ
3443RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3444RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
5eacf7d8 3445GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3446GEN_VEXT_VF(vfwnmacc_vf_w, 8)
0dd50959
LZ
3447
3448static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3449{
3450 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3451 float16_to_float32(b, true, s), d,
3452 float_muladd_negate_c, s);
0dd50959
LZ
3453}
3454
3455static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3456{
3457 return float64_muladd(float32_to_float64(a, s),
c45eff30
WL
3458 float32_to_float64(b, s), d,
3459 float_muladd_negate_c, s);
0dd50959
LZ
3460}
3461
3462RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3463RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
5eacf7d8 3464GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3465GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
0dd50959
LZ
3466RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3467RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
5eacf7d8 3468GEN_VEXT_VF(vfwmsac_vf_h, 4)
3469GEN_VEXT_VF(vfwmsac_vf_w, 8)
0dd50959
LZ
3470
3471static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3472{
3473 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3474 float16_to_float32(b, true, s), d,
3475 float_muladd_negate_product, s);
0dd50959
LZ
3476}
3477
3478static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3479{
3480 return float64_muladd(float32_to_float64(a, s),
c45eff30
WL
3481 float32_to_float64(b, s), d,
3482 float_muladd_negate_product, s);
0dd50959
LZ
3483}
3484
3485RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3486RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
5eacf7d8 3487GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3488GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
0dd50959
LZ
3489RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3490RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
5eacf7d8 3491GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3492GEN_VEXT_VF(vfwnmsac_vf_w, 8)
d9e4ce72
LZ
3493
3494/* Vector Floating-Point Square-Root Instruction */
c45eff30 3495#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
d9e4ce72 3496static void do_##NAME(void *vd, void *vs2, int i, \
c45eff30 3497 CPURISCVState *env) \
d9e4ce72
LZ
3498{ \
3499 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3500 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3501}
3502
5eacf7d8 3503#define GEN_VEXT_V_ENV(NAME, ESZ) \
d9e4ce72 3504void HELPER(NAME)(void *vd, void *v0, void *vs2, \
c45eff30 3505 CPURISCVState *env, uint32_t desc) \
d9e4ce72 3506{ \
d9e4ce72
LZ
3507 uint32_t vm = vext_vm(desc); \
3508 uint32_t vl = env->vl; \
5eacf7d8 3509 uint32_t total_elems = \
3510 vext_get_total_elems(env, desc, ESZ); \
3511 uint32_t vta = vext_vta(desc); \
5b448f44 3512 uint32_t vma = vext_vma(desc); \
d9e4ce72
LZ
3513 uint32_t i; \
3514 \
df4252b2
DHB
3515 VSTART_CHECK_EARLY_EXIT(env); \
3516 \
d9e4ce72
LZ
3517 if (vl == 0) { \
3518 return; \
3519 } \
f714361e 3520 for (i = env->vstart; i < vl; i++) { \
f9298de5 3521 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3522 /* set masked-off elements to 1s */ \
3523 vext_set_elems_1s(vd, vma, i * ESZ, \
3524 (i + 1) * ESZ); \
d9e4ce72
LZ
3525 continue; \
3526 } \
3527 do_##NAME(vd, vs2, i, env); \
3528 } \
f714361e 3529 env->vstart = 0; \
5eacf7d8 3530 vext_set_elems_1s(vd, vta, vl * ESZ, \
3531 total_elems * ESZ); \
d9e4ce72
LZ
3532}
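/*
 * Reading of the macro above: each GEN_VEXT_V_ENV(NAME, ESZ) use below emits
 * a helper_NAME(vd, v0, vs2, env, desc) function (HELPER() prepends the
 * helper_ prefix) that applies do_NAME to every active element starting at
 * vstart, writes 1s over masked-off elements when vma (mask-agnostic) is
 * set, and fills the tail with 1s when vta is set.
 */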
3533
3534RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3535RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3536RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
5eacf7d8 3537GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3538GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3539GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
230b53dd 3540
e848a1e5
FC
3541/*
3542 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3543 *
3544 * Adapted from riscv-v-spec recip.c:
3545 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3546 */
3547static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3548{
3549 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3550 uint64_t exp = extract64(f, frac_size, exp_size);
3551 uint64_t frac = extract64(f, 0, frac_size);
3552
3553 const uint8_t lookup_table[] = {
3554 52, 51, 50, 48, 47, 46, 44, 43,
3555 42, 41, 40, 39, 38, 36, 35, 34,
3556 33, 32, 31, 30, 30, 29, 28, 27,
3557 26, 25, 24, 23, 23, 22, 21, 20,
3558 19, 19, 18, 17, 16, 16, 15, 14,
3559 14, 13, 12, 12, 11, 10, 10, 9,
3560 9, 8, 7, 7, 6, 6, 5, 4,
3561 4, 3, 3, 2, 2, 1, 1, 0,
3562 127, 125, 123, 121, 119, 118, 116, 114,
3563 113, 111, 109, 108, 106, 105, 103, 102,
3564 100, 99, 97, 96, 95, 93, 92, 91,
3565 90, 88, 87, 86, 85, 84, 83, 82,
3566 80, 79, 78, 77, 76, 75, 74, 73,
3567 72, 71, 70, 70, 69, 68, 67, 66,
3568 65, 64, 63, 63, 62, 61, 60, 59,
3569 59, 58, 57, 56, 56, 55, 54, 53
3570 };
3571 const int precision = 7;
3572
3573 if (exp == 0 && frac != 0) { /* subnormal */
3574 /* Normalize the subnormal. */
3575 while (extract64(frac, frac_size - 1, 1) == 0) {
3576 exp--;
3577 frac <<= 1;
3578 }
3579
3580 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3581 }
3582
3583 int idx = ((exp & 1) << (precision - 1)) |
c45eff30 3584 (frac >> (frac_size - precision + 1));
e848a1e5 3585 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
c45eff30 3586 (frac_size - precision);
e848a1e5
FC
3587 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3588
3589 uint64_t val = 0;
3590 val = deposit64(val, 0, frac_size, out_frac);
3591 val = deposit64(val, frac_size, exp_size, out_exp);
3592 val = deposit64(val, frac_size + exp_size, 1, sign);
3593 return val;
3594}
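/*
 * Worked example, hand-derived from the code above for a binary16 input
 * (exp_size = 5, frac_size = 10): for f = 1.0 (0x3c00), sign = 0, exp = 15,
 * frac = 0, so
 *   idx      = ((15 & 1) << 6) | 0   = 64
 *   out_frac = lookup_table[64] << 3 = 127 << 3 = 0x3f8
 *   out_exp  = (3 * 0xf + ~15) / 2   = 14       (mod 2^64 arithmetic)
 * giving 0x3bf8, a 7-bit estimate of 1/sqrt(1.0) just below 1.0.
 */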
3595
3596static float16 frsqrt7_h(float16 f, float_status *s)
3597{
3598 int exp_size = 5, frac_size = 10;
3599 bool sign = float16_is_neg(f);
3600
3601 /*
3602 * frsqrt7(sNaN) = canonical NaN
3603 * frsqrt7(-inf) = canonical NaN
3604 * frsqrt7(-normal) = canonical NaN
3605 * frsqrt7(-subnormal) = canonical NaN
3606 */
3607 if (float16_is_signaling_nan(f, s) ||
c45eff30
WL
3608 (float16_is_infinity(f) && sign) ||
3609 (float16_is_normal(f) && sign) ||
3610 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
e848a1e5
FC
3611 s->float_exception_flags |= float_flag_invalid;
3612 return float16_default_nan(s);
3613 }
3614
3615 /* frsqrt7(qNaN) = canonical NaN */
3616 if (float16_is_quiet_nan(f, s)) {
3617 return float16_default_nan(s);
3618 }
3619
3620 /* frsqrt7(+-0) = +-inf */
3621 if (float16_is_zero(f)) {
3622 s->float_exception_flags |= float_flag_divbyzero;
3623 return float16_set_sign(float16_infinity, sign);
3624 }
3625
3626 /* frsqrt7(+inf) = +0 */
3627 if (float16_is_infinity(f) && !sign) {
3628 return float16_set_sign(float16_zero, sign);
3629 }
3630
3631 /* +normal, +subnormal */
3632 uint64_t val = frsqrt7(f, exp_size, frac_size);
3633 return make_float16(val);
3634}
3635
3636static float32 frsqrt7_s(float32 f, float_status *s)
3637{
3638 int exp_size = 8, frac_size = 23;
3639 bool sign = float32_is_neg(f);
3640
3641 /*
3642 * frsqrt7(sNaN) = canonical NaN
3643 * frsqrt7(-inf) = canonical NaN
3644 * frsqrt7(-normal) = canonical NaN
3645 * frsqrt7(-subnormal) = canonical NaN
3646 */
3647 if (float32_is_signaling_nan(f, s) ||
c45eff30
WL
3648 (float32_is_infinity(f) && sign) ||
3649 (float32_is_normal(f) && sign) ||
3650 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
e848a1e5
FC
3651 s->float_exception_flags |= float_flag_invalid;
3652 return float32_default_nan(s);
3653 }
3654
3655 /* frsqrt7(qNaN) = canonical NaN */
3656 if (float32_is_quiet_nan(f, s)) {
3657 return float32_default_nan(s);
3658 }
3659
3660 /* frsqrt7(+-0) = +-inf */
3661 if (float32_is_zero(f)) {
3662 s->float_exception_flags |= float_flag_divbyzero;
3663 return float32_set_sign(float32_infinity, sign);
3664 }
3665
3666 /* frsqrt7(+inf) = +0 */
3667 if (float32_is_infinity(f) && !sign) {
3668 return float32_set_sign(float32_zero, sign);
3669 }
3670
3671 /* +normal, +subnormal */
3672 uint64_t val = frsqrt7(f, exp_size, frac_size);
3673 return make_float32(val);
3674}
3675
3676static float64 frsqrt7_d(float64 f, float_status *s)
3677{
3678 int exp_size = 11, frac_size = 52;
3679 bool sign = float64_is_neg(f);
3680
3681 /*
3682 * frsqrt7(sNaN) = canonical NaN
3683 * frsqrt7(-inf) = canonical NaN
3684 * frsqrt7(-normal) = canonical NaN
3685 * frsqrt7(-subnormal) = canonical NaN
3686 */
3687 if (float64_is_signaling_nan(f, s) ||
c45eff30
WL
3688 (float64_is_infinity(f) && sign) ||
3689 (float64_is_normal(f) && sign) ||
3690 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
e848a1e5
FC
3691 s->float_exception_flags |= float_flag_invalid;
3692 return float64_default_nan(s);
3693 }
3694
3695 /* frsqrt7(qNaN) = canonical NaN */
3696 if (float64_is_quiet_nan(f, s)) {
3697 return float64_default_nan(s);
3698 }
3699
3700 /* frsqrt7(+-0) = +-inf */
3701 if (float64_is_zero(f)) {
3702 s->float_exception_flags |= float_flag_divbyzero;
3703 return float64_set_sign(float64_infinity, sign);
3704 }
3705
3706 /* frsqrt7(+inf) = +0 */
3707 if (float64_is_infinity(f) && !sign) {
3708 return float64_set_sign(float64_zero, sign);
3709 }
3710
3711 /* +normal, +subnormal */
3712 uint64_t val = frsqrt7(f, exp_size, frac_size);
3713 return make_float64(val);
3714}
3715
3716RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3717RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3718RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
5eacf7d8 3719GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3720GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3721GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
e848a1e5 3722
55c35407
FC
3723/*
3724 * Vector Floating-Point Reciprocal Estimate Instruction
3725 *
3726 * Adapted from riscv-v-spec recip.c:
3727 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3728 */
3729static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3730 float_status *s)
3731{
3732 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3733 uint64_t exp = extract64(f, frac_size, exp_size);
3734 uint64_t frac = extract64(f, 0, frac_size);
3735
3736 const uint8_t lookup_table[] = {
3737 127, 125, 123, 121, 119, 117, 116, 114,
3738 112, 110, 109, 107, 105, 104, 102, 100,
3739 99, 97, 96, 94, 93, 91, 90, 88,
3740 87, 85, 84, 83, 81, 80, 79, 77,
3741 76, 75, 74, 72, 71, 70, 69, 68,
3742 66, 65, 64, 63, 62, 61, 60, 59,
3743 58, 57, 56, 55, 54, 53, 52, 51,
3744 50, 49, 48, 47, 46, 45, 44, 43,
3745 42, 41, 40, 40, 39, 38, 37, 36,
3746 35, 35, 34, 33, 32, 31, 31, 30,
3747 29, 28, 28, 27, 26, 25, 25, 24,
3748 23, 23, 22, 21, 21, 20, 19, 19,
3749 18, 17, 17, 16, 15, 15, 14, 14,
3750 13, 12, 12, 11, 11, 10, 9, 9,
3751 8, 8, 7, 7, 6, 5, 5, 4,
3752 4, 3, 3, 2, 2, 1, 1, 0
3753 };
3754 const int precision = 7;
3755
3756 if (exp == 0 && frac != 0) { /* subnormal */
3757 /* Normalize the subnormal. */
3758 while (extract64(frac, frac_size - 1, 1) == 0) {
3759 exp--;
3760 frac <<= 1;
3761 }
3762
3763 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3764
3765 if (exp != 0 && exp != UINT64_MAX) {
3766 /*
3767 * Overflow to inf or max value of same sign,
3768 * depending on sign and rounding mode.
3769 */
3770 s->float_exception_flags |= (float_flag_inexact |
3771 float_flag_overflow);
3772
3773 if ((s->float_rounding_mode == float_round_to_zero) ||
3774 ((s->float_rounding_mode == float_round_down) && !sign) ||
3775 ((s->float_rounding_mode == float_round_up) && sign)) {
3776 /* Return the greatest (or most negative) finite value of the same sign. */
3777 return (sign << (exp_size + frac_size)) |
c45eff30 3778 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
55c35407
FC
3779 } else {
3780 /* Return +-inf. */
3781 return (sign << (exp_size + frac_size)) |
c45eff30 3782 MAKE_64BIT_MASK(frac_size, exp_size);
55c35407
FC
3783 }
3784 }
3785 }
3786
3787 int idx = frac >> (frac_size - precision);
3788 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
c45eff30 3789 (frac_size - precision);
55c35407
FC
3790 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3791
3792 if (out_exp == 0 || out_exp == UINT64_MAX) {
3793 /*
3794 * The result is subnormal, but don't raise the underflow exception,
3795 * because there's no additional loss of precision.
3796 */
3797 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3798 if (out_exp == UINT64_MAX) {
3799 out_frac >>= 1;
3800 out_exp = 0;
3801 }
3802 }
3803
3804 uint64_t val = 0;
3805 val = deposit64(val, 0, frac_size, out_frac);
3806 val = deposit64(val, frac_size, exp_size, out_exp);
3807 val = deposit64(val, frac_size + exp_size, 1, sign);
3808 return val;
3809}
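/*
 * Worked example, hand-derived from the code above for a binary16 input
 * (exp_size = 5, frac_size = 10): for f = 2.0 (0x4000), sign = 0, exp = 16,
 * frac = 0, so
 *   idx      = 0
 *   out_frac = lookup_table[0] << 3 = 127 << 3 = 0x3f8
 *   out_exp  = 2 * 0xf + ~16        = 13        (mod 2^64 arithmetic)
 * giving 0x37f8 (about 0.498), a 7-bit estimate of 1/2.0.
 */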
3810
3811static float16 frec7_h(float16 f, float_status *s)
3812{
3813 int exp_size = 5, frac_size = 10;
3814 bool sign = float16_is_neg(f);
3815
3816 /* frec7(+-inf) = +-0 */
3817 if (float16_is_infinity(f)) {
3818 return float16_set_sign(float16_zero, sign);
3819 }
3820
3821 /* frec7(+-0) = +-inf */
3822 if (float16_is_zero(f)) {
3823 s->float_exception_flags |= float_flag_divbyzero;
3824 return float16_set_sign(float16_infinity, sign);
3825 }
3826
3827 /* frec7(sNaN) = canonical NaN */
3828 if (float16_is_signaling_nan(f, s)) {
3829 s->float_exception_flags |= float_flag_invalid;
3830 return float16_default_nan(s);
3831 }
3832
3833 /* frec7(qNaN) = canonical NaN */
3834 if (float16_is_quiet_nan(f, s)) {
3835 return float16_default_nan(s);
3836 }
3837
3838 /* +-normal, +-subnormal */
3839 uint64_t val = frec7(f, exp_size, frac_size, s);
3840 return make_float16(val);
3841}
3842
3843static float32 frec7_s(float32 f, float_status *s)
3844{
3845 int exp_size = 8, frac_size = 23;
3846 bool sign = float32_is_neg(f);
3847
3848 /* frec7(+-inf) = +-0 */
3849 if (float32_is_infinity(f)) {
3850 return float32_set_sign(float32_zero, sign);
3851 }
3852
3853 /* frec7(+-0) = +-inf */
3854 if (float32_is_zero(f)) {
3855 s->float_exception_flags |= float_flag_divbyzero;
3856 return float32_set_sign(float32_infinity, sign);
3857 }
3858
3859 /* frec7(sNaN) = canonical NaN */
3860 if (float32_is_signaling_nan(f, s)) {
3861 s->float_exception_flags |= float_flag_invalid;
3862 return float32_default_nan(s);
3863 }
3864
3865 /* frec7(qNaN) = canonical NaN */
3866 if (float32_is_quiet_nan(f, s)) {
3867 return float32_default_nan(s);
3868 }
3869
3870 /* +-normal, +-subnormal */
3871 uint64_t val = frec7(f, exp_size, frac_size, s);
3872 return make_float32(val);
3873}
3874
3875static float64 frec7_d(float64 f, float_status *s)
3876{
3877 int exp_size = 11, frac_size = 52;
3878 bool sign = float64_is_neg(f);
3879
3880 /* frec7(+-inf) = +-0 */
3881 if (float64_is_infinity(f)) {
3882 return float64_set_sign(float64_zero, sign);
3883 }
3884
3885 /* frec7(+-0) = +-inf */
3886 if (float64_is_zero(f)) {
3887 s->float_exception_flags |= float_flag_divbyzero;
3888 return float64_set_sign(float64_infinity, sign);
3889 }
3890
3891 /* frec7(sNaN) = canonical NaN */
3892 if (float64_is_signaling_nan(f, s)) {
3893 s->float_exception_flags |= float_flag_invalid;
3894 return float64_default_nan(s);
3895 }
3896
3897 /* frec7(qNaN) = canonical NaN */
3898 if (float64_is_quiet_nan(f, s)) {
3899 return float64_default_nan(s);
3900 }
3901
3902 /* +-normal, +-subnormal */
3903 uint64_t val = frec7(f, exp_size, frac_size, s);
3904 return make_float64(val);
3905}
3906
3907RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3908RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3909RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
5eacf7d8 3910GEN_VEXT_V_ENV(vfrec7_v_h, 2)
3911GEN_VEXT_V_ENV(vfrec7_v_w, 4)
3912GEN_VEXT_V_ENV(vfrec7_v_d, 8)
55c35407 3913
230b53dd 3914/* Vector Floating-Point MIN/MAX Instructions */
49c5611a
FC
3915RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3916RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3917RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
5eacf7d8 3918GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
3919GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
3920GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
49c5611a
FC
3921RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3922RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3923RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
5eacf7d8 3924GEN_VEXT_VF(vfmin_vf_h, 2)
3925GEN_VEXT_VF(vfmin_vf_w, 4)
3926GEN_VEXT_VF(vfmin_vf_d, 8)
230b53dd 3927
49c5611a
FC
3928RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3929RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3930RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
5eacf7d8 3931GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
3932GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
3933GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
49c5611a
FC
3934RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3935RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3936RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
5eacf7d8 3937GEN_VEXT_VF(vfmax_vf_h, 2)
3938GEN_VEXT_VF(vfmax_vf_w, 4)
3939GEN_VEXT_VF(vfmax_vf_d, 8)
1d426b81
LZ
3940
3941/* Vector Floating-Point Sign-Injection Instructions */
3942static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3943{
3944 return deposit64(b, 0, 15, a);
3945}
3946
3947static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3948{
3949 return deposit64(b, 0, 31, a);
3950}
3951
3952static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3953{
3954 return deposit64(b, 0, 63, a);
3955}
3956
3957RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3958RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3959RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
5eacf7d8 3960GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
3961GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
3962GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
1d426b81
LZ
3963RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3964RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3965RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
5eacf7d8 3966GEN_VEXT_VF(vfsgnj_vf_h, 2)
3967GEN_VEXT_VF(vfsgnj_vf_w, 4)
3968GEN_VEXT_VF(vfsgnj_vf_d, 8)
1d426b81
LZ
3969
3970static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3971{
3972 return deposit64(~b, 0, 15, a);
3973}
3974
3975static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3976{
3977 return deposit64(~b, 0, 31, a);
3978}
3979
3980static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3981{
3982 return deposit64(~b, 0, 63, a);
3983}
3984
3985RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3986RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3987RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
5eacf7d8 3988GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
3989GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
3990GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
1d426b81
LZ
3991RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3992RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3993RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
5eacf7d8 3994GEN_VEXT_VF(vfsgnjn_vf_h, 2)
3995GEN_VEXT_VF(vfsgnjn_vf_w, 4)
3996GEN_VEXT_VF(vfsgnjn_vf_d, 8)
1d426b81
LZ
3997
3998static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3999{
4000 return deposit64(b ^ a, 0, 15, a);
4001}
4002
4003static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4004{
4005 return deposit64(b ^ a, 0, 31, a);
4006}
4007
4008static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4009{
4010 return deposit64(b ^ a, 0, 63, a);
4011}
4012
4013RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4014RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4015RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
5eacf7d8 4016GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4017GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4018GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
1d426b81
LZ
4019RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4020RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4021RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
5eacf7d8 4022GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4023GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4024GEN_VEXT_VF(vfsgnjx_vf_d, 8)
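/*
 * Example, hand-checked against the helpers above: fsgnjx16 with
 * a = 0x3c00 (+1.0, the vs2 element) and b = 0xc200 (-3.0, the sign source)
 * computes b ^ a = 0xfe00, then deposit64(0xfe00, 0, 15, 0x3c00) = 0xbc00:
 * the magnitude of a with sign(a) XOR sign(b), i.e. -1.0, matching the
 * vfsgnjx semantics.
 */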
2a68e9e5
LZ
4025
4026/* Vector Floating-Point Compare Instructions */
4027#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4028void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4029 CPURISCVState *env, uint32_t desc) \
4030{ \
2a68e9e5
LZ
4031 uint32_t vm = vext_vm(desc); \
4032 uint32_t vl = env->vl; \
58bc9063 4033 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
5eacf7d8 4034 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 4035 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
4036 uint32_t i; \
4037 \
df4252b2
DHB
4038 VSTART_CHECK_EARLY_EXIT(env); \
4039 \
f714361e 4040 for (i = env->vstart; i < vl; i++) { \
2a68e9e5
LZ
4041 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4042 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4043 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4044 /* set masked-off elements to 1s */ \
4045 if (vma) { \
4046 vext_set_elem_mask(vd, i, 1); \
4047 } \
2a68e9e5
LZ
4048 continue; \
4049 } \
f9298de5 4050 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4051 DO_OP(s2, s1, &env->fp_status)); \
4052 } \
f714361e 4053 env->vstart = 0; \
3b57254d
WL
4054 /*
4055 * mask destination registers are always tail-agnostic
4056 * set tail elements to 1s
4057 */ \
5eacf7d8 4058 if (vta_all_1s) { \
4059 for (; i < total_elems; i++) { \
4060 vext_set_elem_mask(vd, i, 1); \
4061 } \
4062 } \
2a68e9e5
LZ
4063}
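/*
 * The compare helpers generated above produce a mask: vext_set_elem_mask()
 * writes a single bit per element into vd. Masked-off bits are set to 1
 * when vma is set, and the tail bits are filled with 1s when vta_all_1s is
 * set, since mask destination registers are always treated as
 * tail-agnostic.
 */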
4064
2a68e9e5
LZ
4065GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4066GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4067GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4068
4069#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4070void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4071 CPURISCVState *env, uint32_t desc) \
4072{ \
2a68e9e5
LZ
4073 uint32_t vm = vext_vm(desc); \
4074 uint32_t vl = env->vl; \
58bc9063 4075 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
5eacf7d8 4076 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 4077 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
4078 uint32_t i; \
4079 \
df4252b2
DHB
4080 VSTART_CHECK_EARLY_EXIT(env); \
4081 \
f714361e 4082 for (i = env->vstart; i < vl; i++) { \
2a68e9e5 4083 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4084 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4085 /* set masked-off elements to 1s */ \
4086 if (vma) { \
4087 vext_set_elem_mask(vd, i, 1); \
4088 } \
2a68e9e5
LZ
4089 continue; \
4090 } \
f9298de5 4091 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4092 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4093 } \
f714361e 4094 env->vstart = 0; \
3b57254d
WL
4095 /*
4096 * mask destination registers are always tail-agnostic
4097 * set tail elements to 1s
4098 */ \
5eacf7d8 4099 if (vta_all_1s) { \
4100 for (; i < total_elems; i++) { \
4101 vext_set_elem_mask(vd, i, 1); \
4102 } \
4103 } \
2a68e9e5
LZ
4104}
4105
4106GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4107GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4108GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4109
4110static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4111{
4112 FloatRelation compare = float16_compare_quiet(a, b, s);
4113 return compare != float_relation_equal;
4114}
4115
4116static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4117{
4118 FloatRelation compare = float32_compare_quiet(a, b, s);
4119 return compare != float_relation_equal;
4120}
4121
4122static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4123{
4124 FloatRelation compare = float64_compare_quiet(a, b, s);
4125 return compare != float_relation_equal;
4126}
4127
4128GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4129GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4130GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4131GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4132GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4133GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4134
2a68e9e5
LZ
4135GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4136GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4137GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4138GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4139GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4140GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4141
2a68e9e5
LZ
4142GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4143GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4144GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4145GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4146GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4147GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4148
4149static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4150{
4151 FloatRelation compare = float16_compare(a, b, s);
4152 return compare == float_relation_greater;
4153}
4154
4155static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4156{
4157 FloatRelation compare = float32_compare(a, b, s);
4158 return compare == float_relation_greater;
4159}
4160
4161static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4162{
4163 FloatRelation compare = float64_compare(a, b, s);
4164 return compare == float_relation_greater;
4165}
4166
4167GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4168GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4169GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4170
4171static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4172{
4173 FloatRelation compare = float16_compare(a, b, s);
4174 return compare == float_relation_greater ||
4175 compare == float_relation_equal;
4176}
4177
4178static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4179{
4180 FloatRelation compare = float32_compare(a, b, s);
4181 return compare == float_relation_greater ||
4182 compare == float_relation_equal;
4183}
4184
4185static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4186{
4187 FloatRelation compare = float64_compare(a, b, s);
4188 return compare == float_relation_greater ||
4189 compare == float_relation_equal;
4190}
4191
4192GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4193GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4194GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4195
121ddbb3 4196/* Vector Floating-Point Classify Instruction */
121ddbb3
LZ
4197target_ulong fclass_h(uint64_t frs1)
4198{
4199 float16 f = frs1;
4200 bool sign = float16_is_neg(f);
4201
4202 if (float16_is_infinity(f)) {
4203 return sign ? 1 << 0 : 1 << 7;
4204 } else if (float16_is_zero(f)) {
4205 return sign ? 1 << 3 : 1 << 4;
4206 } else if (float16_is_zero_or_denormal(f)) {
4207 return sign ? 1 << 2 : 1 << 5;
4208 } else if (float16_is_any_nan(f)) {
4209 float_status s = { }; /* for snan_bit_is_one */
4210 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4211 } else {
4212 return sign ? 1 << 1 : 1 << 6;
4213 }
4214}
4215
4216target_ulong fclass_s(uint64_t frs1)
4217{
4218 float32 f = frs1;
4219 bool sign = float32_is_neg(f);
4220
4221 if (float32_is_infinity(f)) {
4222 return sign ? 1 << 0 : 1 << 7;
4223 } else if (float32_is_zero(f)) {
4224 return sign ? 1 << 3 : 1 << 4;
4225 } else if (float32_is_zero_or_denormal(f)) {
4226 return sign ? 1 << 2 : 1 << 5;
4227 } else if (float32_is_any_nan(f)) {
4228 float_status s = { }; /* for snan_bit_is_one */
4229 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4230 } else {
4231 return sign ? 1 << 1 : 1 << 6;
4232 }
4233}
4234
4235target_ulong fclass_d(uint64_t frs1)
4236{
4237 float64 f = frs1;
4238 bool sign = float64_is_neg(f);
4239
4240 if (float64_is_infinity(f)) {
4241 return sign ? 1 << 0 : 1 << 7;
4242 } else if (float64_is_zero(f)) {
4243 return sign ? 1 << 3 : 1 << 4;
4244 } else if (float64_is_zero_or_denormal(f)) {
4245 return sign ? 1 << 2 : 1 << 5;
4246 } else if (float64_is_any_nan(f)) {
4247 float_status s = { }; /* for snan_bit_is_one */
4248 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4249 } else {
4250 return sign ? 1 << 1 : 1 << 6;
4251 }
4252}
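/*
 * The value returned by the fclass helpers above is the standard RISC-V
 * classify bit mask:
 *   bit 0: -inf    bit 1: -normal    bit 2: -subnormal    bit 3: -0
 *   bit 4: +0      bit 5: +subnormal bit 6: +normal       bit 7: +inf
 *   bit 8: signaling NaN             bit 9: quiet NaN
 * e.g. fclass_s(0x80000000) (-0.0f) returns 1 << 3 = 8.
 */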
4253
4254RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4255RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4256RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
5eacf7d8 4257GEN_VEXT_V(vfclass_v_h, 2)
4258GEN_VEXT_V(vfclass_v_w, 4)
4259GEN_VEXT_V(vfclass_v_d, 8)
64ab5846
LZ
4260
4261/* Vector Floating-Point Merge Instruction */
5eacf7d8 4262
3479a814 4263#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
64ab5846
LZ
4264void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4265 CPURISCVState *env, uint32_t desc) \
4266{ \
64ab5846
LZ
4267 uint32_t vm = vext_vm(desc); \
4268 uint32_t vl = env->vl; \
5eacf7d8 4269 uint32_t esz = sizeof(ETYPE); \
4270 uint32_t total_elems = \
4271 vext_get_total_elems(env, desc, esz); \
4272 uint32_t vta = vext_vta(desc); \
64ab5846
LZ
4273 uint32_t i; \
4274 \
df4252b2
DHB
4275 VSTART_CHECK_EARLY_EXIT(env); \
4276 \
f714361e 4277 for (i = env->vstart; i < vl; i++) { \
64ab5846 4278 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
c45eff30
WL
4279 *((ETYPE *)vd + H(i)) = \
4280 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
64ab5846 4281 } \
f714361e 4282 env->vstart = 0; \
5eacf7d8 4283 /* set tail elements to 1s */ \
4284 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
64ab5846
LZ
4285}
4286
3479a814
FC
4287GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4288GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4289GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
92100973
LZ
4290
4291/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4292/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4293RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4294RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4295RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
5eacf7d8 4296GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4297GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4298GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
92100973
LZ
4299
4300/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4301RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4302RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4303RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
5eacf7d8 4304GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4305GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4306GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
92100973
LZ
4307
4308/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4309RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4310RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4311RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
5eacf7d8 4312GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4313GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4314GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
92100973
LZ
4315
4316/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4317RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4318RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4319RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
5eacf7d8 4320GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4321GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4322GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4514b7b1
LZ
4323
4324/* Widening Floating-Point/Integer Type-Convert Instructions */
4325/* (TD, T2, TX2) */
3ce4c09d 4326#define WOP_UU_B uint16_t, uint8_t, uint8_t
4514b7b1
LZ
4327#define WOP_UU_H uint32_t, uint16_t, uint16_t
4328#define WOP_UU_W uint64_t, uint32_t, uint32_t
3b57254d
WL
4329/*
4330 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
4331 */
4514b7b1
LZ
4332RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4333RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
5eacf7d8 4334GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4335GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4514b7b1
LZ
4336
4337/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4338RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4339RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
5eacf7d8 4340GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4341GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4514b7b1 4342
246f8796
WL
4343/*
4344 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
4345 */
3ce4c09d 4346RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4514b7b1
LZ
4347RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4348RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
5eacf7d8 4349GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4350GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4351GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4514b7b1
LZ
4352
4353/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3ce4c09d 4354RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4514b7b1
LZ
4355RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4356RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
5eacf7d8 4357GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4358GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4359GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4514b7b1
LZ
4360
4361/*
246f8796 4362 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4514b7b1
LZ
4363 */
4364static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4365{
4366 return float16_to_float32(a, true, s);
4367}
4368
4369RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4370RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
5eacf7d8 4371GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4372GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
878d406e 4373
87b27bfc
WL
4374RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
4375GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
4376
878d406e
LZ
4377/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4378/* (TD, T2, TX2) */
ff679b58 4379#define NOP_UU_B uint8_t, uint16_t, uint32_t
878d406e
LZ
4380#define NOP_UU_H uint16_t, uint32_t, uint32_t
4381#define NOP_UU_W uint32_t, uint64_t, uint64_t
4382/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
ff679b58
FC
4383RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4384RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4385RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
5eacf7d8 4386GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4387GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4388GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
878d406e
LZ
4389
4390/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
ff679b58
FC
4391RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4392RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4393RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
5eacf7d8 4394GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4395GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4396GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
878d406e 4397
246f8796
WL
4398/*
4399 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
4400 */
ff679b58
FC
4401RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4402RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
5eacf7d8 4403GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4404GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
878d406e
LZ
4405
4406/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
ff679b58
FC
4407RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4408RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
5eacf7d8 4409GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4410GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
878d406e
LZ
4411
4412/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4413static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4414{
4415 return float32_to_float16(a, true, s);
4416}
4417
ff679b58
FC
4418RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4419RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
5eacf7d8 4420GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4421GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
fe5c9ab1 4422
87b27bfc
WL
4423RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
4424GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
4425
fe5c9ab1 4426/*
3b57254d 4427 * Vector Reduction Operations
fe5c9ab1
LZ
4428 */
4429/* Vector Single-Width Integer Reduction Instructions */
3479a814 4430#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
fe5c9ab1 4431void HELPER(NAME)(void *vd, void *v0, void *vs1, \
c45eff30
WL
4432 void *vs2, CPURISCVState *env, \
4433 uint32_t desc) \
fe5c9ab1 4434{ \
fe5c9ab1
LZ
4435 uint32_t vm = vext_vm(desc); \
4436 uint32_t vl = env->vl; \
df4f52a7 4437 uint32_t esz = sizeof(TD); \
4438 uint32_t vlenb = simd_maxsz(desc); \
4439 uint32_t vta = vext_vta(desc); \
fe5c9ab1 4440 uint32_t i; \
fe5c9ab1
LZ
4441 TD s1 = *((TD *)vs1 + HD(0)); \
4442 \
f714361e 4443 for (i = env->vstart; i < vl; i++) { \
fe5c9ab1 4444 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4445 if (!vm && !vext_elem_mask(v0, i)) { \
fe5c9ab1
LZ
4446 continue; \
4447 } \
4448 s1 = OP(s1, (TD)s2); \
4449 } \
4450 *((TD *)vd + HD(0)) = s1; \
f714361e 4451 env->vstart = 0; \
df4f52a7 4452 /* set tail elements to 1s */ \
4453 vext_set_elems_1s(vd, vta, esz, vlenb); \
fe5c9ab1
LZ
4454}
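/*
 * Reading of the macro above: the accumulator s1 is seeded from element 0 of
 * vs1, every active element of vs2 is folded in with OP in element order,
 * and only vd[0] receives the result; the remainder of vd is treated as tail
 * and set to 1s when vta is set.
 */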
4455
4456/* vd[0] = sum(vs1[0], vs2[*]) */
3479a814
FC
4457GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4458GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4459GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4460GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
fe5c9ab1
LZ
4461
4462/* vd[0] = maxu(vs1[0], vs2[*]) */
3479a814
FC
4463GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4464GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4465GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4466GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4467
4468/* vd[0] = max(vs1[0], vs2[*]) */
3479a814
FC
4469GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4470GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4471GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4472GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4473
4474/* vd[0] = minu(vs1[0], vs2[*]) */
3479a814
FC
4475GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4476GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4477GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4478GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4479
4480/* vd[0] = min(vs1[0], vs2[*]) */
3479a814
FC
4481GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4482GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4483GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4484GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4485
4486/* vd[0] = and(vs1[0], vs2[*]) */
3479a814
FC
4487GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4488GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4489GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4490GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
fe5c9ab1
LZ
4491
4492/* vd[0] = or(vs1[0], vs2[*]) */
3479a814
FC
4493GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4494GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4495GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4496GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
fe5c9ab1
LZ
4497
4498/* vd[0] = xor(vs1[0], vs2[*]) */
3479a814
FC
4499GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4500GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4501GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4502GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
bba71820
LZ
4503
4504/* Vector Widening Integer Reduction Instructions */
4505/* signed sum reduction into double-width accumulator */
3479a814
FC
4506GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4507GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4508GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
bba71820
LZ
4509
4510/* Unsigned sum reduction into double-width accumulator */
3479a814
FC
4511GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4512GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4513GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
523547f1
LZ
4514
4515/* Vector Single-Width Floating-Point Reduction Instructions */
3479a814 4516#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
523547f1
LZ
4517void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4518 void *vs2, CPURISCVState *env, \
4519 uint32_t desc) \
4520{ \
523547f1
LZ
4521 uint32_t vm = vext_vm(desc); \
4522 uint32_t vl = env->vl; \
df4f52a7 4523 uint32_t esz = sizeof(TD); \
4524 uint32_t vlenb = simd_maxsz(desc); \
4525 uint32_t vta = vext_vta(desc); \
523547f1 4526 uint32_t i; \
523547f1
LZ
4527 TD s1 = *((TD *)vs1 + HD(0)); \
4528 \
f714361e 4529 for (i = env->vstart; i < vl; i++) { \
523547f1 4530 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4531 if (!vm && !vext_elem_mask(v0, i)) { \
523547f1
LZ
4532 continue; \
4533 } \
4534 s1 = OP(s1, (TD)s2, &env->fp_status); \
4535 } \
4536 *((TD *)vd + HD(0)) = s1; \
f714361e 4537 env->vstart = 0; \
df4f52a7 4538 /* set tail elements to 1s */ \
4539 vext_set_elems_1s(vd, vta, esz, vlenb); \
523547f1
LZ
4540}
4541
4542/* Unordered sum */
a3ab69f9
YL
4543GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4544GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4545GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4546
4547/* Ordered sum */
4548GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4549GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4550GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
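/*
 * Note: this helper walks the elements strictly in order, so the
 * unordered (vfredusum) and ordered (vfredosum) variants can share the
 * same float*_add based implementation.
 */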
523547f1
LZ
4551
4552/* Maximum value */
246f8796
WL
4553GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
4554 float16_maximum_number)
4555GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
4556 float32_maximum_number)
4557GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
4558 float64_maximum_number)
523547f1
LZ
4559
4560/* Minimum value */
246f8796
WL
4561GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
4562 float16_minimum_number)
4563GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
4564 float32_minimum_number)
4565GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
4566 float64_minimum_number)
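/*
 * Note: the float*_maximum_number/float*_minimum_number softfloat ops
 * implement the IEEE 754-2019 maximumNumber/minimumNumber semantics,
 * so a single quiet NaN operand is ignored in favour of the numeric
 * operand.
 */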
696b0c26 4567
5bda21c0
YL
4568/* Vector Widening Floating-Point Add Instructions */
4569static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
696b0c26 4570{
5bda21c0 4571 return float32_add(a, float16_to_float32(b, true, s), s);
696b0c26
LZ
4572}
4573
5bda21c0 4574static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
696b0c26 4575{
5bda21c0 4576 return float64_add(a, float32_to_float64(b, s), s);
696b0c26 4577}
c21f34ae 4578
5bda21c0 4579/* Vector Widening Floating-Point Reduction Instructions */
a3ab69f9
YL
4580/* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4581GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4582GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4583GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4584GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
5bda21c0 4585
c21f34ae 4586/*
3b57254d 4587 * Vector Mask Operations
c21f34ae
LZ
4588 */
4589/* Vector Mask-Register Logical Instructions */
4590#define GEN_VEXT_MASK_VV(NAME, OP) \
4591void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4592 void *vs2, CPURISCVState *env, \
4593 uint32_t desc) \
4594{ \
c21f34ae 4595 uint32_t vl = env->vl; \
58bc9063 4596 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\
acc6ffd4 4597 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
c21f34ae
LZ
4598 uint32_t i; \
4599 int a, b; \
4600 \
df4252b2
DHB
4601 VSTART_CHECK_EARLY_EXIT(env); \
4602 \
f714361e 4603 for (i = env->vstart; i < vl; i++) { \
f9298de5
FC
4604 a = vext_elem_mask(vs1, i); \
4605 b = vext_elem_mask(vs2, i); \
4606 vext_set_elem_mask(vd, i, OP(b, a)); \
c21f34ae 4607 } \
f714361e 4608 env->vstart = 0; \
3b57254d
WL
4609 /*
4610 * the mask destination register is always tail-agnostic;
4611 * set tail elements to 1s
acc6ffd4 4612 */ \
acc6ffd4 4613 if (vta_all_1s) { \
4614 for (; i < total_elems; i++) { \
4615 vext_set_elem_mask(vd, i, 1); \
4616 } \
4617 } \
c21f34ae
LZ
4618}
4619
4620#define DO_NAND(N, M) (!(N & M))
4621#define DO_ANDNOT(N, M) (N & !M)
4622#define DO_NOR(N, M) (!(N | M))
4623#define DO_ORNOT(N, M) (N | !M)
4624#define DO_XNOR(N, M) (!(N ^ M))
4625
4626GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4627GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
9c0d2559 4628GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
c21f34ae
LZ
4629GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4630GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4631GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
9c0d2559 4632GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
c21f34ae 4633GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
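/*
 * Worked example (illustrative; vl=4, element mask bits listed from
 * index 0): with vs2 = {1, 1, 0, 0} and vs1 = {1, 0, 1, 0}, vmandn.mm
 * computes OP(b, a) = vs2 & ~vs1 per bit, giving vd = {0, 1, 0, 0}.
 */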
2e88f551 4634
0014aa74
FC
4635/* Vector count population in mask vcpop */
4636target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4637 uint32_t desc)
2e88f551
LZ
4638{
4639 target_ulong cnt = 0;
2e88f551
LZ
4640 uint32_t vm = vext_vm(desc);
4641 uint32_t vl = env->vl;
4642 int i;
4643
f714361e 4644 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4645 if (vm || vext_elem_mask(v0, i)) {
4646 if (vext_elem_mask(vs2, i)) {
2e88f551
LZ
4647 cnt++;
4648 }
4649 }
4650 }
f714361e 4651 env->vstart = 0;
2e88f551
LZ
4652 return cnt;
4653}
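/*
 * Worked example (illustrative; vl=8, unmasked): with vs2 mask bits
 * {0, 1, 0, 0, 1, 1, 0, 1}, vcpop.m returns 4, the number of set bits
 * among the first vl elements.
 */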
0db67e1c 4654
3b57254d 4655/* vfirst find-first-set mask bit */
d71a24fc
FC
4656target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4657 uint32_t desc)
0db67e1c 4658{
0db67e1c
LZ
4659 uint32_t vm = vext_vm(desc);
4660 uint32_t vl = env->vl;
4661 int i;
4662
f714361e 4663 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4664 if (vm || vext_elem_mask(v0, i)) {
4665 if (vext_elem_mask(vs2, i)) {
0db67e1c
LZ
4666 return i;
4667 }
4668 }
4669 }
f714361e 4670 env->vstart = 0;
0db67e1c
LZ
4671 return -1LL;
4672}
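/*
 * Worked example (illustrative; vl=8, unmasked): with vs2 mask bits
 * {0, 0, 1, 0, ...}, vfirst.m returns 2, the lowest set element index;
 * it returns -1 when no active bit is set.
 */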
81fbf7da
LZ
4673
4674enum set_mask_type {
4675 ONLY_FIRST = 1,
4676 INCLUDE_FIRST,
4677 BEFORE_FIRST,
4678};
4679
4680static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4681 uint32_t desc, enum set_mask_type type)
4682{
81fbf7da
LZ
4683 uint32_t vm = vext_vm(desc);
4684 uint32_t vl = env->vl;
58bc9063 4685 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
acc6ffd4 4686 uint32_t vta_all_1s = vext_vta_all_1s(desc);
35f2d795 4687 uint32_t vma = vext_vma(desc);
81fbf7da
LZ
4688 int i;
4689 bool first_mask_bit = false;
4690
f714361e 4691 for (i = env->vstart; i < vl; i++) {
f9298de5 4692 if (!vm && !vext_elem_mask(v0, i)) {
35f2d795
YTC
4693 /* set masked-off elements to 1s */
4694 if (vma) {
4695 vext_set_elem_mask(vd, i, 1);
4696 }
81fbf7da
LZ
4697 continue;
4698 }
4699 /* write a zero to all following active elements */
4700 if (first_mask_bit) {
f9298de5 4701 vext_set_elem_mask(vd, i, 0);
81fbf7da
LZ
4702 continue;
4703 }
f9298de5 4704 if (vext_elem_mask(vs2, i)) {
81fbf7da
LZ
4705 first_mask_bit = true;
4706 if (type == BEFORE_FIRST) {
f9298de5 4707 vext_set_elem_mask(vd, i, 0);
81fbf7da 4708 } else {
f9298de5 4709 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4710 }
4711 } else {
4712 if (type == ONLY_FIRST) {
f9298de5 4713 vext_set_elem_mask(vd, i, 0);
81fbf7da 4714 } else {
f9298de5 4715 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4716 }
4717 }
4718 }
f714361e 4719 env->vstart = 0;
3b57254d
WL
4720 /*
4721 * the mask destination register is always tail-agnostic;
4722 * set tail elements to 1s
4723 */
acc6ffd4 4724 if (vta_all_1s) {
4725 for (; i < total_elems; i++) {
4726 vext_set_elem_mask(vd, i, 1);
4727 }
4728 }
81fbf7da
LZ
4729}
4730
4731void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4732 uint32_t desc)
4733{
4734 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4735}
4736
4737void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4738 uint32_t desc)
4739{
4740 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4741}
4742
4743void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4744 uint32_t desc)
4745{
4746 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4747}
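/*
 * Worked example (illustrative; vl=5, unmasked): with vs2 mask bits
 * {0, 0, 1, 0, 1}:
 *   vmsbf.m (BEFORE_FIRST)  -> vd = {1, 1, 0, 0, 0}
 *   vmsif.m (INCLUDE_FIRST) -> vd = {1, 1, 1, 0, 0}
 *   vmsof.m (ONLY_FIRST)    -> vd = {0, 0, 1, 0, 0}
 */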
78d90cfe
LZ
4748
4749/* Vector Iota Instruction */
3479a814 4750#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
78d90cfe
LZ
4751void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4752 uint32_t desc) \
4753{ \
78d90cfe
LZ
4754 uint32_t vm = vext_vm(desc); \
4755 uint32_t vl = env->vl; \
acc6ffd4 4756 uint32_t esz = sizeof(ETYPE); \
4757 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4758 uint32_t vta = vext_vta(desc); \
35f2d795 4759 uint32_t vma = vext_vma(desc); \
78d90cfe
LZ
4760 uint32_t sum = 0; \
4761 int i; \
4762 \
f714361e 4763 for (i = env->vstart; i < vl; i++) { \
f9298de5 4764 if (!vm && !vext_elem_mask(v0, i)) { \
35f2d795
YTC
4765 /* set masked-off elements to 1s */ \
4766 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
78d90cfe
LZ
4767 continue; \
4768 } \
4769 *((ETYPE *)vd + H(i)) = sum; \
f9298de5 4770 if (vext_elem_mask(vs2, i)) { \
78d90cfe
LZ
4771 sum++; \
4772 } \
4773 } \
f714361e 4774 env->vstart = 0; \
acc6ffd4 4775 /* set tail elements to 1s */ \
4776 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
78d90cfe
LZ
4777}
4778
3479a814
FC
4779GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4780GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4781GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4782GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
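/*
 * Worked example (illustrative; vl=4, unmasked): with vs2 mask bits
 * {1, 0, 1, 1}, viota.m writes vd = {0, 1, 1, 2}: each element receives
 * the count of set vs2 bits at lower indices.
 */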
126bec3f
LZ
4783
4784/* Vector Element Index Instruction */
3479a814 4785#define GEN_VEXT_VID_V(NAME, ETYPE, H) \
126bec3f
LZ
4786void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4787{ \
126bec3f
LZ
4788 uint32_t vm = vext_vm(desc); \
4789 uint32_t vl = env->vl; \
acc6ffd4 4790 uint32_t esz = sizeof(ETYPE); \
4791 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4792 uint32_t vta = vext_vta(desc); \
35f2d795 4793 uint32_t vma = vext_vma(desc); \
126bec3f
LZ
4794 int i; \
4795 \
df4252b2
DHB
4796 VSTART_CHECK_EARLY_EXIT(env); \
4797 \
f714361e 4798 for (i = env->vstart; i < vl; i++) { \
f9298de5 4799 if (!vm && !vext_elem_mask(v0, i)) { \
35f2d795
YTC
4800 /* set masked-off elements to 1s */ \
4801 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
126bec3f
LZ
4802 continue; \
4803 } \
4804 *((ETYPE *)vd + H(i)) = i; \
4805 } \
f714361e 4806 env->vstart = 0; \
acc6ffd4 4807 /* set tail elements to 1s */ \
4808 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
126bec3f
LZ
4809}
4810
3479a814
FC
4811GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4812GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4813GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4814GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
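/*
 * Worked example (illustrative; vl=5, unmasked): vid.v simply writes
 * the element indices, vd = {0, 1, 2, 3, 4}.
 */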
ec17e036
LZ
4815
4816/*
3b57254d 4817 * Vector Permutation Instructions
ec17e036
LZ
4818 */
4819
4820/* Vector Slide Instructions */
3479a814 4821#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
ec17e036
LZ
4822void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4823 CPURISCVState *env, uint32_t desc) \
4824{ \
ec17e036
LZ
4825 uint32_t vm = vext_vm(desc); \
4826 uint32_t vl = env->vl; \
803963f7 4827 uint32_t esz = sizeof(ETYPE); \
4828 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4829 uint32_t vta = vext_vta(desc); \
edabcd0e 4830 uint32_t vma = vext_vma(desc); \
f714361e 4831 target_ulong offset = s1, i_min, i; \
ec17e036 4832 \
df4252b2
DHB
4833 VSTART_CHECK_EARLY_EXIT(env); \
4834 \
f714361e
FC
4835 i_min = MAX(env->vstart, offset); \
4836 for (i = i_min; i < vl; i++) { \
f9298de5 4837 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
4838 /* set masked-off elements to 1s */ \
4839 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
ec17e036
LZ
4840 continue; \
4841 } \
4842 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
4843 } \
d3646e31 4844 env->vstart = 0; \
803963f7 4845 /* set tail elements to 1s */ \
4846 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
ec17e036
LZ
4847}
4848
4849/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
3479a814
FC
4850GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
4851GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4852GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4853GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
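/*
 * Worked example (illustrative; vl=4, unmasked, x[rs1]=2): with
 * vs2 = {a, b, c, d}, vslideup.vx writes vd[2] = a and vd[3] = b;
 * vd[0] and vd[1] keep their previous values because the loop starts
 * at i_min = MAX(vstart, offset).
 */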
ec17e036 4854
3479a814 4855#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
ec17e036
LZ
4856void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4857 CPURISCVState *env, uint32_t desc) \
4858{ \
6438ed61 4859 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
ec17e036
LZ
4860 uint32_t vm = vext_vm(desc); \
4861 uint32_t vl = env->vl; \
803963f7 4862 uint32_t esz = sizeof(ETYPE); \
4863 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4864 uint32_t vta = vext_vta(desc); \
edabcd0e 4865 uint32_t vma = vext_vma(desc); \
f3f65c40 4866 target_ulong i_max, i_min, i; \
ec17e036 4867 \
df4252b2
DHB
4868 VSTART_CHECK_EARLY_EXIT(env); \
4869 \
f3f65c40
AF
4870 i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl); \
4871 i_max = MAX(i_min, env->vstart); \
f714361e 4872 for (i = env->vstart; i < i_max; ++i) { \
edabcd0e
YTC
4873 if (!vm && !vext_elem_mask(v0, i)) { \
4874 /* set masked-off elements to 1s */ \
4875 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4876 continue; \
6438ed61 4877 } \
edabcd0e 4878 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
6438ed61
FC
4879 } \
4880 \
4881 for (i = i_max; i < vl; ++i) { \
4882 if (vm || vext_elem_mask(v0, i)) { \
4883 *((ETYPE *)vd + H(i)) = 0; \
ec17e036 4884 } \
ec17e036 4885 } \
f714361e
FC
4886 \
4887 env->vstart = 0; \
803963f7 4888 /* set tail elements to 1s */ \
4889 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
ec17e036
LZ
4890}
4891
4892/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
3479a814
FC
4893GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
4894GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4895GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4896GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
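/*
 * Worked example (illustrative; vl=vlmax=4, unmasked, x[rs1]=3): with
 * vs2 = {a, b, c, d}, vslidedown.vx writes vd[0] = d and zeros
 * vd[1..3], since their source indices i + 3 fall at or beyond vlmax.
 */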
ec17e036 4897
c7b8a421 4898#define GEN_VEXT_VSLIDE1UP(BITWIDTH, H) \
8c89d50c 4899static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
c45eff30
WL
4900 void *vs2, CPURISCVState *env, \
4901 uint32_t desc) \
8500d4ab 4902{ \
c7b8a421 4903 typedef uint##BITWIDTH##_t ETYPE; \
8500d4ab
FC
4904 uint32_t vm = vext_vm(desc); \
4905 uint32_t vl = env->vl; \
803963f7 4906 uint32_t esz = sizeof(ETYPE); \
4907 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4908 uint32_t vta = vext_vta(desc); \
edabcd0e 4909 uint32_t vma = vext_vma(desc); \
8500d4ab
FC
4910 uint32_t i; \
4911 \
df4252b2
DHB
4912 VSTART_CHECK_EARLY_EXIT(env); \
4913 \
f714361e 4914 for (i = env->vstart; i < vl; i++) { \
8500d4ab 4915 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
4916 /* set masked-off elements to 1s */ \
4917 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
8500d4ab
FC
4918 continue; \
4919 } \
4920 if (i == 0) { \
4921 *((ETYPE *)vd + H(i)) = s1; \
4922 } else { \
4923 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
4924 } \
4925 } \
f714361e 4926 env->vstart = 0; \
803963f7 4927 /* set tail elements to 1s */ \
4928 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
8500d4ab
FC
4929}
4930
4931GEN_VEXT_VSLIDE1UP(8, H1)
4932GEN_VEXT_VSLIDE1UP(16, H2)
4933GEN_VEXT_VSLIDE1UP(32, H4)
4934GEN_VEXT_VSLIDE1UP(64, H8)
4935
c7b8a421 4936#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
8500d4ab
FC
4937void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4938 CPURISCVState *env, uint32_t desc) \
4939{ \
c7b8a421 4940 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
ec17e036
LZ
4941}
4942
4943/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
8500d4ab
FC
4944GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4945GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4946GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4947GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
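/*
 * Worked example (illustrative; vl=4, unmasked): with x[rs1] = 9 and
 * vs2 = {a, b, c, d}, vslide1up.vx writes vd = {9, a, b, c}.
 */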
4948
c7b8a421 4949#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
8c89d50c 4950static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
c45eff30
WL
4951 void *vs2, CPURISCVState *env, \
4952 uint32_t desc) \
8500d4ab 4953{ \
c7b8a421 4954 typedef uint##BITWIDTH##_t ETYPE; \
8500d4ab
FC
4955 uint32_t vm = vext_vm(desc); \
4956 uint32_t vl = env->vl; \
803963f7 4957 uint32_t esz = sizeof(ETYPE); \
4958 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4959 uint32_t vta = vext_vta(desc); \
edabcd0e 4960 uint32_t vma = vext_vma(desc); \
8500d4ab
FC
4961 uint32_t i; \
4962 \
df4252b2
DHB
4963 VSTART_CHECK_EARLY_EXIT(env); \
4964 \
f714361e 4965 for (i = env->vstart; i < vl; i++) { \
8500d4ab 4966 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
4967 /* set masked-off elements to 1s */ \
4968 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
8500d4ab
FC
4969 continue; \
4970 } \
4971 if (i == vl - 1) { \
4972 *((ETYPE *)vd + H(i)) = s1; \
4973 } else { \
4974 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
4975 } \
4976 } \
f714361e 4977 env->vstart = 0; \
803963f7 4978 /* set tail elements to 1s */ \
4979 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
8500d4ab
FC
4980}
4981
4982GEN_VEXT_VSLIDE1DOWN(8, H1)
4983GEN_VEXT_VSLIDE1DOWN(16, H2)
4984GEN_VEXT_VSLIDE1DOWN(32, H4)
4985GEN_VEXT_VSLIDE1DOWN(64, H8)
4986
c7b8a421 4987#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
8500d4ab
FC
4988void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4989 CPURISCVState *env, uint32_t desc) \
4990{ \
c7b8a421 4991 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
ec17e036
LZ
4992}
4993
4994/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
8500d4ab
FC
4995GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4996GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4997GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4998GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4999
5000/* Vector Floating-Point Slide Instructions */
c7b8a421 5001#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
8500d4ab
FC
5002void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5003 CPURISCVState *env, uint32_t desc) \
5004{ \
c7b8a421 5005 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
8500d4ab
FC
5006}
5007
5008/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
5009GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
5010GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
5011GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5012
c7b8a421 5013#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
8500d4ab
FC
5014void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5015 CPURISCVState *env, uint32_t desc) \
5016{ \
c7b8a421 5017 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
8500d4ab
FC
5018}
5019
5020/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
5021GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
5022GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
5023GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
e4b83d5c
LZ
5024
5025/* Vector Register Gather Instruction */
50bfb45b 5026#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
e4b83d5c
LZ
5027void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5028 CPURISCVState *env, uint32_t desc) \
5029{ \
f714361e 5030 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
e4b83d5c
LZ
5031 uint32_t vm = vext_vm(desc); \
5032 uint32_t vl = env->vl; \
803963f7 5033 uint32_t esz = sizeof(TS2); \
5034 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5035 uint32_t vta = vext_vta(desc); \
edabcd0e 5036 uint32_t vma = vext_vma(desc); \
b11e84b8
FC
5037 uint64_t index; \
5038 uint32_t i; \
e4b83d5c 5039 \
df4252b2
DHB
5040 VSTART_CHECK_EARLY_EXIT(env); \
5041 \
f714361e 5042 for (i = env->vstart; i < vl; i++) { \
f9298de5 5043 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5044 /* set masked-off elements to 1s */ \
5045 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
e4b83d5c
LZ
5046 continue; \
5047 } \
50bfb45b 5048 index = *((TS1 *)vs1 + HS1(i)); \
e4b83d5c 5049 if (index >= vlmax) { \
50bfb45b 5050 *((TS2 *)vd + HS2(i)) = 0; \
e4b83d5c 5051 } else { \
50bfb45b 5052 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
e4b83d5c
LZ
5053 } \
5054 } \
f714361e 5055 env->vstart = 0; \
803963f7 5056 /* set tail elements to 1s */ \
5057 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
e4b83d5c
LZ
5058}
5059
5060/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
50bfb45b
FC
5061GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5062GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5063GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5064GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5065
5066GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5067GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5068GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5069GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
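/*
 * Worked example (illustrative; vl=vlmax=4, unmasked): with
 * vs1 = {3, 0, 0, 9} and vs2 = {a, b, c, d}, vrgather.vv writes
 * vd = {d, a, a, 0}; the out-of-range index 9 selects zero.
 */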
e4b83d5c 5070
3479a814 5071#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
e4b83d5c
LZ
5072void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5073 CPURISCVState *env, uint32_t desc) \
5074{ \
5a9f8e15 5075 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
e4b83d5c
LZ
5076 uint32_t vm = vext_vm(desc); \
5077 uint32_t vl = env->vl; \
803963f7 5078 uint32_t esz = sizeof(ETYPE); \
5079 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5080 uint32_t vta = vext_vta(desc); \
edabcd0e 5081 uint32_t vma = vext_vma(desc); \
b11e84b8
FC
5082 uint64_t index = s1; \
5083 uint32_t i; \
e4b83d5c 5084 \
df4252b2
DHB
5085 VSTART_CHECK_EARLY_EXIT(env); \
5086 \
f714361e 5087 for (i = env->vstart; i < vl; i++) { \
f9298de5 5088 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5089 /* set masked-off elements to 1s */ \
5090 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
e4b83d5c
LZ
5091 continue; \
5092 } \
5093 if (index >= vlmax) { \
5094 *((ETYPE *)vd + H(i)) = 0; \
5095 } else { \
5096 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5097 } \
5098 } \
f714361e 5099 env->vstart = 0; \
803963f7 5100 /* set tail elements to 1s */ \
5101 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
e4b83d5c
LZ
5102}
5103
5104/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
3479a814
FC
5105GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5106GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5107GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5108GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
31bf42a2
LZ
5109
5110/* Vector Compress Instruction */
3479a814 5111#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
31bf42a2
LZ
5112void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5113 CPURISCVState *env, uint32_t desc) \
5114{ \
31bf42a2 5115 uint32_t vl = env->vl; \
803963f7 5116 uint32_t esz = sizeof(ETYPE); \
5117 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5118 uint32_t vta = vext_vta(desc); \
31bf42a2
LZ
5119 uint32_t num = 0, i; \
5120 \
f714361e 5121 for (i = env->vstart; i < vl; i++) { \
f9298de5 5122 if (!vext_elem_mask(vs1, i)) { \
31bf42a2
LZ
5123 continue; \
5124 } \
5125 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5126 num++; \
5127 } \
f714361e 5128 env->vstart = 0; \
803963f7 5129 /* set tail elements to 1s */ \
5130 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
31bf42a2
LZ
5131}
5132
5133/* Compress the elements of vs2 whose vs1 mask bit is set into contiguous elements of vd */
3479a814
FC
5134GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5135GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5136GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5137GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
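/*
 * Worked example (illustrative; vl=4): with vs1 mask bits {1, 0, 1, 1}
 * and vs2 = {a, b, c, d}, vcompress.vm packs the selected elements into
 * vd[0..2] = {a, c, d}.
 */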
cd01340e 5138
f714361e 5139/* Vector Whole Register Move */
f32d82f6
WL
5140void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5141{
f06193c4 5142 /* EEW = SEW */
f32d82f6 5143 uint32_t maxsz = simd_maxsz(desc);
f06193c4
WL
5144 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5145 uint32_t startb = env->vstart * sewb;
5146 uint32_t i = startb;
f32d82f6 5147
7e53e3dd
DHB
5148 if (startb >= maxsz) {
5149 env->vstart = 0;
5150 return;
5151 }
5152
768e7b32
DHB
5153 if (HOST_BIG_ENDIAN && i % 8 != 0) {
5154 uint32_t j = ROUND_UP(i, 8);
5155 memcpy((uint8_t *)vd + H1(j - 1),
5156 (uint8_t *)vs2 + H1(j - 1),
5157 j - i);
5158 i = j;
5159 }
5160
f32d82f6
WL
5161 memcpy((uint8_t *)vd + H1(i),
5162 (uint8_t *)vs2 + H1(i),
768e7b32 5163 maxsz - i);
f714361e 5164
f32d82f6
WL
5165 env->vstart = 0;
5166}
f714361e 5167
cd01340e
FC
5168/* Vector Integer Extension */
5169#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5170void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5171 CPURISCVState *env, uint32_t desc) \
5172{ \
5173 uint32_t vl = env->vl; \
5174 uint32_t vm = vext_vm(desc); \
803963f7 5175 uint32_t esz = sizeof(ETYPE); \
5176 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5177 uint32_t vta = vext_vta(desc); \
edabcd0e 5178 uint32_t vma = vext_vma(desc); \
cd01340e
FC
5179 uint32_t i; \
5180 \
df4252b2
DHB
5181 VSTART_CHECK_EARLY_EXIT(env); \
5182 \
f714361e 5183 for (i = env->vstart; i < vl; i++) { \
cd01340e 5184 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5185 /* set masked-off elements to 1s */ \
5186 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
cd01340e
FC
5187 continue; \
5188 } \
5189 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5190 } \
f714361e 5191 env->vstart = 0; \
803963f7 5192 /* set tail elements to 1s */ \
5193 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
cd01340e
FC
5194}
5195
5196GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5197GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5198GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5199GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5200GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5201GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5202
5203GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5204GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5205GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5206GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5207GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5208GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
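/*
 * Worked example (illustrative; vzext.vf2/vsext.vf2, destination SEW=16,
 * source EEW=8): the source byte 0x80 zero-extends to 0x0080 and
 * sign-extends to 0xff80.
 */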