[mirror_qemu.git] / target / riscv / vector_helper.c
2b7168fc
LZ
1/*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include "qemu/osdep.h"
5a9f8e15 20#include "qemu/host-utils.h"
e848a1e5 21#include "qemu/bitops.h"
2b7168fc 22#include "cpu.h"
751538d5 23#include "exec/memop.h"
2b7168fc 24#include "exec/exec-all.h"
09b07f28 25#include "exec/cpu_ldst.h"
2b7168fc 26#include "exec/helper-proto.h"
ce2a0343 27#include "fpu/softfloat.h"
751538d5
LZ
28#include "tcg/tcg-gvec-desc.h"
29#include "internals.h"
98f40dd2 30#include "vector_internals.h"
2b7168fc
LZ
31#include <math.h>
32
33target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
34 target_ulong s2)
35{
36 int vlmax, vl;
37 RISCVCPU *cpu = env_archcpu(env);
cd21576d
DHB
38 uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
39 uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
40 uint16_t sew = 8 << vsew;
2b7168fc 41 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
31961cfe
LZ
42 int xlen = riscv_cpu_xlen(env);
43 bool vill = (s2 >> (xlen - 1)) & 0x1;
44 target_ulong reserved = s2 &
45 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
46 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
938dd05e 47 uint16_t vlen = cpu->cfg.vlenb << 3;
cd21576d 48 int8_t lmul;
2b7168fc 49
cd21576d 50 if (vlmul & 4) {
7aa4d519
DHB
51 /*
52 * Fractional LMUL, check:
53 *
54 * VLEN * LMUL >= SEW
55 * VLEN >> (8 - lmul) >= sew
56 * (vlenb << 3) >> (8 - lmul) >= sew
7aa4d519 57 */
938dd05e 58 if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
d9b7609a
FC
59 vill = true;
60 }
61 }
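        /*
         * Worked example for the check above (illustrative, added; the
         * values are assumed): with VLEN = 128, SEW = 64 and LMUL = 1/4
         * (vlmul = 6), vlen >> (8 - vlmul) = 128 >> 2 = 32 < 64, so the
         * requested vtype is reported as illegal (vill).
         */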
62
c45eff30 63 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
2b7168fc 64 /* only set vill bit. */
d96a271a
LZ
65 env->vill = 1;
66 env->vtype = 0;
2b7168fc
LZ
67 env->vl = 0;
68 env->vstart = 0;
69 return 0;
70 }
71
cd21576d
DHB
72 /* lmul encoded as in DisasContext::lmul */
73 lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
74 vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
2b7168fc
LZ
75 if (s1 <= vlmax) {
76 vl = s1;
77 } else {
78 vl = vlmax;
79 }
80 env->vl = vl;
81 env->vtype = s2;
82 env->vstart = 0;
ac6bcf4d 83 env->vill = 0;
2b7168fc
LZ
84 return vl;
85}
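/*
 * Worked example (illustrative, added; the numbers are assumed): with
 * VLEN = 128 bits (vlenb = 16), SEW = 16 (vsew = 1) and LMUL = 2
 * (lmul = 1), vlmax = VLEN / SEW * LMUL = 16.  A request of s1 = 20
 * therefore returns vl = 16, while s1 = 10 returns vl = 10.
 */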
751538d5 86
751538d5 87/*
5a9f8e15 88 * Get the maximum number of elements that can be operated on.
751538d5 89 *
c7b8a421 90 * log2_esz: log2 of element size in bytes.
751538d5 91 */
c7b8a421 92static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
751538d5 93{
5a9f8e15 94 /*
8a4b5257 95 * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits.
5a9f8e15
FC
96 * so vlen in bytes (vlenb) is encoded as maxsz.
97 */
98 uint32_t vlenb = simd_maxsz(desc);
99
100 /* Return VLMAX */
c7b8a421 101 int scale = vext_lmul(desc) - log2_esz;
5a9f8e15 102 return scale < 0 ? vlenb >> -scale : vlenb << scale;
751538d5
LZ
103}
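/*
 * Illustrative example (added; values assumed): with vlenb = 16
 * (VLEN = 128), LMUL = 2 (vext_lmul(desc) = 1) and 32-bit elements
 * (log2_esz = 2), scale = 1 - 2 = -1, so the function returns
 * 16 >> 1 = 8, i.e. VLMAX = VLEN * LMUL / SEW = 8 elements.
 */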
104
d6b9d930
LZ
105static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
106{
7b945bdc 107 return (addr & ~env->cur_pmmask) | env->cur_pmbase;
d6b9d930
LZ
108}
109
751538d5
LZ
110/*
 111 * This function checks watchpoints before the real load operation.
112 *
7893e42d 113 * In system mode, the TLB API probe_access is enough for watchpoint check.
751538d5
LZ
114 * In user mode, there is no watchpoint support now.
115 *
 116 * It will trigger an exception if there is no mapping in the TLB
 117 * and the page table walk can't fill the TLB entry. Then the guest
 118 * software can return here after processing the exception, or never return.
119 */
120static void probe_pages(CPURISCVState *env, target_ulong addr,
121 target_ulong len, uintptr_t ra,
122 MMUAccessType access_type)
123{
124 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
125 target_ulong curlen = MIN(pagelen, len);
d9996d09 126 int mmu_index = riscv_env_mmu_index(env, false);
751538d5 127
d6b9d930 128 probe_access(env, adjust_addr(env, addr), curlen, access_type,
d9996d09 129 mmu_index, ra);
751538d5
LZ
130 if (len > curlen) {
131 addr += curlen;
132 curlen = len - curlen;
d6b9d930 133 probe_access(env, adjust_addr(env, addr), curlen, access_type,
d9996d09 134 mmu_index, ra);
751538d5
LZ
135 }
136}
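/*
 * Example (illustrative, added; assumes 4 KiB pages): for addr = 0x2ff8
 * and len = 16, pagelen = 8, so the first probe covers the 8 bytes up
 * to the page boundary and the second probe covers the remaining
 * 8 bytes starting at 0x3000.
 */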
137
f9298de5
FC
138static inline void vext_set_elem_mask(void *v0, int index,
139 uint8_t value)
3a6f8f68 140{
f9298de5
FC
141 int idx = index / 64;
142 int pos = index % 64;
3a6f8f68 143 uint64_t old = ((uint64_t *)v0)[idx];
f9298de5 144 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
3a6f8f68 145}
751538d5 146
751538d5 147/* element operations for load and store */
022b9bce 148typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
751538d5 149 uint32_t idx, void *vd, uintptr_t retaddr);
751538d5 150
79556fb6 151#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
751538d5
LZ
152static void NAME(CPURISCVState *env, abi_ptr addr, \
153 uint32_t idx, void *vd, uintptr_t retaddr)\
154{ \
751538d5 155 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
79556fb6 156 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
751538d5
LZ
157} \
158
79556fb6
FC
159GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
160GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
161GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
162GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
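/*
 * For reference (added, not part of the upstream file): the lde_b
 * invocation above expands to roughly
 *
 *   static void lde_b(CPURISCVState *env, abi_ptr addr,
 *                     uint32_t idx, void *vd, uintptr_t retaddr)
 *   {
 *       int8_t *cur = ((int8_t *)vd + H1(idx));
 *       *cur = cpu_ldsb_data_ra(env, addr, retaddr);
 *   }
 */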
751538d5
LZ
163
164#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
165static void NAME(CPURISCVState *env, abi_ptr addr, \
166 uint32_t idx, void *vd, uintptr_t retaddr)\
167{ \
168 ETYPE data = *((ETYPE *)vd + H(idx)); \
169 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
170}
171
751538d5
LZ
172GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
173GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
174GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
175GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
176
949b6bcb
XW
177static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
178 uint32_t desc, uint32_t nf,
e130683f
DHB
179 uint32_t esz, uint32_t max_elems)
180{
e130683f 181 uint32_t vta = vext_vta(desc);
e130683f
DHB
182 int k;
183
bc0ec52e
DHB
184 if (vta == 0) {
185 return;
186 }
187
e130683f
DHB
188 for (k = 0; k < nf; ++k) {
189 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
190 (k * max_elems + max_elems) * esz);
191 }
e130683f
DHB
192}
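/*
 * Example (illustrative, added; values assumed): for a two-field
 * segment load (nf = 2) with vl = 3 and max_elems = 8, elements 3..7
 * of both field slots (k = 0 and k = 1) are filled with all-1s when
 * the tail-agnostic bit (vta) is set.
 */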
193
751538d5 194/*
3b57254d 195 * stride: access vector elements from strided memory
751538d5
LZ
196 */
197static void
198vext_ldst_stride(void *vd, void *v0, target_ulong base,
199 target_ulong stride, CPURISCVState *env,
200 uint32_t desc, uint32_t vm,
3479a814 201 vext_ldst_elem_fn *ldst_elem,
c7b8a421 202 uint32_t log2_esz, uintptr_t ra)
751538d5
LZ
203{
204 uint32_t i, k;
205 uint32_t nf = vext_nf(desc);
c7b8a421 206 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 207 uint32_t esz = 1 << log2_esz;
265ecd4c 208 uint32_t vma = vext_vma(desc);
751538d5 209
df4252b2
DHB
210 VSTART_CHECK_EARLY_EXIT(env);
211
0a11629c 212 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
751538d5 213 k = 0;
751538d5 214 while (k < nf) {
265ecd4c
YTC
215 if (!vm && !vext_elem_mask(v0, i)) {
216 /* set masked-off elements to 1s */
217 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
218 (i + k * max_elems + 1) * esz);
219 k++;
220 continue;
221 }
c7b8a421 222 target_ulong addr = base + stride * i + (k << log2_esz);
d6b9d930 223 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
751538d5
LZ
224 k++;
225 }
226 }
f714361e 227 env->vstart = 0;
e130683f 228
949b6bcb 229 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
751538d5
LZ
230}
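/*
 * Illustrative example (added; values assumed): for a strided 32-bit
 * load with nf = 1, stride = 12 and log2_esz = 2, element i is loaded
 * from base + 12 * i; for a three-field segment version (nf = 3),
 * field k of segment i comes from base + 12 * i + 4 * k.
 */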
231
79556fb6 232#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
751538d5
LZ
233void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
234 target_ulong stride, CPURISCVState *env, \
235 uint32_t desc) \
236{ \
237 uint32_t vm = vext_vm(desc); \
238 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
25eae048 239 ctzl(sizeof(ETYPE)), GETPC()); \
751538d5
LZ
240}
241
79556fb6
FC
242GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
243GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
244GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
245GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
246
247#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
751538d5
LZ
248void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
249 target_ulong stride, CPURISCVState *env, \
250 uint32_t desc) \
251{ \
252 uint32_t vm = vext_vm(desc); \
253 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
25eae048 254 ctzl(sizeof(ETYPE)), GETPC()); \
751538d5
LZ
255}
256
79556fb6
FC
257GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
258GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
259GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
260GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
751538d5
LZ
261
262/*
3b57254d 263 * unit-stride: access elements stored contiguously in memory
751538d5
LZ
264 */
265
3b57254d 266/* unmasked unit-stride load and store operation */
751538d5
LZ
267static void
268vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
c7b8a421 269 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
25eae048 270 uintptr_t ra)
751538d5
LZ
271{
272 uint32_t i, k;
273 uint32_t nf = vext_nf(desc);
c7b8a421 274 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 275 uint32_t esz = 1 << log2_esz;
751538d5 276
df4252b2
DHB
277 VSTART_CHECK_EARLY_EXIT(env);
278
751538d5 279 /* load bytes from guest memory */
0a11629c 280 for (i = env->vstart; i < evl; env->vstart = ++i) {
751538d5
LZ
281 k = 0;
282 while (k < nf) {
c7b8a421 283 target_ulong addr = base + ((i * nf + k) << log2_esz);
d6b9d930 284 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
751538d5
LZ
285 k++;
286 }
287 }
f714361e 288 env->vstart = 0;
e130683f 289
949b6bcb 290 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
751538d5
LZ
291}
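/*
 * Illustrative example (added; values assumed): for a unit-stride
 * 16-bit segment load with nf = 2, field k of element i is read from
 * base + (i * 2 + k) * 2, so the fields of one segment are adjacent in
 * memory while each field is written to its own register group
 * (destination slot i + k * max_elems).
 */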
292
293/*
246f8796 294 * a masked unit-stride load or store operation is handled as a special case
fba59e0f 295 * of the strided operation, with stride = NF * sizeof(ETYPE)
751538d5
LZ
296 */
297
79556fb6 298#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
751538d5
LZ
299void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
300 CPURISCVState *env, uint32_t desc) \
301{ \
5a9f8e15 302 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
751538d5 303 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
25eae048 304 ctzl(sizeof(ETYPE)), GETPC()); \
751538d5
LZ
305} \
306 \
307void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
308 CPURISCVState *env, uint32_t desc) \
309{ \
3479a814 310 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
25eae048 311 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
751538d5
LZ
312}
313
79556fb6
FC
314GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
315GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
316GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
317GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
318
5c89e9c0
FC
319#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
320void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
321 CPURISCVState *env, uint32_t desc) \
322{ \
323 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
324 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
25eae048 325 ctzl(sizeof(ETYPE)), GETPC()); \
5c89e9c0
FC
326} \
327 \
328void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
329 CPURISCVState *env, uint32_t desc) \
330{ \
331 vext_ldst_us(vd, base, env, desc, STORE_FN, \
25eae048 332 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
751538d5
LZ
333}
334
79556fb6
FC
335GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
336GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
337GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
338GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
f732560e 339
26086aea 340/*
3b57254d 341 * unit stride mask load and store, EEW = 1
26086aea
FC
342 */
343void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
344 CPURISCVState *env, uint32_t desc)
345{
346 /* evl = ceil(vl/8) */
347 uint8_t evl = (env->vl + 7) >> 3;
348 vext_ldst_us(vd, base, env, desc, lde_b,
25eae048 349 0, evl, GETPC());
26086aea
FC
350}
351
352void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
353 CPURISCVState *env, uint32_t desc)
354{
355 /* evl = ceil(vl/8) */
356 uint8_t evl = (env->vl + 7) >> 3;
357 vext_ldst_us(vd, base, env, desc, ste_b,
25eae048 358 0, evl, GETPC());
26086aea
FC
359}
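/*
 * Example (illustrative, added): with vl = 17, evl = (17 + 7) >> 3 = 3,
 * so vlm.v/vsm.v transfer 3 bytes regardless of SEW, since mask bits
 * are packed one per element.
 */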
360
f732560e 361/*
3b57254d 362 * index: access vector elements from indexed memory
f732560e
LZ
363 */
364typedef target_ulong vext_get_index_addr(target_ulong base,
365 uint32_t idx, void *vs2);
366
367#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
368static target_ulong NAME(target_ulong base, \
369 uint32_t idx, void *vs2) \
370{ \
371 return (base + *((ETYPE *)vs2 + H(idx))); \
372}
373
83fcd573
FC
374GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
375GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
376GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
377GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
f732560e
LZ
378
379static inline void
380vext_ldst_index(void *vd, void *v0, target_ulong base,
381 void *vs2, CPURISCVState *env, uint32_t desc,
382 vext_get_index_addr get_index_addr,
383 vext_ldst_elem_fn *ldst_elem,
c7b8a421 384 uint32_t log2_esz, uintptr_t ra)
f732560e
LZ
385{
386 uint32_t i, k;
387 uint32_t nf = vext_nf(desc);
388 uint32_t vm = vext_vm(desc);
c7b8a421 389 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 390 uint32_t esz = 1 << log2_esz;
265ecd4c 391 uint32_t vma = vext_vma(desc);
f732560e 392
df4252b2
DHB
393 VSTART_CHECK_EARLY_EXIT(env);
394
f732560e 395 /* load bytes from guest memory */
0a11629c 396 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
f714361e 397 k = 0;
f732560e 398 while (k < nf) {
265ecd4c
YTC
399 if (!vm && !vext_elem_mask(v0, i)) {
400 /* set masked-off elements to 1s */
401 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
402 (i + k * max_elems + 1) * esz);
403 k++;
404 continue;
405 }
c7b8a421 406 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
d6b9d930 407 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
f732560e
LZ
408 k++;
409 }
410 }
f714361e 411 env->vstart = 0;
e130683f 412
949b6bcb 413 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
f732560e
LZ
414}
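/*
 * Illustrative example (added; values assumed): for a 32-bit indexed
 * load with 16-bit indices and nf = 1, element i is fetched from
 * base + (uint16_t)offset, where the offset is the byte offset held in
 * vs2[i]; the index EEW and the data EEW are independent, which is why
 * every data width below is paired with every index width.
 */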
415
08b9d0ed 416#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
f732560e
LZ
417void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
418 void *vs2, CPURISCVState *env, uint32_t desc) \
419{ \
420 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
25eae048 421 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
f732560e
LZ
422}
423
08b9d0ed
FC
424GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
425GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
426GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
427GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
428GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
429GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
430GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
431GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
432GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
433GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
434GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
435GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
436GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
437GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
438GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
439GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
440
441#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
f732560e
LZ
442void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
443 void *vs2, CPURISCVState *env, uint32_t desc) \
444{ \
445 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
5a9f8e15 446 STORE_FN, ctzl(sizeof(ETYPE)), \
25eae048 447 GETPC()); \
f732560e
LZ
448}
449
08b9d0ed
FC
450GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
451GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
452GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
453GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
454GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
455GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
456GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
457GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
458GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
459GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
460GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
461GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
462GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
463GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
464GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
465GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
022b4ecf
LZ
466
467/*
3b57254d 468 * unit-stride fault-only-first load instructions
022b4ecf
LZ
469 */
470static inline void
471vext_ldff(void *vd, void *v0, target_ulong base,
472 CPURISCVState *env, uint32_t desc,
473 vext_ldst_elem_fn *ldst_elem,
c7b8a421 474 uint32_t log2_esz, uintptr_t ra)
022b4ecf
LZ
475{
476 void *host;
477 uint32_t i, k, vl = 0;
022b4ecf
LZ
478 uint32_t nf = vext_nf(desc);
479 uint32_t vm = vext_vm(desc);
c7b8a421 480 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 481 uint32_t esz = 1 << log2_esz;
265ecd4c 482 uint32_t vma = vext_vma(desc);
022b4ecf 483 target_ulong addr, offset, remain;
d9996d09 484 int mmu_index = riscv_env_mmu_index(env, false);
022b4ecf 485
df4252b2
DHB
486 VSTART_CHECK_EARLY_EXIT(env);
487
3b57254d 488 /* probe every access */
f714361e 489 for (i = env->vstart; i < env->vl; i++) {
f9298de5 490 if (!vm && !vext_elem_mask(v0, i)) {
022b4ecf
LZ
491 continue;
492 }
c7b8a421 493 addr = adjust_addr(env, base + i * (nf << log2_esz));
022b4ecf 494 if (i == 0) {
c7b8a421 495 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
022b4ecf
LZ
496 } else {
497 /* if it triggers an exception, no need to check watchpoint */
c7b8a421 498 remain = nf << log2_esz;
022b4ecf
LZ
499 while (remain > 0) {
500 offset = -(addr | TARGET_PAGE_MASK);
d9996d09 501 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index);
022b4ecf
LZ
502 if (host) {
503#ifdef CONFIG_USER_ONLY
4cc9f284 504 if (!page_check_range(addr, offset, PAGE_READ)) {
022b4ecf
LZ
505 vl = i;
506 goto ProbeSuccess;
507 }
508#else
01d09525 509 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
022b4ecf
LZ
510#endif
511 } else {
512 vl = i;
513 goto ProbeSuccess;
514 }
515 if (remain <= offset) {
516 break;
517 }
518 remain -= offset;
d6b9d930 519 addr = adjust_addr(env, addr + offset);
022b4ecf
LZ
520 }
521 }
522 }
523ProbeSuccess:
524 /* load bytes from guest memory */
525 if (vl != 0) {
526 env->vl = vl;
527 }
f714361e 528 for (i = env->vstart; i < env->vl; i++) {
022b4ecf 529 k = 0;
022b4ecf 530 while (k < nf) {
265ecd4c
YTC
531 if (!vm && !vext_elem_mask(v0, i)) {
532 /* set masked-off elements to 1s */
533 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
534 (i + k * max_elems + 1) * esz);
535 k++;
536 continue;
537 }
f3f65c40 538 addr = base + ((i * nf + k) << log2_esz);
d6b9d930 539 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
022b4ecf
LZ
540 k++;
541 }
542 }
f714361e 543 env->vstart = 0;
e130683f 544
949b6bcb 545 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
022b4ecf
LZ
546}
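/*
 * Illustrative example (added): for an unmasked vle8ff.v with nf = 1,
 * if element 0 is accessible but the page holding element 5 is
 * unmapped, the probe loop above trims vl to 5 and the first five
 * elements are loaded without raising an exception; only a fault on
 * element 0 is allowed to trap.
 */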
547
d3e5e2ff
FC
548#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
549void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
550 CPURISCVState *env, uint32_t desc) \
551{ \
552 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
5a9f8e15 553 ctzl(sizeof(ETYPE)), GETPC()); \
022b4ecf
LZ
554}
555
d3e5e2ff
FC
556GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
557GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
558GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
559GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
268fcca6 560
268fcca6
LZ
561#define DO_SWAP(N, M) (M)
562#define DO_AND(N, M) (N & M)
563#define DO_XOR(N, M) (N ^ M)
564#define DO_OR(N, M) (N | M)
565#define DO_ADD(N, M) (N + M)
566
268fcca6
LZ
567/* Signed min/max */
568#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
569#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
570
30206bd8 571/*
3b57254d 572 * load and store whole register instructions
30206bd8
FC
573 */
574static void
575vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
c7b8a421 576 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
30206bd8 577{
f714361e 578 uint32_t i, k, off, pos;
30206bd8 579 uint32_t nf = vext_nf(desc);
58bc9063 580 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
c7b8a421 581 uint32_t max_elems = vlenb >> log2_esz;
30206bd8 582
929e521a
DHB
583 if (env->vstart >= ((vlenb * nf) >> log2_esz)) {
584 env->vstart = 0;
585 return;
586 }
587
f714361e
FC
588 k = env->vstart / max_elems;
589 off = env->vstart % max_elems;
30206bd8 590
f714361e
FC
591 if (off) {
 592 /* load/store rest of elements of the current segment pointed to by vstart */
593 for (pos = off; pos < max_elems; pos++, env->vstart++) {
c7b8a421 594 target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
246f8796
WL
595 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd,
596 ra);
f714361e
FC
597 }
598 k++;
599 }
600
601 /* load/store elements for rest of segments */
602 for (; k < nf; k++) {
603 for (i = 0; i < max_elems; i++, env->vstart++) {
c7b8a421 604 target_ulong addr = base + ((i + k * max_elems) << log2_esz);
d6b9d930 605 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
30206bd8
FC
606 }
607 }
f714361e
FC
608
609 env->vstart = 0;
30206bd8
FC
610}
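/*
 * Illustrative example (added; values assumed): vl4re16.v with
 * vlenb = 16 transfers 4 * 16 = 64 bytes and max_elems = 8 halfwords
 * per register; a non-zero vstart of 10 resumes in register k = 1 at
 * element offset off = 2.
 */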
611
612#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
613void HELPER(NAME)(void *vd, target_ulong base, \
614 CPURISCVState *env, uint32_t desc) \
615{ \
616 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
25eae048 617 ctzl(sizeof(ETYPE)), GETPC()); \
30206bd8
FC
618}
619
620GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
621GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
622GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
623GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
624GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
625GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
626GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
627GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
628GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
629GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
630GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
631GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
632GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
633GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
634GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
635GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
636
637#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
638void HELPER(NAME)(void *vd, target_ulong base, \
639 CPURISCVState *env, uint32_t desc) \
640{ \
641 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
25eae048 642 ctzl(sizeof(ETYPE)), GETPC()); \
30206bd8
FC
643}
644
645GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
646GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
647GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
648GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
649
43740e3a 650/*
3b57254d 651 * Vector Integer Arithmetic Instructions
43740e3a
LZ
652 */
653
43740e3a
LZ
654/* (TD, T1, T2, TX1, TX2) */
655#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
656#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
657#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
658#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
958b85f3
LZ
659#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
660#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
661#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
662#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
97b1cba3
LZ
663#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
664#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
665#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
666#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
667#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
668#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
669#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
670#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
671#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
9ff3d287
LZ
672#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
673#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
674#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
675#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
676#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
677#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
43740e3a 678
43740e3a
LZ
679#define DO_SUB(N, M) (N - M)
680#define DO_RSUB(N, M) (M - N)
681
682RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
683RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
684RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
685RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
686RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
687RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
688RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
689RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
690
f1eed927 691GEN_VEXT_VV(vadd_vv_b, 1)
692GEN_VEXT_VV(vadd_vv_h, 2)
693GEN_VEXT_VV(vadd_vv_w, 4)
694GEN_VEXT_VV(vadd_vv_d, 8)
695GEN_VEXT_VV(vsub_vv_b, 1)
696GEN_VEXT_VV(vsub_vv_h, 2)
697GEN_VEXT_VV(vsub_vv_w, 4)
698GEN_VEXT_VV(vsub_vv_d, 8)
43740e3a 699
43740e3a
LZ
700
701RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
702RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
703RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
704RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
705RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
706RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
707RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
708RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
709RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
710RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
711RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
712RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
713
5c19fc15 714GEN_VEXT_VX(vadd_vx_b, 1)
715GEN_VEXT_VX(vadd_vx_h, 2)
716GEN_VEXT_VX(vadd_vx_w, 4)
717GEN_VEXT_VX(vadd_vx_d, 8)
718GEN_VEXT_VX(vsub_vx_b, 1)
719GEN_VEXT_VX(vsub_vx_h, 2)
720GEN_VEXT_VX(vsub_vx_w, 4)
721GEN_VEXT_VX(vsub_vx_d, 8)
722GEN_VEXT_VX(vrsub_vx_b, 1)
723GEN_VEXT_VX(vrsub_vx_h, 2)
724GEN_VEXT_VX(vrsub_vx_w, 4)
725GEN_VEXT_VX(vrsub_vx_d, 8)
43740e3a
LZ
726
727void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
728{
729 intptr_t oprsz = simd_oprsz(desc);
730 intptr_t i;
731
732 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
733 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
734 }
735}
736
737void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
738{
739 intptr_t oprsz = simd_oprsz(desc);
740 intptr_t i;
741
742 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
743 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
744 }
745}
746
747void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
748{
749 intptr_t oprsz = simd_oprsz(desc);
750 intptr_t i;
751
752 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
753 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
754 }
755}
756
757void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
758{
759 intptr_t oprsz = simd_oprsz(desc);
760 intptr_t i;
761
762 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
763 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
764 }
765}
8fcdf776
LZ
766
767/* Vector Widening Integer Add/Subtract */
768#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
769#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
770#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
771#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
772#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
773#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
774#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
775#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
776#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
777#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
778#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
779#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
780RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
781RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
782RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
783RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
784RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
785RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
786RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
787RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
788RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
789RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
790RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
791RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
792RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
793RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
794RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
795RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
796RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
797RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
798RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
799RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
800RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
801RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
802RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
803RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
f1eed927 804GEN_VEXT_VV(vwaddu_vv_b, 2)
805GEN_VEXT_VV(vwaddu_vv_h, 4)
806GEN_VEXT_VV(vwaddu_vv_w, 8)
807GEN_VEXT_VV(vwsubu_vv_b, 2)
808GEN_VEXT_VV(vwsubu_vv_h, 4)
809GEN_VEXT_VV(vwsubu_vv_w, 8)
810GEN_VEXT_VV(vwadd_vv_b, 2)
811GEN_VEXT_VV(vwadd_vv_h, 4)
812GEN_VEXT_VV(vwadd_vv_w, 8)
813GEN_VEXT_VV(vwsub_vv_b, 2)
814GEN_VEXT_VV(vwsub_vv_h, 4)
815GEN_VEXT_VV(vwsub_vv_w, 8)
816GEN_VEXT_VV(vwaddu_wv_b, 2)
817GEN_VEXT_VV(vwaddu_wv_h, 4)
818GEN_VEXT_VV(vwaddu_wv_w, 8)
819GEN_VEXT_VV(vwsubu_wv_b, 2)
820GEN_VEXT_VV(vwsubu_wv_h, 4)
821GEN_VEXT_VV(vwsubu_wv_w, 8)
822GEN_VEXT_VV(vwadd_wv_b, 2)
823GEN_VEXT_VV(vwadd_wv_h, 4)
824GEN_VEXT_VV(vwadd_wv_w, 8)
825GEN_VEXT_VV(vwsub_wv_b, 2)
826GEN_VEXT_VV(vwsub_wv_h, 4)
827GEN_VEXT_VV(vwsub_wv_w, 8)
8fcdf776
LZ
828
829RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
830RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
831RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
832RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
833RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
834RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
835RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
836RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
837RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
838RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
839RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
840RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
841RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
842RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
843RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
844RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
845RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
846RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
847RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
848RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
849RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
850RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
851RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
852RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
5c19fc15 853GEN_VEXT_VX(vwaddu_vx_b, 2)
854GEN_VEXT_VX(vwaddu_vx_h, 4)
855GEN_VEXT_VX(vwaddu_vx_w, 8)
856GEN_VEXT_VX(vwsubu_vx_b, 2)
857GEN_VEXT_VX(vwsubu_vx_h, 4)
858GEN_VEXT_VX(vwsubu_vx_w, 8)
859GEN_VEXT_VX(vwadd_vx_b, 2)
860GEN_VEXT_VX(vwadd_vx_h, 4)
861GEN_VEXT_VX(vwadd_vx_w, 8)
862GEN_VEXT_VX(vwsub_vx_b, 2)
863GEN_VEXT_VX(vwsub_vx_h, 4)
864GEN_VEXT_VX(vwsub_vx_w, 8)
865GEN_VEXT_VX(vwaddu_wx_b, 2)
866GEN_VEXT_VX(vwaddu_wx_h, 4)
867GEN_VEXT_VX(vwaddu_wx_w, 8)
868GEN_VEXT_VX(vwsubu_wx_b, 2)
869GEN_VEXT_VX(vwsubu_wx_h, 4)
870GEN_VEXT_VX(vwsubu_wx_w, 8)
871GEN_VEXT_VX(vwadd_wx_b, 2)
872GEN_VEXT_VX(vwadd_wx_h, 4)
873GEN_VEXT_VX(vwadd_wx_w, 8)
874GEN_VEXT_VX(vwsub_wx_b, 2)
875GEN_VEXT_VX(vwsub_wx_h, 4)
876GEN_VEXT_VX(vwsub_wx_w, 8)
3a6f8f68
LZ
877
878/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
879#define DO_VADC(N, M, C) (N + M + C)
880#define DO_VSBC(N, M, C) (N - M - C)
881
3479a814 882#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
3a6f8f68
LZ
883void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
884 CPURISCVState *env, uint32_t desc) \
885{ \
3a6f8f68 886 uint32_t vl = env->vl; \
5c19fc15 887 uint32_t esz = sizeof(ETYPE); \
888 uint32_t total_elems = \
889 vext_get_total_elems(env, desc, esz); \
890 uint32_t vta = vext_vta(desc); \
3a6f8f68
LZ
891 uint32_t i; \
892 \
df4252b2
DHB
893 VSTART_CHECK_EARLY_EXIT(env); \
894 \
f714361e 895 for (i = env->vstart; i < vl; i++) { \
3a6f8f68
LZ
896 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
897 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 898 ETYPE carry = vext_elem_mask(v0, i); \
3a6f8f68
LZ
899 \
900 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
901 } \
f714361e 902 env->vstart = 0; \
5c19fc15 903 /* set tail elements to 1s */ \
904 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
3a6f8f68
LZ
905}
906
3479a814
FC
907GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
908GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
909GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
910GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
3a6f8f68 911
3479a814
FC
912GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
913GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
914GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
915GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
3a6f8f68 916
3479a814 917#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
3a6f8f68
LZ
918void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
919 CPURISCVState *env, uint32_t desc) \
920{ \
3a6f8f68 921 uint32_t vl = env->vl; \
5c19fc15 922 uint32_t esz = sizeof(ETYPE); \
923 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
924 uint32_t vta = vext_vta(desc); \
3a6f8f68
LZ
925 uint32_t i; \
926 \
df4252b2
DHB
927 VSTART_CHECK_EARLY_EXIT(env); \
928 \
f714361e 929 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 930 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 931 ETYPE carry = vext_elem_mask(v0, i); \
3a6f8f68
LZ
932 \
933 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
934 } \
c45eff30 935 env->vstart = 0; \
5c19fc15 936 /* set tail elements to 1s */ \
937 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
3a6f8f68
LZ
938}
939
3479a814
FC
940GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
941GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
942GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
943GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
3a6f8f68 944
3479a814
FC
945GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
946GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
947GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
948GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
3a6f8f68
LZ
949
950#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
951 (__typeof(N))(N + M) < N)
952#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
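/*
 * Worked example (illustrative, added): for an 8-bit vmadc with
 * N = 200, M = 55 and carry-in C = 1, N + M + 1 wraps to 0, and since
 * (uint8_t)(N + M + 1) <= N holds, DO_MADC reports a carry-out of 1.
 */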
953
954#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
955void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
956 CPURISCVState *env, uint32_t desc) \
957{ \
3a6f8f68 958 uint32_t vl = env->vl; \
bb45485a 959 uint32_t vm = vext_vm(desc); \
58bc9063 960 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
5c19fc15 961 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
3a6f8f68
LZ
962 uint32_t i; \
963 \
df4252b2
DHB
964 VSTART_CHECK_EARLY_EXIT(env); \
965 \
f714361e 966 for (i = env->vstart; i < vl; i++) { \
3a6f8f68
LZ
967 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
968 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 969 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 970 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
3a6f8f68 971 } \
f714361e 972 env->vstart = 0; \
3b57254d
WL
973 /*
 974 * mask destination registers are always tail-agnostic
975 * set tail elements to 1s
976 */ \
5c19fc15 977 if (vta_all_1s) { \
978 for (; i < total_elems; i++) { \
979 vext_set_elem_mask(vd, i, 1); \
980 } \
981 } \
3a6f8f68
LZ
982}
983
984GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
985GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
986GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
987GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
988
989GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
990GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
991GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
992GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
993
994#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
995void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
996 void *vs2, CPURISCVState *env, uint32_t desc) \
997{ \
3a6f8f68 998 uint32_t vl = env->vl; \
bb45485a 999 uint32_t vm = vext_vm(desc); \
58bc9063 1000 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
5c19fc15 1001 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
3a6f8f68
LZ
1002 uint32_t i; \
1003 \
df4252b2
DHB
1004 VSTART_CHECK_EARLY_EXIT(env); \
1005 \
f714361e 1006 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 1007 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1008 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 1009 vext_set_elem_mask(vd, i, \
3a6f8f68
LZ
1010 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1011 } \
f714361e 1012 env->vstart = 0; \
3b57254d
WL
1013 /*
 1014 * mask destination registers are always tail-agnostic
1015 * set tail elements to 1s
1016 */ \
5c19fc15 1017 if (vta_all_1s) { \
1018 for (; i < total_elems; i++) { \
1019 vext_set_elem_mask(vd, i, 1); \
1020 } \
1021 } \
3a6f8f68
LZ
1022}
1023
1024GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1025GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1026GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1027GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1028
1029GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1030GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1031GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1032GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
d3842924
LZ
1033
1034/* Vector Bitwise Logical Instructions */
1035RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1036RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1037RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1038RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1039RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1040RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1041RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1042RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1043RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1044RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1045RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1046RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
f1eed927 1047GEN_VEXT_VV(vand_vv_b, 1)
1048GEN_VEXT_VV(vand_vv_h, 2)
1049GEN_VEXT_VV(vand_vv_w, 4)
1050GEN_VEXT_VV(vand_vv_d, 8)
1051GEN_VEXT_VV(vor_vv_b, 1)
1052GEN_VEXT_VV(vor_vv_h, 2)
1053GEN_VEXT_VV(vor_vv_w, 4)
1054GEN_VEXT_VV(vor_vv_d, 8)
1055GEN_VEXT_VV(vxor_vv_b, 1)
1056GEN_VEXT_VV(vxor_vv_h, 2)
1057GEN_VEXT_VV(vxor_vv_w, 4)
1058GEN_VEXT_VV(vxor_vv_d, 8)
d3842924
LZ
1059
1060RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1061RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1062RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1063RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1064RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1065RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1066RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1067RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1068RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1069RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1070RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1071RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
5c19fc15 1072GEN_VEXT_VX(vand_vx_b, 1)
1073GEN_VEXT_VX(vand_vx_h, 2)
1074GEN_VEXT_VX(vand_vx_w, 4)
1075GEN_VEXT_VX(vand_vx_d, 8)
1076GEN_VEXT_VX(vor_vx_b, 1)
1077GEN_VEXT_VX(vor_vx_h, 2)
1078GEN_VEXT_VX(vor_vx_w, 4)
1079GEN_VEXT_VX(vor_vx_d, 8)
1080GEN_VEXT_VX(vxor_vx_b, 1)
1081GEN_VEXT_VX(vxor_vx_h, 2)
1082GEN_VEXT_VX(vxor_vx_w, 4)
1083GEN_VEXT_VX(vxor_vx_d, 8)
3277d955
LZ
1084
1085/* Vector Single-Width Bit Shift Instructions */
1086#define DO_SLL(N, M) (N << (M))
1087#define DO_SRL(N, M) (N >> (M))
1088
 1089/* generate the helpers for shift instructions with two vector operands */
3479a814 1090#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
3277d955
LZ
1091void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1092 void *vs2, CPURISCVState *env, uint32_t desc) \
1093{ \
3277d955
LZ
1094 uint32_t vm = vext_vm(desc); \
1095 uint32_t vl = env->vl; \
7b1bff41 1096 uint32_t esz = sizeof(TS1); \
1097 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1098 uint32_t vta = vext_vta(desc); \
fd93045e 1099 uint32_t vma = vext_vma(desc); \
3277d955
LZ
1100 uint32_t i; \
1101 \
df4252b2
DHB
1102 VSTART_CHECK_EARLY_EXIT(env); \
1103 \
f714361e 1104 for (i = env->vstart; i < vl; i++) { \
f9298de5 1105 if (!vm && !vext_elem_mask(v0, i)) { \
fd93045e
YTC
1106 /* set masked-off elements to 1s */ \
1107 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
3277d955
LZ
1108 continue; \
1109 } \
1110 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1111 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1112 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1113 } \
f714361e 1114 env->vstart = 0; \
7b1bff41 1115 /* set tail elements to 1s */ \
1116 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
3277d955
LZ
1117}
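/*
 * Note (added): the MASK argument keeps only the low log2(SEW) bits of
 * the shift amount, so e.g. for the 32-bit helpers below a shift count
 * of 33 behaves as 33 & 0x1f = 1.
 */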
1118
3479a814
FC
1119GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1120GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1121GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1122GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
3277d955 1123
3479a814
FC
1124GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1125GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1126GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1127GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
3277d955 1128
3479a814
FC
1129GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1130GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1131GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1132GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
3277d955 1133
246f8796
WL
1134/*
1135 * generate the helpers for shift instructions with one vector and one scalar
1136 */
3479a814
FC
1137#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1138void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
c45eff30
WL
1139 void *vs2, CPURISCVState *env, \
1140 uint32_t desc) \
3479a814
FC
1141{ \
1142 uint32_t vm = vext_vm(desc); \
1143 uint32_t vl = env->vl; \
7b1bff41 1144 uint32_t esz = sizeof(TD); \
1145 uint32_t total_elems = \
1146 vext_get_total_elems(env, desc, esz); \
1147 uint32_t vta = vext_vta(desc); \
fd93045e 1148 uint32_t vma = vext_vma(desc); \
3479a814
FC
1149 uint32_t i; \
1150 \
df4252b2
DHB
1151 VSTART_CHECK_EARLY_EXIT(env); \
1152 \
f714361e 1153 for (i = env->vstart; i < vl; i++) { \
3479a814 1154 if (!vm && !vext_elem_mask(v0, i)) { \
fd93045e
YTC
1155 /* set masked-off elements to 1s */ \
1156 vext_set_elems_1s(vd, vma, i * esz, \
1157 (i + 1) * esz); \
3479a814
FC
1158 continue; \
1159 } \
1160 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1161 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1162 } \
f714361e 1163 env->vstart = 0; \
7b1bff41 1164 /* set tail elements to 1s */ \
1165 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
3479a814
FC
1166}
1167
1168GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1169GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1170GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1171GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1172
1173GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1174GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1175GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1176GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1177
1178GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1179GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1180GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1181GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
7689b028
LZ
1182
1183/* Vector Narrowing Integer Right Shift Instructions */
7daa5852
FC
1184GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1185GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1186GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1187GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1188GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1189GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1190GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1191GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1192GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1193GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1194GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1195GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1366fc79
LZ
1196
1197/* Vector Integer Comparison Instructions */
1198#define DO_MSEQ(N, M) (N == M)
1199#define DO_MSNE(N, M) (N != M)
1200#define DO_MSLT(N, M) (N < M)
1201#define DO_MSLE(N, M) (N <= M)
1202#define DO_MSGT(N, M) (N > M)
1203
1204#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1205void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1206 CPURISCVState *env, uint32_t desc) \
1207{ \
1366fc79
LZ
1208 uint32_t vm = vext_vm(desc); \
1209 uint32_t vl = env->vl; \
58bc9063 1210 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
38581e5c 1211 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
6e11d7ea 1212 uint32_t vma = vext_vma(desc); \
1366fc79
LZ
1213 uint32_t i; \
1214 \
df4252b2
DHB
1215 VSTART_CHECK_EARLY_EXIT(env); \
1216 \
f714361e 1217 for (i = env->vstart; i < vl; i++) { \
1366fc79
LZ
1218 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1219 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1220 if (!vm && !vext_elem_mask(v0, i)) { \
6e11d7ea
YTC
1221 /* set masked-off elements to 1s */ \
1222 if (vma) { \
1223 vext_set_elem_mask(vd, i, 1); \
1224 } \
1366fc79
LZ
1225 continue; \
1226 } \
f9298de5 1227 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1366fc79 1228 } \
f714361e 1229 env->vstart = 0; \
3b57254d
WL
1230 /*
 1231 * mask destination registers are always tail-agnostic
1232 * set tail elements to 1s
1233 */ \
38581e5c 1234 if (vta_all_1s) { \
1235 for (; i < total_elems; i++) { \
1236 vext_set_elem_mask(vd, i, 1); \
1237 } \
1238 } \
1366fc79
LZ
1239}
1240
1241GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1242GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1243GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1244GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1245
1246GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1247GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1248GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1249GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1250
1251GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1252GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1253GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1254GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1255
1256GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1257GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1258GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1259GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1260
1261GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1262GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1263GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1264GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1265
1266GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1267GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1268GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1269GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1270
1271#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1272void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1273 CPURISCVState *env, uint32_t desc) \
1274{ \
1366fc79
LZ
1275 uint32_t vm = vext_vm(desc); \
1276 uint32_t vl = env->vl; \
58bc9063 1277 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
38581e5c 1278 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
6e11d7ea 1279 uint32_t vma = vext_vma(desc); \
1366fc79
LZ
1280 uint32_t i; \
1281 \
df4252b2
DHB
1282 VSTART_CHECK_EARLY_EXIT(env); \
1283 \
f714361e 1284 for (i = env->vstart; i < vl; i++) { \
1366fc79 1285 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1286 if (!vm && !vext_elem_mask(v0, i)) { \
6e11d7ea
YTC
1287 /* set masked-off elements to 1s */ \
1288 if (vma) { \
1289 vext_set_elem_mask(vd, i, 1); \
1290 } \
1366fc79
LZ
1291 continue; \
1292 } \
f9298de5 1293 vext_set_elem_mask(vd, i, \
1366fc79
LZ
1294 DO_OP(s2, (ETYPE)(target_long)s1)); \
1295 } \
f714361e 1296 env->vstart = 0; \
3b57254d
WL
1297 /*
 1298 * mask destination registers are always tail-agnostic
1299 * set tail elements to 1s
1300 */ \
38581e5c 1301 if (vta_all_1s) { \
1302 for (; i < total_elems; i++) { \
1303 vext_set_elem_mask(vd, i, 1); \
1304 } \
1305 } \
1366fc79
LZ
1306}
1307
1308GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1309GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1310GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1311GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1312
1313GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1314GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1315GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1316GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1317
1318GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1319GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1320GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1321GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1322
1323GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1324GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1325GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1326GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1327
1328GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1329GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1330GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1331GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1332
1333GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1334GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1335GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1336GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1337
1338GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1339GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1340GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1341GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1342
1343GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1344GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1345GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1346GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
558fa779
LZ
1347
1348/* Vector Integer Min/Max Instructions */
1349RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1350RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1351RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1352RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1353RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1354RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1355RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1356RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1357RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1358RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1359RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1360RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1361RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1362RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1363RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1364RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
f1eed927 1365GEN_VEXT_VV(vminu_vv_b, 1)
1366GEN_VEXT_VV(vminu_vv_h, 2)
1367GEN_VEXT_VV(vminu_vv_w, 4)
1368GEN_VEXT_VV(vminu_vv_d, 8)
1369GEN_VEXT_VV(vmin_vv_b, 1)
1370GEN_VEXT_VV(vmin_vv_h, 2)
1371GEN_VEXT_VV(vmin_vv_w, 4)
1372GEN_VEXT_VV(vmin_vv_d, 8)
1373GEN_VEXT_VV(vmaxu_vv_b, 1)
1374GEN_VEXT_VV(vmaxu_vv_h, 2)
1375GEN_VEXT_VV(vmaxu_vv_w, 4)
1376GEN_VEXT_VV(vmaxu_vv_d, 8)
1377GEN_VEXT_VV(vmax_vv_b, 1)
1378GEN_VEXT_VV(vmax_vv_h, 2)
1379GEN_VEXT_VV(vmax_vv_w, 4)
1380GEN_VEXT_VV(vmax_vv_d, 8)
558fa779
LZ
1381
1382RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1383RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1384RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1385RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1386RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1387RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1388RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1389RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1390RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1391RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1392RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1393RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1394RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1395RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1396RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1397RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
5c19fc15 1398GEN_VEXT_VX(vminu_vx_b, 1)
1399GEN_VEXT_VX(vminu_vx_h, 2)
1400GEN_VEXT_VX(vminu_vx_w, 4)
1401GEN_VEXT_VX(vminu_vx_d, 8)
1402GEN_VEXT_VX(vmin_vx_b, 1)
1403GEN_VEXT_VX(vmin_vx_h, 2)
1404GEN_VEXT_VX(vmin_vx_w, 4)
1405GEN_VEXT_VX(vmin_vx_d, 8)
1406GEN_VEXT_VX(vmaxu_vx_b, 1)
1407GEN_VEXT_VX(vmaxu_vx_h, 2)
1408GEN_VEXT_VX(vmaxu_vx_w, 4)
1409GEN_VEXT_VX(vmaxu_vx_d, 8)
1410GEN_VEXT_VX(vmax_vx_b, 1)
1411GEN_VEXT_VX(vmax_vx_h, 2)
1412GEN_VEXT_VX(vmax_vx_w, 4)
1413GEN_VEXT_VX(vmax_vx_d, 8)
958b85f3
LZ
1414
1415/* Vector Single-Width Integer Multiply Instructions */
1416#define DO_MUL(N, M) (N * M)
1417RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1418RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1419RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1420RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
f1eed927 1421GEN_VEXT_VV(vmul_vv_b, 1)
1422GEN_VEXT_VV(vmul_vv_h, 2)
1423GEN_VEXT_VV(vmul_vv_w, 4)
1424GEN_VEXT_VV(vmul_vv_d, 8)
958b85f3
LZ
1425
1426static int8_t do_mulh_b(int8_t s2, int8_t s1)
1427{
1428 return (int16_t)s2 * (int16_t)s1 >> 8;
1429}
1430
1431static int16_t do_mulh_h(int16_t s2, int16_t s1)
1432{
1433 return (int32_t)s2 * (int32_t)s1 >> 16;
1434}
1435
1436static int32_t do_mulh_w(int32_t s2, int32_t s1)
1437{
1438 return (int64_t)s2 * (int64_t)s1 >> 32;
1439}
1440
1441static int64_t do_mulh_d(int64_t s2, int64_t s1)
1442{
1443 uint64_t hi_64, lo_64;
1444
1445 muls64(&lo_64, &hi_64, s1, s2);
1446 return hi_64;
1447}
1448
1449static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1450{
1451 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1452}
1453
1454static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1455{
1456 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1457}
1458
1459static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1460{
1461 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1462}
1463
1464static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1465{
1466 uint64_t hi_64, lo_64;
1467
1468 mulu64(&lo_64, &hi_64, s2, s1);
1469 return hi_64;
1470}
1471
1472static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1473{
1474 return (int16_t)s2 * (uint16_t)s1 >> 8;
1475}
1476
1477static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1478{
1479 return (int32_t)s2 * (uint32_t)s1 >> 16;
1480}
1481
1482static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1483{
1484 return (int64_t)s2 * (uint64_t)s1 >> 32;
1485}
1486
/*
 * Signed * unsigned high-half multiply, derived from the unsigned product.
 *
 * Let A  = the signed operand (s2), with unsigned bit pattern Au,
 *     B  = the unsigned operand (s1),
 *     P  = Au * B, the 128-bit unsigned product from mulu64().
 *
 * If A >= 0 then Au == A, so the signed product SP equals P.
 * If A <  0 then Au == A + 2 ** 64, so
 *     SP = A * B = (Au - 2 ** 64) * B = P - 2 ** 64 * B,
 * i.e. only the high 64 bits of P need a correction:
 *     HI_P -= (A < 0 ? B : 0)
 */
1505
1506static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1507{
1508 uint64_t hi_64, lo_64;
1509
1510 mulu64(&lo_64, &hi_64, s2, s1);
1511
1512 hi_64 -= s2 < 0 ? s1 : 0;
1513 return hi_64;
1514}
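/*
 * Worked example of the correction above (values chosen for illustration):
 * with A = -1 (bit pattern 0xFFFFFFFFFFFFFFFF) and B = 2, mulu64() yields
 * hi_64 = 1 and lo_64 = 0xFFFFFFFFFFFFFFFE.  Since A < 0 the correction
 * subtracts B, giving hi_64 = 1 - 2 = -1, which is indeed the high half
 * of the true signed product -2.
 */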
1515
1516RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1517RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1518RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1519RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1520RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1521RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1522RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1523RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1524RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1525RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1526RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1527RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
f1eed927 1528GEN_VEXT_VV(vmulh_vv_b, 1)
1529GEN_VEXT_VV(vmulh_vv_h, 2)
1530GEN_VEXT_VV(vmulh_vv_w, 4)
1531GEN_VEXT_VV(vmulh_vv_d, 8)
1532GEN_VEXT_VV(vmulhu_vv_b, 1)
1533GEN_VEXT_VV(vmulhu_vv_h, 2)
1534GEN_VEXT_VV(vmulhu_vv_w, 4)
1535GEN_VEXT_VV(vmulhu_vv_d, 8)
1536GEN_VEXT_VV(vmulhsu_vv_b, 1)
1537GEN_VEXT_VV(vmulhsu_vv_h, 2)
1538GEN_VEXT_VV(vmulhsu_vv_w, 4)
1539GEN_VEXT_VV(vmulhsu_vv_d, 8)
1540
1541RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1542RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1543RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1544RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1545RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1546RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1547RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1548RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1549RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1550RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1551RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1552RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1553RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1554RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1555RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1556RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
5c19fc15 1557GEN_VEXT_VX(vmul_vx_b, 1)
1558GEN_VEXT_VX(vmul_vx_h, 2)
1559GEN_VEXT_VX(vmul_vx_w, 4)
1560GEN_VEXT_VX(vmul_vx_d, 8)
1561GEN_VEXT_VX(vmulh_vx_b, 1)
1562GEN_VEXT_VX(vmulh_vx_h, 2)
1563GEN_VEXT_VX(vmulh_vx_w, 4)
1564GEN_VEXT_VX(vmulh_vx_d, 8)
1565GEN_VEXT_VX(vmulhu_vx_b, 1)
1566GEN_VEXT_VX(vmulhu_vx_h, 2)
1567GEN_VEXT_VX(vmulhu_vx_w, 4)
1568GEN_VEXT_VX(vmulhu_vx_d, 8)
1569GEN_VEXT_VX(vmulhsu_vx_b, 1)
1570GEN_VEXT_VX(vmulhsu_vx_h, 2)
1571GEN_VEXT_VX(vmulhsu_vx_w, 4)
1572GEN_VEXT_VX(vmulhsu_vx_d, 8)
1573
1574/* Vector Integer Divide Instructions */
1575#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1576#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
c45eff30 1577#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \
85e6658c 1578 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
c45eff30 1579#define DO_REM(N, M) (unlikely(M == 0) ? N : \
1580 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
1581
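/*
 * The macros above encode the trap-free division results required by the
 * RISC-V spec: division by zero yields all ones (-1) for vdiv[u] and leaves
 * the dividend unchanged for vrem[u]; signed overflow (most-negative
 * dividend divided by -1) yields the dividend for vdiv and 0 for vrem.
 */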
1582RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1583RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1584RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1585RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1586RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1587RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1588RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1589RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1590RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1591RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1592RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1593RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1594RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1595RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1596RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1597RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
f1eed927 1598GEN_VEXT_VV(vdivu_vv_b, 1)
1599GEN_VEXT_VV(vdivu_vv_h, 2)
1600GEN_VEXT_VV(vdivu_vv_w, 4)
1601GEN_VEXT_VV(vdivu_vv_d, 8)
1602GEN_VEXT_VV(vdiv_vv_b, 1)
1603GEN_VEXT_VV(vdiv_vv_h, 2)
1604GEN_VEXT_VV(vdiv_vv_w, 4)
1605GEN_VEXT_VV(vdiv_vv_d, 8)
1606GEN_VEXT_VV(vremu_vv_b, 1)
1607GEN_VEXT_VV(vremu_vv_h, 2)
1608GEN_VEXT_VV(vremu_vv_w, 4)
1609GEN_VEXT_VV(vremu_vv_d, 8)
1610GEN_VEXT_VV(vrem_vv_b, 1)
1611GEN_VEXT_VV(vrem_vv_h, 2)
1612GEN_VEXT_VV(vrem_vv_w, 4)
1613GEN_VEXT_VV(vrem_vv_d, 8)
1614
1615RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1616RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1617RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1618RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1619RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1620RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1621RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1622RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1623RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1624RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1625RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1626RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1627RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1628RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1629RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1630RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
5c19fc15 1631GEN_VEXT_VX(vdivu_vx_b, 1)
1632GEN_VEXT_VX(vdivu_vx_h, 2)
1633GEN_VEXT_VX(vdivu_vx_w, 4)
1634GEN_VEXT_VX(vdivu_vx_d, 8)
1635GEN_VEXT_VX(vdiv_vx_b, 1)
1636GEN_VEXT_VX(vdiv_vx_h, 2)
1637GEN_VEXT_VX(vdiv_vx_w, 4)
1638GEN_VEXT_VX(vdiv_vx_d, 8)
1639GEN_VEXT_VX(vremu_vx_b, 1)
1640GEN_VEXT_VX(vremu_vx_h, 2)
1641GEN_VEXT_VX(vremu_vx_w, 4)
1642GEN_VEXT_VX(vremu_vx_d, 8)
1643GEN_VEXT_VX(vrem_vx_b, 1)
1644GEN_VEXT_VX(vrem_vx_h, 2)
1645GEN_VEXT_VX(vrem_vx_w, 4)
1646GEN_VEXT_VX(vrem_vx_d, 8)
1647
1648/* Vector Widening Integer Multiply Instructions */
1649RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1650RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1651RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1652RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1653RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1654RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1655RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1656RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1657RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
f1eed927 1658GEN_VEXT_VV(vwmul_vv_b, 2)
1659GEN_VEXT_VV(vwmul_vv_h, 4)
1660GEN_VEXT_VV(vwmul_vv_w, 8)
1661GEN_VEXT_VV(vwmulu_vv_b, 2)
1662GEN_VEXT_VV(vwmulu_vv_h, 4)
1663GEN_VEXT_VV(vwmulu_vv_w, 8)
1664GEN_VEXT_VV(vwmulsu_vv_b, 2)
1665GEN_VEXT_VV(vwmulsu_vv_h, 4)
1666GEN_VEXT_VV(vwmulsu_vv_w, 8)
1667
1668RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1669RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1670RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1671RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1672RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1673RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1674RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1675RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1676RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
5c19fc15 1677GEN_VEXT_VX(vwmul_vx_b, 2)
1678GEN_VEXT_VX(vwmul_vx_h, 4)
1679GEN_VEXT_VX(vwmul_vx_w, 8)
1680GEN_VEXT_VX(vwmulu_vx_b, 2)
1681GEN_VEXT_VX(vwmulu_vx_h, 4)
1682GEN_VEXT_VX(vwmulu_vx_w, 8)
1683GEN_VEXT_VX(vwmulsu_vx_b, 2)
1684GEN_VEXT_VX(vwmulsu_vx_h, 4)
1685GEN_VEXT_VX(vwmulsu_vx_w, 8)
1686
1687/* Vector Single-Width Integer Multiply-Add Instructions */
c45eff30 1688#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1689static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1690{ \
1691 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1692 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1693 TD d = *((TD *)vd + HD(i)); \
1694 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1695}
1696
1697#define DO_MACC(N, M, D) (M * N + D)
1698#define DO_NMSAC(N, M, D) (-(M * N) + D)
1699#define DO_MADD(N, M, D) (M * D + N)
1700#define DO_NMSUB(N, M, D) (-(M * D) + N)
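/*
 * Note the operand roles in the OPIVV3/OPIVX3 expansion: N is the vs2
 * element, M is the vs1 element (or the x[rs1] scalar for the _vx forms)
 * and D is the current destination element.  So vmacc/vnmsac overwrite vd
 * with +/-(vs1 * vs2) + vd, while vmadd/vnmsub overwrite vd with
 * +/-(vs1 * vd) + vs2.
 */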
1701RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1702RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1703RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1704RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1705RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1706RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1707RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1708RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1709RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1710RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1711RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1712RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1713RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1714RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1715RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1716RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
f1eed927 1717GEN_VEXT_VV(vmacc_vv_b, 1)
1718GEN_VEXT_VV(vmacc_vv_h, 2)
1719GEN_VEXT_VV(vmacc_vv_w, 4)
1720GEN_VEXT_VV(vmacc_vv_d, 8)
1721GEN_VEXT_VV(vnmsac_vv_b, 1)
1722GEN_VEXT_VV(vnmsac_vv_h, 2)
1723GEN_VEXT_VV(vnmsac_vv_w, 4)
1724GEN_VEXT_VV(vnmsac_vv_d, 8)
1725GEN_VEXT_VV(vmadd_vv_b, 1)
1726GEN_VEXT_VV(vmadd_vv_h, 2)
1727GEN_VEXT_VV(vmadd_vv_w, 4)
1728GEN_VEXT_VV(vmadd_vv_d, 8)
1729GEN_VEXT_VV(vnmsub_vv_b, 1)
1730GEN_VEXT_VV(vnmsub_vv_h, 2)
1731GEN_VEXT_VV(vnmsub_vv_w, 4)
1732GEN_VEXT_VV(vnmsub_vv_d, 8)
1733
1734#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1735static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1736{ \
1737 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1738 TD d = *((TD *)vd + HD(i)); \
1739 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1740}
1741
1742RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1743RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1744RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1745RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1746RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1747RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1748RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1749RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1750RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1751RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1752RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1753RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1754RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1755RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1756RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1757RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
5c19fc15 1758GEN_VEXT_VX(vmacc_vx_b, 1)
1759GEN_VEXT_VX(vmacc_vx_h, 2)
1760GEN_VEXT_VX(vmacc_vx_w, 4)
1761GEN_VEXT_VX(vmacc_vx_d, 8)
1762GEN_VEXT_VX(vnmsac_vx_b, 1)
1763GEN_VEXT_VX(vnmsac_vx_h, 2)
1764GEN_VEXT_VX(vnmsac_vx_w, 4)
1765GEN_VEXT_VX(vnmsac_vx_d, 8)
1766GEN_VEXT_VX(vmadd_vx_b, 1)
1767GEN_VEXT_VX(vmadd_vx_h, 2)
1768GEN_VEXT_VX(vmadd_vx_w, 4)
1769GEN_VEXT_VX(vmadd_vx_d, 8)
1770GEN_VEXT_VX(vnmsub_vx_b, 1)
1771GEN_VEXT_VX(vnmsub_vx_h, 2)
1772GEN_VEXT_VX(vnmsub_vx_w, 4)
1773GEN_VEXT_VX(vnmsub_vx_d, 8)
1774
1775/* Vector Widening Integer Multiply-Add Instructions */
1776RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1777RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1778RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1779RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1780RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1781RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1782RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1783RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1784RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
f1eed927 1785GEN_VEXT_VV(vwmaccu_vv_b, 2)
1786GEN_VEXT_VV(vwmaccu_vv_h, 4)
1787GEN_VEXT_VV(vwmaccu_vv_w, 8)
1788GEN_VEXT_VV(vwmacc_vv_b, 2)
1789GEN_VEXT_VV(vwmacc_vv_h, 4)
1790GEN_VEXT_VV(vwmacc_vv_w, 8)
1791GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1792GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1793GEN_VEXT_VV(vwmaccsu_vv_w, 8)
1794
1795RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1796RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1797RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1798RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1799RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1800RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1801RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1802RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1803RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1804RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1805RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1806RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
5c19fc15 1807GEN_VEXT_VX(vwmaccu_vx_b, 2)
1808GEN_VEXT_VX(vwmaccu_vx_h, 4)
1809GEN_VEXT_VX(vwmaccu_vx_w, 8)
1810GEN_VEXT_VX(vwmacc_vx_b, 2)
1811GEN_VEXT_VX(vwmacc_vx_h, 4)
1812GEN_VEXT_VX(vwmacc_vx_w, 8)
1813GEN_VEXT_VX(vwmaccsu_vx_b, 2)
1814GEN_VEXT_VX(vwmaccsu_vx_h, 4)
1815GEN_VEXT_VX(vwmaccsu_vx_w, 8)
1816GEN_VEXT_VX(vwmaccus_vx_b, 2)
1817GEN_VEXT_VX(vwmaccus_vx_h, 4)
1818GEN_VEXT_VX(vwmaccus_vx_w, 8)
1819
1820/* Vector Integer Merge and Move Instructions */
3479a814 1821#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
1822void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1823 uint32_t desc) \
1824{ \
1825 uint32_t vl = env->vl; \
89a32de2 1826 uint32_t esz = sizeof(ETYPE); \
1827 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1828 uint32_t vta = vext_vta(desc); \
1829 uint32_t i; \
1830 \
1831 VSTART_CHECK_EARLY_EXIT(env); \
1832 \
f714361e 1833 for (i = env->vstart; i < vl; i++) { \
1834 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1835 *((ETYPE *)vd + H(i)) = s1; \
1836 } \
f714361e 1837 env->vstart = 0; \
89a32de2 1838 /* set tail elements to 1s */ \
1839 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1840}
1841
1842GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1843GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1844GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1845GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
f020a7a1 1846
3479a814 1847#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
1848void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1849 uint32_t desc) \
1850{ \
1851 uint32_t vl = env->vl; \
89a32de2 1852 uint32_t esz = sizeof(ETYPE); \
1853 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1854 uint32_t vta = vext_vta(desc); \
1855 uint32_t i; \
1856 \
1857 VSTART_CHECK_EARLY_EXIT(env); \
1858 \
f714361e 1859 for (i = env->vstart; i < vl; i++) { \
1860 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
1861 } \
f714361e 1862 env->vstart = 0; \
89a32de2 1863 /* set tail elements to 1s */ \
1864 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1865}
1866
1867GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
1868GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1869GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1870GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
f020a7a1 1871
3479a814 1872#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
1873void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1874 CPURISCVState *env, uint32_t desc) \
1875{ \
f020a7a1 1876 uint32_t vl = env->vl; \
89a32de2 1877 uint32_t esz = sizeof(ETYPE); \
1878 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1879 uint32_t vta = vext_vta(desc); \
1880 uint32_t i; \
1881 \
1882 VSTART_CHECK_EARLY_EXIT(env); \
1883 \
f714361e 1884 for (i = env->vstart; i < vl; i++) { \
f9298de5 1885 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
1886 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
1887 } \
f714361e 1888 env->vstart = 0; \
89a32de2 1889 /* set tail elements to 1s */ \
1890 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1891}
1892
1893GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
1894GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1895GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1896GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
f020a7a1 1897
3479a814 1898#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
1899void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1900 void *vs2, CPURISCVState *env, uint32_t desc) \
1901{ \
f020a7a1 1902 uint32_t vl = env->vl; \
89a32de2 1903 uint32_t esz = sizeof(ETYPE); \
1904 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1905 uint32_t vta = vext_vta(desc); \
1906 uint32_t i; \
1907 \
1908 VSTART_CHECK_EARLY_EXIT(env); \
1909 \
f714361e 1910 for (i = env->vstart; i < vl; i++) { \
f020a7a1 1911 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1912 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
1913 (ETYPE)(target_long)s1); \
1914 *((ETYPE *)vd + H(i)) = d; \
1915 } \
f714361e 1916 env->vstart = 0; \
89a32de2 1917 /* set tail elements to 1s */ \
1918 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1919}
1920
1921GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
1922GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1923GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1924GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
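/*
 * For the merge helpers above, element i of vd receives vs1[i] (or the
 * x[rs1] scalar for the _vxm forms) when the mask bit v0.mask[i] is set,
 * and vs2[i] otherwise.
 */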
1925
1926/*
3b57254d 1927 * Vector Fixed-Point Arithmetic Instructions
1928 */
1929
1930/* Vector Single-Width Saturating Add and Subtract */
1931
1932/*
 * Fixed-point instructions need a rounding mode and may saturate, so the
 * common fixed-point macros are defined here.
1935 */
1936typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1937 CPURISCVState *env, int vxrm);
1938
1939#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1940static inline void \
1941do_##NAME(void *vd, void *vs1, void *vs2, int i, \
1942 CPURISCVState *env, int vxrm) \
1943{ \
1944 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1945 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1946 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
1947}
1948
1949static inline void
1950vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1951 CPURISCVState *env,
f9298de5 1952 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 1953 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 1954{
1955 VSTART_CHECK_EARLY_EXIT(env);
1956
f714361e 1957 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 1958 if (!vm && !vext_elem_mask(v0, i)) {
1959 /* set masked-off elements to 1s */
1960 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
1961 continue;
1962 }
1963 fn(vd, vs1, vs2, i, env, vxrm);
1964 }
f714361e 1965 env->vstart = 0;
1966}
1967
1968static inline void
1969vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1970 CPURISCVState *env,
8a085fb2 1971 uint32_t desc,
09106eed 1972 opivv2_rm_fn *fn, uint32_t esz)
eb2650e3 1973{
1974 uint32_t vm = vext_vm(desc);
1975 uint32_t vl = env->vl;
09106eed 1976 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
1977 uint32_t vta = vext_vta(desc);
72e17a9f 1978 uint32_t vma = vext_vma(desc);
1979
1980 switch (env->vxrm) {
1981 case 0: /* rnu */
1982 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1983 env, vl, vm, 0, fn, vma, esz);
1984 break;
1985 case 1: /* rne */
1986 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1987 env, vl, vm, 1, fn, vma, esz);
1988 break;
1989 case 2: /* rdn */
1990 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1991 env, vl, vm, 2, fn, vma, esz);
1992 break;
1993 default: /* rod */
1994 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1995 env, vl, vm, 3, fn, vma, esz);
1996 break;
1997 }
09106eed 1998 /* set tail elements to 1s */
1999 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2000}
2001
2002/* generate helpers for fixed point instructions with OPIVV format */
09106eed 2003#define GEN_VEXT_VV_RM(NAME, ESZ) \
2004void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2005 CPURISCVState *env, uint32_t desc) \
2006{ \
8a085fb2 2007 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
09106eed 2008 do_##NAME, ESZ); \
2009}
2010
2011static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
2012 uint8_t b)
2013{
2014 uint8_t res = a + b;
2015 if (res < a) {
2016 res = UINT8_MAX;
2017 env->vxsat = 0x1;
2018 }
2019 return res;
2020}
2021
2022static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2023 uint16_t b)
2024{
2025 uint16_t res = a + b;
2026 if (res < a) {
2027 res = UINT16_MAX;
2028 env->vxsat = 0x1;
2029 }
2030 return res;
2031}
2032
2033static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2034 uint32_t b)
2035{
2036 uint32_t res = a + b;
2037 if (res < a) {
2038 res = UINT32_MAX;
2039 env->vxsat = 0x1;
2040 }
2041 return res;
2042}
2043
2044static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2045 uint64_t b)
2046{
2047 uint64_t res = a + b;
2048 if (res < a) {
2049 res = UINT64_MAX;
2050 env->vxsat = 0x1;
2051 }
2052 return res;
2053}
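/*
 * For the unsigned saturating adds above, "res < a" can only be true when
 * the addition wrapped around.  For example saddu8(200, 100) computes
 * res = 44, detects 44 < 200, and returns UINT8_MAX with vxsat set.
 */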
2054
2055RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2056RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2057RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2058RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
09106eed 2059GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2060GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2061GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2062GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
2063
2064typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2065 CPURISCVState *env, int vxrm);
2066
2067#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2068static inline void \
2069do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2070 CPURISCVState *env, int vxrm) \
2071{ \
2072 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2073 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2074}
2075
2076static inline void
2077vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2078 CPURISCVState *env,
f9298de5 2079 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 2080 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 2081{
2082 VSTART_CHECK_EARLY_EXIT(env);
2083
f714361e 2084 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2085 if (!vm && !vext_elem_mask(v0, i)) {
2086 /* set masked-off elements to 1s */
2087 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2088 continue;
2089 }
2090 fn(vd, s1, vs2, i, env, vxrm);
2091 }
f714361e 2092 env->vstart = 0;
2093}
2094
2095static inline void
2096vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2097 CPURISCVState *env,
8a085fb2 2098 uint32_t desc,
09106eed 2099 opivx2_rm_fn *fn, uint32_t esz)
eb2650e3 2100{
2101 uint32_t vm = vext_vm(desc);
2102 uint32_t vl = env->vl;
09106eed 2103 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2104 uint32_t vta = vext_vta(desc);
72e17a9f 2105 uint32_t vma = vext_vma(desc);
2106
2107 switch (env->vxrm) {
2108 case 0: /* rnu */
2109 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2110 env, vl, vm, 0, fn, vma, esz);
2111 break;
2112 case 1: /* rne */
2113 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2114 env, vl, vm, 1, fn, vma, esz);
2115 break;
2116 case 2: /* rdn */
2117 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2118 env, vl, vm, 2, fn, vma, esz);
2119 break;
2120 default: /* rod */
2121 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2122 env, vl, vm, 3, fn, vma, esz);
2123 break;
2124 }
09106eed 2125 /* set tail elements to 1s */
2126 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2127}
2128
2129/* generate helpers for fixed point instructions with OPIVX format */
09106eed 2130#define GEN_VEXT_VX_RM(NAME, ESZ) \
eb2650e3 2131void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2132 void *vs2, CPURISCVState *env, \
2133 uint32_t desc) \
eb2650e3 2134{ \
8a085fb2 2135 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
09106eed 2136 do_##NAME, ESZ); \
2137}
2138
2139RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2140RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2141RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2142RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
09106eed 2143GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2144GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2145GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2146GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
2147
2148static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2149{
2150 int8_t res = a + b;
2151 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2152 res = a > 0 ? INT8_MAX : INT8_MIN;
2153 env->vxsat = 0x1;
2154 }
2155 return res;
2156}
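/*
 * "(res ^ a) & (res ^ b) & INT8_MIN" is the usual two's-complement overflow
 * test: it is non-zero only when a and b have the same sign and res has the
 * opposite one.  For example sadd8(100, 100) computes res = -56, detects the
 * overflow, and returns INT8_MAX (127) with vxsat set.
 */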
2157
2158static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
2159 int16_t b)
2160{
2161 int16_t res = a + b;
2162 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2163 res = a > 0 ? INT16_MAX : INT16_MIN;
2164 env->vxsat = 0x1;
2165 }
2166 return res;
2167}
2168
2169static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
2170 int32_t b)
2171{
2172 int32_t res = a + b;
2173 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2174 res = a > 0 ? INT32_MAX : INT32_MIN;
2175 env->vxsat = 0x1;
2176 }
2177 return res;
2178}
2179
2180static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
2181 int64_t b)
2182{
2183 int64_t res = a + b;
2184 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2185 res = a > 0 ? INT64_MAX : INT64_MIN;
2186 env->vxsat = 0x1;
2187 }
2188 return res;
2189}
2190
2191RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2192RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2193RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2194RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
09106eed 2195GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2196GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2197GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2198GEN_VEXT_VV_RM(vsadd_vv_d, 8)
2199
2200RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2201RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2202RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2203RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
09106eed 2204GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2205GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2206GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2207GEN_VEXT_VX_RM(vsadd_vx_d, 8)
eb2650e3 2208
2209static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2210 uint8_t b)
2211{
2212 uint8_t res = a - b;
2213 if (res > a) {
2214 res = 0;
2215 env->vxsat = 0x1;
2216 }
2217 return res;
2218}
2219
2220static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2221 uint16_t b)
2222{
2223 uint16_t res = a - b;
2224 if (res > a) {
2225 res = 0;
2226 env->vxsat = 0x1;
2227 }
2228 return res;
2229}
2230
2231static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2232 uint32_t b)
2233{
2234 uint32_t res = a - b;
2235 if (res > a) {
2236 res = 0;
2237 env->vxsat = 0x1;
2238 }
2239 return res;
2240}
2241
2242static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2243 uint64_t b)
2244{
2245 uint64_t res = a - b;
2246 if (res > a) {
2247 res = 0;
2248 env->vxsat = 0x1;
2249 }
2250 return res;
2251}
2252
2253RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2254RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2255RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2256RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
09106eed 2257GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2258GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2259GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2260GEN_VEXT_VV_RM(vssubu_vv_d, 8)
2261
2262RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2263RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2264RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2265RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
09106eed 2266GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2267GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2268GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2269GEN_VEXT_VX_RM(vssubu_vx_d, 8)
2270
2271static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2272{
2273 int8_t res = a - b;
2274 if ((res ^ a) & (a ^ b) & INT8_MIN) {
65606f21 2275 res = a >= 0 ? INT8_MAX : INT8_MIN;
eb2650e3
LZ
2276 env->vxsat = 0x1;
2277 }
2278 return res;
2279}
2280
2281static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
2282 int16_t b)
2283{
2284 int16_t res = a - b;
2285 if ((res ^ a) & (a ^ b) & INT16_MIN) {
65606f21 2286 res = a >= 0 ? INT16_MAX : INT16_MIN;
2287 env->vxsat = 0x1;
2288 }
2289 return res;
2290}
2291
2292static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
2293 int32_t b)
2294{
2295 int32_t res = a - b;
2296 if ((res ^ a) & (a ^ b) & INT32_MIN) {
65606f21 2297 res = a >= 0 ? INT32_MAX : INT32_MIN;
2298 env->vxsat = 0x1;
2299 }
2300 return res;
2301}
2302
2303static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
2304 int64_t b)
2305{
2306 int64_t res = a - b;
2307 if ((res ^ a) & (a ^ b) & INT64_MIN) {
65606f21 2308 res = a >= 0 ? INT64_MAX : INT64_MIN;
2309 env->vxsat = 0x1;
2310 }
2311 return res;
2312}
2313
2314RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2315RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2316RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2317RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
09106eed 2318GEN_VEXT_VV_RM(vssub_vv_b, 1)
2319GEN_VEXT_VV_RM(vssub_vv_h, 2)
2320GEN_VEXT_VV_RM(vssub_vv_w, 4)
2321GEN_VEXT_VV_RM(vssub_vv_d, 8)
2322
2323RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2324RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2325RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2326RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
09106eed 2327GEN_VEXT_VX_RM(vssub_vx_b, 1)
2328GEN_VEXT_VX_RM(vssub_vx_h, 2)
2329GEN_VEXT_VX_RM(vssub_vx_w, 4)
2330GEN_VEXT_VX_RM(vssub_vx_d, 8)
2331
2332/* Vector Single-Width Averaging Add and Subtract */
2333static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2334{
2335 uint8_t d = extract64(v, shift, 1);
2336 uint8_t d1;
2337 uint64_t D1, D2;
2338
2339 if (shift == 0 || shift > 64) {
2340 return 0;
2341 }
2342
2343 d1 = extract64(v, shift - 1, 1);
2344 D1 = extract64(v, 0, shift);
2345 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2346 return d1;
2347 } else if (vxrm == 1) { /* round-to-nearest-even */
2348 if (shift > 1) {
2349 D2 = extract64(v, 0, shift - 1);
2350 return d1 & ((D2 != 0) | d);
2351 } else {
2352 return d1 & d;
2353 }
2354 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2355 return !d & (D1 != 0);
2356 }
2357 return 0; /* round-down (truncate) */
2358}
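/*
 * Example: for v = 2 (binary 10) and shift = 2 the discarded fraction is
 * exactly 0.5 ulp, so get_round() returns 1 for rnu (round up), 0 for rne
 * (round to the even value 0), 0 for rdn (truncate) and 1 for rod (force
 * the result's LSB to 1).
 */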
2359
2360static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
2361 int32_t b)
2362{
2363 int64_t res = (int64_t)a + b;
2364 uint8_t round = get_round(vxrm, res, 1);
2365
2366 return (res >> 1) + round;
2367}
2368
2369static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
2370 int64_t b)
2371{
2372 int64_t res = a + b;
2373 uint8_t round = get_round(vxrm, res, 1);
2374 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2375
2376 /* With signed overflow, bit 64 is inverse of bit 63. */
2377 return ((res >> 1) ^ over) + round;
2378}
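/*
 * Example: aadd64(INT64_MAX, INT64_MAX) wraps to res = -2, but the computed
 * 'over' term restores bit 63, so (res >> 1) ^ over yields INT64_MAX, the
 * exact average, before the rounding increment is applied.
 */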
2379
2380RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2381RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2382RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2383RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
09106eed 2384GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2385GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2386GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2387GEN_VEXT_VV_RM(vaadd_vv_d, 8)
2388
2389RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2390RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2391RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2392RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
09106eed 2393GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2394GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2395GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2396GEN_VEXT_VX_RM(vaadd_vx_d, 8)
b7aee481 2397
2398static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2399 uint32_t a, uint32_t b)
2400{
2401 uint64_t res = (uint64_t)a + b;
2402 uint8_t round = get_round(vxrm, res, 1);
2403
2404 return (res >> 1) + round;
2405}
2406
2407static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2408 uint64_t a, uint64_t b)
2409{
2410 uint64_t res = a + b;
2411 uint8_t round = get_round(vxrm, res, 1);
2412 uint64_t over = (uint64_t)(res < a) << 63;
2413
2414 return ((res >> 1) | over) + round;
2415}
2416
2417RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2418RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2419RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2420RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
09106eed 2421GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2422GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2423GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2424GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
2425
2426RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2427RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2428RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2429RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
09106eed 2430GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2431GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2432GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2433GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
8b99a110 2434
2435static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2436 int32_t b)
2437{
2438 int64_t res = (int64_t)a - b;
2439 uint8_t round = get_round(vxrm, res, 1);
2440
2441 return (res >> 1) + round;
2442}
2443
2444static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
2445 int64_t b)
2446{
2447 int64_t res = (int64_t)a - b;
2448 uint8_t round = get_round(vxrm, res, 1);
2449 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2450
2451 /* With signed overflow, bit 64 is inverse of bit 63. */
2452 return ((res >> 1) ^ over) + round;
2453}
2454
2455RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2456RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2457RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2458RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
09106eed 2459GEN_VEXT_VV_RM(vasub_vv_b, 1)
2460GEN_VEXT_VV_RM(vasub_vv_h, 2)
2461GEN_VEXT_VV_RM(vasub_vv_w, 4)
2462GEN_VEXT_VV_RM(vasub_vv_d, 8)
2463
2464RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2465RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2466RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2467RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
09106eed 2468GEN_VEXT_VX_RM(vasub_vx_b, 1)
2469GEN_VEXT_VX_RM(vasub_vx_h, 2)
2470GEN_VEXT_VX_RM(vasub_vx_w, 4)
2471GEN_VEXT_VX_RM(vasub_vx_d, 8)
9f0ff9e5 2472
2473static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2474 uint32_t a, uint32_t b)
2475{
2476 int64_t res = (int64_t)a - b;
2477 uint8_t round = get_round(vxrm, res, 1);
2478
2479 return (res >> 1) + round;
2480}
2481
2482static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2483 uint64_t a, uint64_t b)
2484{
2485 uint64_t res = (uint64_t)a - b;
2486 uint8_t round = get_round(vxrm, res, 1);
2487 uint64_t over = (uint64_t)(res > a) << 63;
2488
2489 return ((res >> 1) | over) + round;
2490}
2491
2492RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2493RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2494RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2495RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
09106eed 2496GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2497GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2498GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2499GEN_VEXT_VV_RM(vasubu_vv_d, 8)
2500
2501RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2502RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2503RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2504RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
09106eed 2505GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2506GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2507GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2508GEN_VEXT_VX_RM(vasubu_vx_d, 8)
8b99a110 2509
2510/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2511static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2512{
2513 uint8_t round;
2514 int16_t res;
2515
2516 res = (int16_t)a * (int16_t)b;
2517 round = get_round(vxrm, res, 7);
c45eff30 2518 res = (res >> 7) + round;
2519
2520 if (res > INT8_MAX) {
2521 env->vxsat = 0x1;
2522 return INT8_MAX;
2523 } else if (res < INT8_MIN) {
2524 env->vxsat = 0x1;
2525 return INT8_MIN;
2526 } else {
2527 return res;
2528 }
2529}
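/*
 * vsmul treats the operands as signed fixed-point fractions with 7 (b),
 * 15 (h), 31 (w) or 63 (d) fraction bits.  For example vsmul8 with
 * a = b = 0x40 (0.5 * 0.5) computes res = 0x1000 >> 7 = 0x20 (0.25); only
 * INT8_MIN * INT8_MIN (-1.0 * -1.0) exceeds the representable range and
 * saturates to INT8_MAX with vxsat set.
 */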
2530
2531static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2532{
2533 uint8_t round;
2534 int32_t res;
2535
2536 res = (int32_t)a * (int32_t)b;
2537 round = get_round(vxrm, res, 15);
c45eff30 2538 res = (res >> 15) + round;
2539
2540 if (res > INT16_MAX) {
2541 env->vxsat = 0x1;
2542 return INT16_MAX;
2543 } else if (res < INT16_MIN) {
2544 env->vxsat = 0x1;
2545 return INT16_MIN;
2546 } else {
2547 return res;
2548 }
2549}
2550
2551static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2552{
2553 uint8_t round;
2554 int64_t res;
2555
2556 res = (int64_t)a * (int64_t)b;
2557 round = get_round(vxrm, res, 31);
c45eff30 2558 res = (res >> 31) + round;
2559
2560 if (res > INT32_MAX) {
2561 env->vxsat = 0x1;
2562 return INT32_MAX;
2563 } else if (res < INT32_MIN) {
2564 env->vxsat = 0x1;
2565 return INT32_MIN;
2566 } else {
2567 return res;
2568 }
2569}
2570
2571static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2572{
2573 uint8_t round;
2574 uint64_t hi_64, lo_64;
2575 int64_t res;
2576
2577 if (a == INT64_MIN && b == INT64_MIN) {
2578 env->vxsat = 1;
2579 return INT64_MAX;
2580 }
2581
2582 muls64(&lo_64, &hi_64, a, b);
2583 round = get_round(vxrm, lo_64, 63);
2584 /*
2585 * Cannot overflow, as there are always
2586 * 2 sign bits after multiply.
2587 */
2588 res = (hi_64 << 1) | (lo_64 >> 63);
2589 if (round) {
2590 if (res == INT64_MAX) {
2591 env->vxsat = 1;
2592 } else {
2593 res += 1;
2594 }
2595 }
2596 return res;
2597}
2598
2599RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2600RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2601RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2602RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
09106eed 2603GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2604GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2605GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2606GEN_VEXT_VV_RM(vsmul_vv_d, 8)
2607
2608RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2609RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2610RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2611RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
09106eed 2612GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2613GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2614GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2615GEN_VEXT_VX_RM(vsmul_vx_d, 8)
0a1eaf00 2616
2617/* Vector Single-Width Scaling Shift Instructions */
2618static inline uint8_t
2619vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2620{
2621 uint8_t round, shift = b & 0x7;
2622 uint8_t res;
2623
2624 round = get_round(vxrm, a, shift);
c45eff30 2625 res = (a >> shift) + round;
2626 return res;
2627}
2628static inline uint16_t
2629vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2630{
2631 uint8_t round, shift = b & 0xf;
2632
2633 round = get_round(vxrm, a, shift);
66997c42 2634 return (a >> shift) + round;
2635}
2636static inline uint32_t
2637vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2638{
2639 uint8_t round, shift = b & 0x1f;
2640
2641 round = get_round(vxrm, a, shift);
66997c42 2642 return (a >> shift) + round;
2643}
2644static inline uint64_t
2645vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2646{
2647 uint8_t round, shift = b & 0x3f;
2648
2649 round = get_round(vxrm, a, shift);
66997c42 2650 return (a >> shift) + round;
2651}
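/*
 * These helpers implement a logical right shift with the rounding increment
 * folded in.  For example vssrl8 with a = 23 and shift = 3 under rnu
 * computes (23 >> 3) + 1 = 3, since the most significant discarded bit is
 * set.
 */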
2652RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2653RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2654RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2655RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
09106eed 2656GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2657GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2658GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2659GEN_VEXT_VV_RM(vssrl_vv_d, 8)
2660
2661RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2662RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2663RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2664RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
09106eed 2665GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2666GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2667GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2668GEN_VEXT_VX_RM(vssrl_vx_d, 8)
2669
2670static inline int8_t
2671vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2672{
2673 uint8_t round, shift = b & 0x7;
2674
2675 round = get_round(vxrm, a, shift);
66997c42 2676 return (a >> shift) + round;
2677}
2678static inline int16_t
2679vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2680{
2681 uint8_t round, shift = b & 0xf;
2682
2683 round = get_round(vxrm, a, shift);
66997c42 2684 return (a >> shift) + round;
2685}
2686static inline int32_t
2687vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2688{
2689 uint8_t round, shift = b & 0x1f;
2690
2691 round = get_round(vxrm, a, shift);
66997c42 2692 return (a >> shift) + round;
2693}
2694static inline int64_t
2695vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2696{
2697 uint8_t round, shift = b & 0x3f;
2698
2699 round = get_round(vxrm, a, shift);
66997c42 2700 return (a >> shift) + round;
04a61406 2701}
9ff3d287 2702
2703RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2704RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2705RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2706RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
09106eed 2707GEN_VEXT_VV_RM(vssra_vv_b, 1)
2708GEN_VEXT_VV_RM(vssra_vv_h, 2)
2709GEN_VEXT_VV_RM(vssra_vv_w, 4)
2710GEN_VEXT_VV_RM(vssra_vv_d, 8)
2711
2712RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2713RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2714RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2715RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
09106eed 2716GEN_VEXT_VX_RM(vssra_vx_b, 1)
2717GEN_VEXT_VX_RM(vssra_vx_h, 2)
2718GEN_VEXT_VX_RM(vssra_vx_w, 4)
2719GEN_VEXT_VX_RM(vssra_vx_d, 8)
2720
2721/* Vector Narrowing Fixed-Point Clip Instructions */
2722static inline int8_t
2723vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2724{
2725 uint8_t round, shift = b & 0xf;
2726 int16_t res;
2727
2728 round = get_round(vxrm, a, shift);
c45eff30 2729 res = (a >> shift) + round;
2730 if (res > INT8_MAX) {
2731 env->vxsat = 0x1;
2732 return INT8_MAX;
2733 } else if (res < INT8_MIN) {
2734 env->vxsat = 0x1;
2735 return INT8_MIN;
2736 } else {
2737 return res;
2738 }
2739}
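/*
 * Example: for a = 0x1234 and shift = 4 under rnu, the rounded shift gives
 * 0x123, which still exceeds INT8_MAX, so the narrowing clip saturates to
 * 0x7f and sets vxsat.
 */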
2740
2741static inline int16_t
2742vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2743{
2744 uint8_t round, shift = b & 0x1f;
2745 int32_t res;
2746
2747 round = get_round(vxrm, a, shift);
c45eff30 2748 res = (a >> shift) + round;
2749 if (res > INT16_MAX) {
2750 env->vxsat = 0x1;
2751 return INT16_MAX;
2752 } else if (res < INT16_MIN) {
2753 env->vxsat = 0x1;
2754 return INT16_MIN;
2755 } else {
2756 return res;
2757 }
2758}
2759
2760static inline int32_t
2761vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2762{
2763 uint8_t round, shift = b & 0x3f;
2764 int64_t res;
2765
2766 round = get_round(vxrm, a, shift);
c45eff30 2767 res = (a >> shift) + round;
2768 if (res > INT32_MAX) {
2769 env->vxsat = 0x1;
2770 return INT32_MAX;
2771 } else if (res < INT32_MIN) {
2772 env->vxsat = 0x1;
2773 return INT32_MIN;
2774 } else {
2775 return res;
2776 }
2777}
2778
2779RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2780RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2781RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
09106eed 2782GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2783GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2784GEN_VEXT_VV_RM(vnclip_wv_w, 4)
2785
2786RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2787RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2788RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
09106eed 2789GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2790GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2791GEN_VEXT_VX_RM(vnclip_wx_w, 4)
2792
2793static inline uint8_t
2794vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2795{
2796 uint8_t round, shift = b & 0xf;
2797 uint16_t res;
2798
2799 round = get_round(vxrm, a, shift);
c45eff30 2800 res = (a >> shift) + round;
2801 if (res > UINT8_MAX) {
2802 env->vxsat = 0x1;
2803 return UINT8_MAX;
2804 } else {
2805 return res;
2806 }
2807}
2808
2809static inline uint16_t
2810vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2811{
2812 uint8_t round, shift = b & 0x1f;
2813 uint32_t res;
2814
2815 round = get_round(vxrm, a, shift);
c45eff30 2816 res = (a >> shift) + round;
2817 if (res > UINT16_MAX) {
2818 env->vxsat = 0x1;
2819 return UINT16_MAX;
2820 } else {
2821 return res;
2822 }
2823}
2824
2825static inline uint32_t
2826vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2827{
2828 uint8_t round, shift = b & 0x3f;
a70b3a73 2829 uint64_t res;
2830
2831 round = get_round(vxrm, a, shift);
c45eff30 2832 res = (a >> shift) + round;
2833 if (res > UINT32_MAX) {
2834 env->vxsat = 0x1;
2835 return UINT32_MAX;
2836 } else {
2837 return res;
2838 }
2839}
2840
2841RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2842RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2843RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
09106eed 2844GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
2845GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
2846GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
9ff3d287 2847
2848RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2849RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2850RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
09106eed 2851GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
2852GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
2853GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
2854
2855/*
 * Vector Floating-Point Arithmetic Instructions
2857 */
2858/* Vector Single-Width Floating-Point Add/Subtract Instructions */
2859#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2860static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2861 CPURISCVState *env) \
2862{ \
2863 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2864 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2865 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
2866}
2867
5eacf7d8 2868#define GEN_VEXT_VV_ENV(NAME, ESZ) \
2869void HELPER(NAME)(void *vd, void *v0, void *vs1, \
2870 void *vs2, CPURISCVState *env, \
2871 uint32_t desc) \
2872{ \
2873 uint32_t vm = vext_vm(desc); \
2874 uint32_t vl = env->vl; \
5eacf7d8 2875 uint32_t total_elems = \
2876 vext_get_total_elems(env, desc, ESZ); \
2877 uint32_t vta = vext_vta(desc); \
5b448f44 2878 uint32_t vma = vext_vma(desc); \
2879 uint32_t i; \
2880 \
2881 VSTART_CHECK_EARLY_EXIT(env); \
2882 \
f714361e 2883 for (i = env->vstart; i < vl; i++) { \
f9298de5 2884 if (!vm && !vext_elem_mask(v0, i)) { \
2885 /* set masked-off elements to 1s */ \
2886 vext_set_elems_1s(vd, vma, i * ESZ, \
2887 (i + 1) * ESZ); \
2888 continue; \
2889 } \
2890 do_##NAME(vd, vs1, vs2, i, env); \
2891 } \
f714361e 2892 env->vstart = 0; \
5eacf7d8 2893 /* set tail elements to 1s */ \
2894 vext_set_elems_1s(vd, vta, vl * ESZ, \
2895 total_elems * ESZ); \
2896}
2897
2898RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2899RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2900RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
5eacf7d8 2901GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
2902GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
2903GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
2904
2905#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2906static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2907 CPURISCVState *env) \
2908{ \
2909 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2910 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2911}
2912
5eacf7d8 2913#define GEN_VEXT_VF(NAME, ESZ) \
2914void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
2915 void *vs2, CPURISCVState *env, \
2916 uint32_t desc) \
2917{ \
2918 uint32_t vm = vext_vm(desc); \
2919 uint32_t vl = env->vl; \
5eacf7d8 2920 uint32_t total_elems = \
c45eff30 2921 vext_get_total_elems(env, desc, ESZ); \
5eacf7d8 2922 uint32_t vta = vext_vta(desc); \
5b448f44 2923 uint32_t vma = vext_vma(desc); \
2924 uint32_t i; \
2925 \
2926 VSTART_CHECK_EARLY_EXIT(env); \
2927 \
f714361e 2928 for (i = env->vstart; i < vl; i++) { \
f9298de5 2929 if (!vm && !vext_elem_mask(v0, i)) { \
2930 /* set masked-off elements to 1s */ \
2931 vext_set_elems_1s(vd, vma, i * ESZ, \
2932 (i + 1) * ESZ); \
2933 continue; \
2934 } \
2935 do_##NAME(vd, s1, vs2, i, env); \
2936 } \
f714361e 2937 env->vstart = 0; \
5eacf7d8 2938 /* set tail elements to 1s */ \
2939 vext_set_elems_1s(vd, vta, vl * ESZ, \
2940 total_elems * ESZ); \
2941}
2942
2943RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2944RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2945RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
5eacf7d8 2946GEN_VEXT_VF(vfadd_vf_h, 2)
2947GEN_VEXT_VF(vfadd_vf_w, 4)
2948GEN_VEXT_VF(vfadd_vf_d, 8)
2949
2950RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2951RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2952RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
5eacf7d8 2953GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
2954GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
2955GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
2956RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2957RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2958RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
5eacf7d8 2959GEN_VEXT_VF(vfsub_vf_h, 2)
2960GEN_VEXT_VF(vfsub_vf_w, 4)
2961GEN_VEXT_VF(vfsub_vf_d, 8)
2962
2963static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2964{
2965 return float16_sub(b, a, s);
2966}
2967
2968static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2969{
2970 return float32_sub(b, a, s);
2971}
2972
2973static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2974{
2975 return float64_sub(b, a, s);
2976}
2977
2978RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2979RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2980RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
5eacf7d8 2981GEN_VEXT_VF(vfrsub_vf_h, 2)
2982GEN_VEXT_VF(vfrsub_vf_w, 4)
2983GEN_VEXT_VF(vfrsub_vf_d, 8)
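/*
 * The _rsub helpers simply swap their arguments, so vfrsub.vf computes
 * f[rs1] - vs2[i] rather than vs2[i] - f[rs1].
 */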
2984
2985/* Vector Widening Floating-Point Add/Subtract Instructions */
2986static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2987{
2988 return float32_add(float16_to_float32(a, true, s),
c45eff30 2989 float16_to_float32(b, true, s), s);
2990}
2991
2992static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2993{
2994 return float64_add(float32_to_float64(a, s),
c45eff30 2995 float32_to_float64(b, s), s);
2996
2997}
2998
2999RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3000RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
5eacf7d8 3001GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3002GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
3003RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3004RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
5eacf7d8 3005GEN_VEXT_VF(vfwadd_vf_h, 4)
3006GEN_VEXT_VF(vfwadd_vf_w, 8)
3007
3008static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3009{
3010 return float32_sub(float16_to_float32(a, true, s),
c45eff30 3011 float16_to_float32(b, true, s), s);
3012}
3013
3014static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3015{
3016 return float64_sub(float32_to_float64(a, s),
c45eff30 3017 float32_to_float64(b, s), s);
3018
3019}
3020
3021RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3022RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
5eacf7d8 3023GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3024GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
3025RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3026RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
5eacf7d8 3027GEN_VEXT_VF(vfwsub_vf_h, 4)
3028GEN_VEXT_VF(vfwsub_vf_w, 8)
3029
3030static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3031{
3032 return float32_add(a, float16_to_float32(b, true, s), s);
3033}
3034
3035static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3036{
3037 return float64_add(a, float32_to_float64(b, s), s);
3038}
3039
3040RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3041RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
5eacf7d8 3042GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3043GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
eeffab2e
LZ
3044RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3045RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
5eacf7d8 3046GEN_VEXT_VF(vfwadd_wf_h, 4)
3047GEN_VEXT_VF(vfwadd_wf_w, 8)
eeffab2e
LZ
3048
3049static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3050{
3051 return float32_sub(a, float16_to_float32(b, true, s), s);
3052}
3053
3054static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3055{
3056 return float64_sub(a, float32_to_float64(b, s), s);
3057}
3058
3059RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3060RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
5eacf7d8 3061GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3062GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
eeffab2e
LZ
3063RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3064RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
5eacf7d8 3065GEN_VEXT_VF(vfwsub_wf_h, 4)
3066GEN_VEXT_VF(vfwsub_wf_w, 8)
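/*
 * Added illustrative note (not part of the upstream source): each
 * widening helper converts its narrow operand(s) up to the double-width
 * format first and performs the arithmetic there, so only one rounding
 * step happens, at the wider precision.  For one SEW=16 element of
 * vfwadd.wv, where vs2 is already wide:
 *
 *     uint32_t res = float32_add(s2_wide,
 *                                float16_to_float32(s1_narrow, true, s), s);
 */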
3067
3068/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3069RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3070RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3071RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
5eacf7d8 3072GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3073GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3074GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
0e0057cb
LZ
3075RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3076RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3077RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
5eacf7d8 3078GEN_VEXT_VF(vfmul_vf_h, 2)
3079GEN_VEXT_VF(vfmul_vf_w, 4)
3080GEN_VEXT_VF(vfmul_vf_d, 8)
0e0057cb
LZ
3081
3082RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3083RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3084RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
5eacf7d8 3085GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3086GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3087GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
0e0057cb
LZ
3088RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3089RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3090RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
5eacf7d8 3091GEN_VEXT_VF(vfdiv_vf_h, 2)
3092GEN_VEXT_VF(vfdiv_vf_w, 4)
3093GEN_VEXT_VF(vfdiv_vf_d, 8)
0e0057cb
LZ
3094
3095static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3096{
3097 return float16_div(b, a, s);
3098}
3099
3100static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3101{
3102 return float32_div(b, a, s);
3103}
3104
3105static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3106{
3107 return float64_div(b, a, s);
3108}
3109
3110RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3111RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3112RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
5eacf7d8 3113GEN_VEXT_VF(vfrdiv_vf_h, 2)
3114GEN_VEXT_VF(vfrdiv_vf_w, 4)
3115GEN_VEXT_VF(vfrdiv_vf_d, 8)
3116
/* Vector Widening Floating-Point Multiply */
static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_mul(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_mul(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);
}
3130RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3131RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
5eacf7d8 3132GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3133GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
f7c7b7cd
LZ
3134RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3135RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
5eacf7d8 3136GEN_VEXT_VF(vfwmul_vf_h, 4)
3137GEN_VEXT_VF(vfwmul_vf_w, 8)
3138
3139/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    TD d = *((TD *)vd + HD(i));                                \
    *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);      \
}
3149
3150static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3151{
3152 return float16_muladd(a, b, d, 0, s);
3153}
3154
3155static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3156{
3157 return float32_muladd(a, b, d, 0, s);
3158}
3159
3160static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3161{
3162 return float64_muladd(a, b, d, 0, s);
3163}
3164
3165RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3166RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3167RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
5eacf7d8 3168GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3169GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3170GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
4aa5a8fe
LZ
3171
#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)            \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,     \
                      CPURISCVState *env)                          \
{                                                                  \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
    TD d = *((TD *)vd + HD(i));                                    \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status); \
}
3180
3181RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3182RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3183RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
5eacf7d8 3184GEN_VEXT_VF(vfmacc_vf_h, 2)
3185GEN_VEXT_VF(vfmacc_vf_w, 4)
3186GEN_VEXT_VF(vfmacc_vf_d, 8)
3187
static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}
3205
3206RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3207RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3208RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
5eacf7d8 3209GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3210GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3211GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
4aa5a8fe
LZ
3212RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3213RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3214RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
5eacf7d8 3215GEN_VEXT_VF(vfnmacc_vf_h, 2)
3216GEN_VEXT_VF(vfnmacc_vf_w, 4)
3217GEN_VEXT_VF(vfnmacc_vf_d, 8)
4aa5a8fe
LZ
3218
3219static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3220{
3221 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3222}
3223
3224static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3225{
3226 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3227}
3228
3229static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3230{
3231 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3232}
3233
3234RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3235RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3236RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
5eacf7d8 3237GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3238GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3239GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
4aa5a8fe
LZ
3240RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3241RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3242RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
5eacf7d8 3243GEN_VEXT_VF(vfmsac_vf_h, 2)
3244GEN_VEXT_VF(vfmsac_vf_w, 4)
3245GEN_VEXT_VF(vfmsac_vf_d, 8)
3246
3247static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3248{
3249 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3250}
3251
3252static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3253{
3254 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3255}
3256
3257static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3258{
3259 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3260}
3261
3262RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3263RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3264RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
5eacf7d8 3265GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3266GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3267GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
4aa5a8fe
LZ
3268RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3269RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3270RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
5eacf7d8 3271GEN_VEXT_VF(vfnmsac_vf_h, 2)
3272GEN_VEXT_VF(vfnmsac_vf_w, 4)
3273GEN_VEXT_VF(vfnmsac_vf_d, 8)
4aa5a8fe
LZ
3274
3275static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3276{
3277 return float16_muladd(d, b, a, 0, s);
3278}
3279
3280static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3281{
3282 return float32_muladd(d, b, a, 0, s);
3283}
3284
3285static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3286{
3287 return float64_muladd(d, b, a, 0, s);
3288}
3289
3290RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3291RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3292RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
5eacf7d8 3293GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3294GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3295GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
4aa5a8fe
LZ
3296RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3297RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3298RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
5eacf7d8 3299GEN_VEXT_VF(vfmadd_vf_h, 2)
3300GEN_VEXT_VF(vfmadd_vf_w, 4)
3301GEN_VEXT_VF(vfmadd_vf_d, 8)
4aa5a8fe
LZ
3302
static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}
3320
3321RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3322RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3323RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
5eacf7d8 3324GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3325GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3326GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
4aa5a8fe
LZ
3327RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3328RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3329RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
5eacf7d8 3330GEN_VEXT_VF(vfnmadd_vf_h, 2)
3331GEN_VEXT_VF(vfnmadd_vf_w, 4)
3332GEN_VEXT_VF(vfnmadd_vf_d, 8)
3333
3334static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3335{
3336 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3337}
3338
3339static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3340{
3341 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3342}
3343
3344static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3345{
3346 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3347}
3348
3349RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3350RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3351RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
5eacf7d8 3352GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3353GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3354GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
4aa5a8fe
LZ
3355RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3356RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3357RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
5eacf7d8 3358GEN_VEXT_VF(vfmsub_vf_h, 2)
3359GEN_VEXT_VF(vfmsub_vf_w, 4)
3360GEN_VEXT_VF(vfmsub_vf_d, 8)
4aa5a8fe
LZ
3361
3362static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3363{
3364 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3365}
3366
3367static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3368{
3369 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3370}
3371
3372static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3373{
3374 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3375}
3376
3377RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3378RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3379RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
5eacf7d8 3380GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3381GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3382GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
4aa5a8fe
LZ
3383RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3384RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3385RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
5eacf7d8 3386GEN_VEXT_VF(vfnmsub_vf_h, 2)
3387GEN_VEXT_VF(vfnmsub_vf_w, 4)
3388GEN_VEXT_VF(vfnmsub_vf_d, 8)
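/*
 * Added summary sketch (not part of the upstream source): with
 * OPFVV3/OPFVF3 passing OP(s2, s1, d), the *macc/*sac helpers multiply
 * the two sources while the *madd/*sub helpers multiply the destination,
 * e.g. for float32 elements:
 *
 *     vfmacc:  float32_muladd(s2, s1, d, 0, s)              d = (s1 * s2) + d
 *     vfnmacc: float32_muladd(s2, s1, d, negate_c |
 *                             negate_product, s)            d = -(s1 * s2) - d
 *     vfmadd:  float32_muladd(d, s1, s2, 0, s)              d = (s1 * d) + s2
 *     vfnmsub: float32_muladd(d, s1, s2, negate_product, s) d = -(s1 * d) + s2
 *
 * where negate_c / negate_product abbreviate float_muladd_negate_c and
 * float_muladd_negate_product.
 */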
3389
/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d, 0, s);
}

static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d, 0, s);
}
3402
3403RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3404RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
5eacf7d8 3405GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3406GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
0dd50959
LZ
3407RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3408RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
5eacf7d8 3409GEN_VEXT_VF(vfwmacc_vf_h, 4)
3410GEN_VEXT_VF(vfwmacc_vf_w, 8)
0dd50959 3411
static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(bfloat16_to_float32(a, s),
                          bfloat16_to_float32(b, s), d, 0, s);
}

RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3422
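/*
 * Added illustrative note (not part of the upstream source): bfloat16
 * uses the same exponent width as float32, so widening a finite bfloat16
 * value is conceptually just placing its 16 raw bits in the top half of
 * a float32 container:
 *
 *     uint32_t f32_bits = (uint32_t)bf16_bits << 16;
 *
 * bfloat16_to_float32() additionally takes care of NaN canonicalization,
 * which is why the helper above goes through softfloat.
 */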
static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
                          d, float_muladd_negate_c |
                          float_muladd_negate_product, s);
}
3437
3438RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3439RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
5eacf7d8 3440GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3441GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
0dd50959
LZ
3442RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3443RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
5eacf7d8 3444GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3445GEN_VEXT_VF(vfwnmacc_vf_w, 8)
0dd50959
LZ
3446
static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c, s);
}

static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_c, s);
}
3460
3461RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3462RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
5eacf7d8 3463GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3464GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
0dd50959
LZ
3465RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3466RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
5eacf7d8 3467GEN_VEXT_VF(vfwmsac_vf_h, 4)
3468GEN_VEXT_VF(vfwmsac_vf_w, 8)
0dd50959
LZ
3469
static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_product, s);
}

static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_product, s);
}
3483
3484RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3485RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
5eacf7d8 3486GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3487GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
0dd50959
LZ
3488RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3489RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
5eacf7d8 3490GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3491GEN_VEXT_VF(vfwnmsac_vf_w, 8)
3492
/* Vector Floating-Point Square-Root Instruction */
#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
static void do_##NAME(void *vd, void *vs2, int i,      \
                      CPURISCVState *env)              \
{                                                      \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
    *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
}

#define GEN_VEXT_V_ENV(NAME, ESZ)                      \
void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)   \
{                                                      \
    uint32_t vm = vext_vm(desc);                       \
    uint32_t vl = env->vl;                             \
    uint32_t total_elems =                             \
        vext_get_total_elems(env, desc, ESZ);          \
    uint32_t vta = vext_vta(desc);                     \
    uint32_t vma = vext_vma(desc);                     \
    uint32_t i;                                        \
                                                       \
    VSTART_CHECK_EARLY_EXIT(env);                      \
                                                       \
    if (vl == 0) {                                     \
        return;                                        \
    }                                                  \
    for (i = env->vstart; i < vl; i++) {               \
        if (!vm && !vext_elem_mask(v0, i)) {           \
            /* set masked-off elements to 1s */        \
            vext_set_elems_1s(vd, vma, i * ESZ,        \
                              (i + 1) * ESZ);          \
            continue;                                  \
        }                                              \
        do_##NAME(vd, vs2, i, env);                    \
    }                                                  \
    env->vstart = 0;                                   \
    vext_set_elems_1s(vd, vta, vl * ESZ,               \
                      total_elems * ESZ);              \
}
3532
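/*
 * Added illustrative note (not part of the upstream source): the helper
 * generated by GEN_VEXT_V_ENV implements the RVV "agnostic" policy by
 * writing all-1s both into masked-off body elements (when vma is set)
 * and into every tail element past vl (when vta is set).  With the
 * hypothetical values ESZ = 4, vl = 3 and a 16-byte register group
 * (4 elements), the tail fill amounts to:
 *
 *     vext_set_elems_1s(vd, vta, 3 * 4, 4 * 4);   bytes 12..15 become 0xff
 */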
3533RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3534RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3535RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
5eacf7d8 3536GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3537GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3538GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
230b53dd 3539
3540/*
3541 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3542 *
3543 * Adapted from riscv-v-spec recip.c:
3544 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3545 */
3546static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3547{
3548 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3549 uint64_t exp = extract64(f, frac_size, exp_size);
3550 uint64_t frac = extract64(f, 0, frac_size);
3551
3552 const uint8_t lookup_table[] = {
3553 52, 51, 50, 48, 47, 46, 44, 43,
3554 42, 41, 40, 39, 38, 36, 35, 34,
3555 33, 32, 31, 30, 30, 29, 28, 27,
3556 26, 25, 24, 23, 23, 22, 21, 20,
3557 19, 19, 18, 17, 16, 16, 15, 14,
3558 14, 13, 12, 12, 11, 10, 10, 9,
3559 9, 8, 7, 7, 6, 6, 5, 4,
3560 4, 3, 3, 2, 2, 1, 1, 0,
3561 127, 125, 123, 121, 119, 118, 116, 114,
3562 113, 111, 109, 108, 106, 105, 103, 102,
3563 100, 99, 97, 96, 95, 93, 92, 91,
3564 90, 88, 87, 86, 85, 84, 83, 82,
3565 80, 79, 78, 77, 76, 75, 74, 73,
3566 72, 71, 70, 70, 69, 68, 67, 66,
3567 65, 64, 63, 63, 62, 61, 60, 59,
3568 59, 58, 57, 56, 56, 55, 54, 53
3569 };
3570 const int precision = 7;
3571
3572 if (exp == 0 && frac != 0) { /* subnormal */
3573 /* Normalize the subnormal. */
3574 while (extract64(frac, frac_size - 1, 1) == 0) {
3575 exp--;
3576 frac <<= 1;
3577 }
3578
3579 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3580 }
3581
    int idx = ((exp & 1) << (precision - 1)) |
              (frac >> (frac_size - precision + 1));
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                        (frac_size - precision);
    uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3587
3588 uint64_t val = 0;
3589 val = deposit64(val, 0, frac_size, out_frac);
3590 val = deposit64(val, frac_size, exp_size, out_exp);
3591 val = deposit64(val, frac_size + exp_size, 1, sign);
3592 return val;
3593}
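/*
 * Added worked example (not part of the upstream source): for the
 * float32 input 4.0 (sign = 0, exp = 129, frac = 0, exp_size = 8,
 * frac_size = 23, precision = 7):
 *
 *     idx      = ((129 & 1) << 6) | 0     = 64
 *     out_frac = lookup_table[64] << 16   = 127 << 16
 *     out_exp  = (3 * 127 - 1 - 129) / 2  = 125
 *
 * which decodes to (1 + 127/128) * 2^(125 - 127) = 0.498046875, i.e.
 * 1/sqrt(4.0) = 0.5 approximated with a relative error of 2^-8, inside
 * the 2^-7 bound the estimate instruction guarantees.
 */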
3594
3595static float16 frsqrt7_h(float16 f, float_status *s)
3596{
3597 int exp_size = 5, frac_size = 10;
3598 bool sign = float16_is_neg(f);
3599
3600 /*
3601 * frsqrt7(sNaN) = canonical NaN
3602 * frsqrt7(-inf) = canonical NaN
3603 * frsqrt7(-normal) = canonical NaN
3604 * frsqrt7(-subnormal) = canonical NaN
3605 */
    if (float16_is_signaling_nan(f, s) ||
        (float16_is_infinity(f) && sign) ||
        (float16_is_normal(f) && sign) ||
        (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
3611 return float16_default_nan(s);
3612 }
3613
3614 /* frsqrt7(qNaN) = canonical NaN */
3615 if (float16_is_quiet_nan(f, s)) {
3616 return float16_default_nan(s);
3617 }
3618
3619 /* frsqrt7(+-0) = +-inf */
3620 if (float16_is_zero(f)) {
3621 s->float_exception_flags |= float_flag_divbyzero;
3622 return float16_set_sign(float16_infinity, sign);
3623 }
3624
3625 /* frsqrt7(+inf) = +0 */
3626 if (float16_is_infinity(f) && !sign) {
3627 return float16_set_sign(float16_zero, sign);
3628 }
3629
3630 /* +normal, +subnormal */
3631 uint64_t val = frsqrt7(f, exp_size, frac_size);
3632 return make_float16(val);
3633}
3634
3635static float32 frsqrt7_s(float32 f, float_status *s)
3636{
3637 int exp_size = 8, frac_size = 23;
3638 bool sign = float32_is_neg(f);
3639
3640 /*
3641 * frsqrt7(sNaN) = canonical NaN
3642 * frsqrt7(-inf) = canonical NaN
3643 * frsqrt7(-normal) = canonical NaN
3644 * frsqrt7(-subnormal) = canonical NaN
3645 */
    if (float32_is_signaling_nan(f, s) ||
        (float32_is_infinity(f) && sign) ||
        (float32_is_normal(f) && sign) ||
        (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
3651 return float32_default_nan(s);
3652 }
3653
3654 /* frsqrt7(qNaN) = canonical NaN */
3655 if (float32_is_quiet_nan(f, s)) {
3656 return float32_default_nan(s);
3657 }
3658
3659 /* frsqrt7(+-0) = +-inf */
3660 if (float32_is_zero(f)) {
3661 s->float_exception_flags |= float_flag_divbyzero;
3662 return float32_set_sign(float32_infinity, sign);
3663 }
3664
3665 /* frsqrt7(+inf) = +0 */
3666 if (float32_is_infinity(f) && !sign) {
3667 return float32_set_sign(float32_zero, sign);
3668 }
3669
3670 /* +normal, +subnormal */
3671 uint64_t val = frsqrt7(f, exp_size, frac_size);
3672 return make_float32(val);
3673}
3674
3675static float64 frsqrt7_d(float64 f, float_status *s)
3676{
3677 int exp_size = 11, frac_size = 52;
3678 bool sign = float64_is_neg(f);
3679
3680 /*
3681 * frsqrt7(sNaN) = canonical NaN
3682 * frsqrt7(-inf) = canonical NaN
3683 * frsqrt7(-normal) = canonical NaN
3684 * frsqrt7(-subnormal) = canonical NaN
3685 */
    if (float64_is_signaling_nan(f, s) ||
        (float64_is_infinity(f) && sign) ||
        (float64_is_normal(f) && sign) ||
        (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
3691 return float64_default_nan(s);
3692 }
3693
3694 /* frsqrt7(qNaN) = canonical NaN */
3695 if (float64_is_quiet_nan(f, s)) {
3696 return float64_default_nan(s);
3697 }
3698
3699 /* frsqrt7(+-0) = +-inf */
3700 if (float64_is_zero(f)) {
3701 s->float_exception_flags |= float_flag_divbyzero;
3702 return float64_set_sign(float64_infinity, sign);
3703 }
3704
3705 /* frsqrt7(+inf) = +0 */
3706 if (float64_is_infinity(f) && !sign) {
3707 return float64_set_sign(float64_zero, sign);
3708 }
3709
3710 /* +normal, +subnormal */
3711 uint64_t val = frsqrt7(f, exp_size, frac_size);
3712 return make_float64(val);
3713}
3714
3715RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3716RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3717RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
5eacf7d8 3718GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3719GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3720GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
e848a1e5 3721
3722/*
3723 * Vector Floating-Point Reciprocal Estimate Instruction
3724 *
3725 * Adapted from riscv-v-spec recip.c:
3726 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3727 */
3728static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3729 float_status *s)
3730{
3731 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3732 uint64_t exp = extract64(f, frac_size, exp_size);
3733 uint64_t frac = extract64(f, 0, frac_size);
3734
3735 const uint8_t lookup_table[] = {
3736 127, 125, 123, 121, 119, 117, 116, 114,
3737 112, 110, 109, 107, 105, 104, 102, 100,
3738 99, 97, 96, 94, 93, 91, 90, 88,
3739 87, 85, 84, 83, 81, 80, 79, 77,
3740 76, 75, 74, 72, 71, 70, 69, 68,
3741 66, 65, 64, 63, 62, 61, 60, 59,
3742 58, 57, 56, 55, 54, 53, 52, 51,
3743 50, 49, 48, 47, 46, 45, 44, 43,
3744 42, 41, 40, 40, 39, 38, 37, 36,
3745 35, 35, 34, 33, 32, 31, 31, 30,
3746 29, 28, 28, 27, 26, 25, 25, 24,
3747 23, 23, 22, 21, 21, 20, 19, 19,
3748 18, 17, 17, 16, 15, 15, 14, 14,
3749 13, 12, 12, 11, 11, 10, 9, 9,
3750 8, 8, 7, 7, 6, 5, 5, 4,
3751 4, 3, 3, 2, 2, 1, 1, 0
3752 };
3753 const int precision = 7;
3754
3755 if (exp == 0 && frac != 0) { /* subnormal */
3756 /* Normalize the subnormal. */
3757 while (extract64(frac, frac_size - 1, 1) == 0) {
3758 exp--;
3759 frac <<= 1;
3760 }
3761
3762 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3763
3764 if (exp != 0 && exp != UINT64_MAX) {
3765 /*
3766 * Overflow to inf or max value of same sign,
3767 * depending on sign and rounding mode.
3768 */
3769 s->float_exception_flags |= (float_flag_inexact |
3770 float_flag_overflow);
3771
3772 if ((s->float_rounding_mode == float_round_to_zero) ||
3773 ((s->float_rounding_mode == float_round_down) && !sign) ||
3774 ((s->float_rounding_mode == float_round_up) && sign)) {
                /* Return greatest/negative finite value. */
                return (sign << (exp_size + frac_size)) |
                       (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
            } else {
                /* Return +-inf. */
                return (sign << (exp_size + frac_size)) |
                       MAKE_64BIT_MASK(frac_size, exp_size);
            }
        }
    }

    int idx = frac >> (frac_size - precision);
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                        (frac_size - precision);
    uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3790
3791 if (out_exp == 0 || out_exp == UINT64_MAX) {
3792 /*
3793 * The result is subnormal, but don't raise the underflow exception,
3794 * because there's no additional loss of precision.
3795 */
3796 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3797 if (out_exp == UINT64_MAX) {
3798 out_frac >>= 1;
3799 out_exp = 0;
3800 }
3801 }
3802
3803 uint64_t val = 0;
3804 val = deposit64(val, 0, frac_size, out_frac);
3805 val = deposit64(val, frac_size, exp_size, out_exp);
3806 val = deposit64(val, frac_size + exp_size, 1, sign);
3807 return val;
3808}
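/*
 * Added worked example (not part of the upstream source): for the
 * float32 input 2.0 (sign = 0, exp = 128, frac = 0, exp_size = 8,
 * frac_size = 23, precision = 7):
 *
 *     idx      = 0 >> 16                  = 0
 *     out_frac = lookup_table[0] << 16    = 127 << 16
 *     out_exp  = 2 * 127 - 1 - 128        = 125
 *
 * which decodes to (1 + 127/128) * 2^(125 - 127) = 0.498046875, i.e.
 * 1/2.0 = 0.5 approximated with a relative error of 2^-8, inside the
 * 2^-7 bound the estimate instruction guarantees.
 */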
3809
3810static float16 frec7_h(float16 f, float_status *s)
3811{
3812 int exp_size = 5, frac_size = 10;
3813 bool sign = float16_is_neg(f);
3814
3815 /* frec7(+-inf) = +-0 */
3816 if (float16_is_infinity(f)) {
3817 return float16_set_sign(float16_zero, sign);
3818 }
3819
3820 /* frec7(+-0) = +-inf */
3821 if (float16_is_zero(f)) {
3822 s->float_exception_flags |= float_flag_divbyzero;
3823 return float16_set_sign(float16_infinity, sign);
3824 }
3825
3826 /* frec7(sNaN) = canonical NaN */
3827 if (float16_is_signaling_nan(f, s)) {
3828 s->float_exception_flags |= float_flag_invalid;
3829 return float16_default_nan(s);
3830 }
3831
3832 /* frec7(qNaN) = canonical NaN */
3833 if (float16_is_quiet_nan(f, s)) {
3834 return float16_default_nan(s);
3835 }
3836
3837 /* +-normal, +-subnormal */
3838 uint64_t val = frec7(f, exp_size, frac_size, s);
3839 return make_float16(val);
3840}
3841
3842static float32 frec7_s(float32 f, float_status *s)
3843{
3844 int exp_size = 8, frac_size = 23;
3845 bool sign = float32_is_neg(f);
3846
3847 /* frec7(+-inf) = +-0 */
3848 if (float32_is_infinity(f)) {
3849 return float32_set_sign(float32_zero, sign);
3850 }
3851
3852 /* frec7(+-0) = +-inf */
3853 if (float32_is_zero(f)) {
3854 s->float_exception_flags |= float_flag_divbyzero;
3855 return float32_set_sign(float32_infinity, sign);
3856 }
3857
3858 /* frec7(sNaN) = canonical NaN */
3859 if (float32_is_signaling_nan(f, s)) {
3860 s->float_exception_flags |= float_flag_invalid;
3861 return float32_default_nan(s);
3862 }
3863
3864 /* frec7(qNaN) = canonical NaN */
3865 if (float32_is_quiet_nan(f, s)) {
3866 return float32_default_nan(s);
3867 }
3868
3869 /* +-normal, +-subnormal */
3870 uint64_t val = frec7(f, exp_size, frac_size, s);
3871 return make_float32(val);
3872}
3873
3874static float64 frec7_d(float64 f, float_status *s)
3875{
3876 int exp_size = 11, frac_size = 52;
3877 bool sign = float64_is_neg(f);
3878
3879 /* frec7(+-inf) = +-0 */
3880 if (float64_is_infinity(f)) {
3881 return float64_set_sign(float64_zero, sign);
3882 }
3883
3884 /* frec7(+-0) = +-inf */
3885 if (float64_is_zero(f)) {
3886 s->float_exception_flags |= float_flag_divbyzero;
3887 return float64_set_sign(float64_infinity, sign);
3888 }
3889
3890 /* frec7(sNaN) = canonical NaN */
3891 if (float64_is_signaling_nan(f, s)) {
3892 s->float_exception_flags |= float_flag_invalid;
3893 return float64_default_nan(s);
3894 }
3895
3896 /* frec7(qNaN) = canonical NaN */
3897 if (float64_is_quiet_nan(f, s)) {
3898 return float64_default_nan(s);
3899 }
3900
3901 /* +-normal, +-subnormal */
3902 uint64_t val = frec7(f, exp_size, frac_size, s);
3903 return make_float64(val);
3904}
3905
3906RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3907RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3908RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
5eacf7d8 3909GEN_VEXT_V_ENV(vfrec7_v_h, 2)
3910GEN_VEXT_V_ENV(vfrec7_v_w, 4)
3911GEN_VEXT_V_ENV(vfrec7_v_d, 8)
55c35407 3912
230b53dd 3913/* Vector Floating-Point MIN/MAX Instructions */
3914RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3915RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3916RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
5eacf7d8 3917GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
3918GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
3919GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
49c5611a
FC
3920RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3921RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3922RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
5eacf7d8 3923GEN_VEXT_VF(vfmin_vf_h, 2)
3924GEN_VEXT_VF(vfmin_vf_w, 4)
3925GEN_VEXT_VF(vfmin_vf_d, 8)
230b53dd 3926
49c5611a
FC
3927RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3928RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3929RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
5eacf7d8 3930GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
3931GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
3932GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
49c5611a
FC
3933RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3934RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3935RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
5eacf7d8 3936GEN_VEXT_VF(vfmax_vf_h, 2)
3937GEN_VEXT_VF(vfmax_vf_w, 4)
3938GEN_VEXT_VF(vfmax_vf_d, 8)
3939
3940/* Vector Floating-Point Sign-Injection Instructions */
3941static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3942{
3943 return deposit64(b, 0, 15, a);
3944}
3945
3946static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3947{
3948 return deposit64(b, 0, 31, a);
3949}
3950
3951static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3952{
3953 return deposit64(b, 0, 63, a);
3954}
3955
3956RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3957RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3958RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
5eacf7d8 3959GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
3960GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
3961GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
1d426b81
LZ
3962RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3963RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3964RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
5eacf7d8 3965GEN_VEXT_VF(vfsgnj_vf_h, 2)
3966GEN_VEXT_VF(vfsgnj_vf_w, 4)
3967GEN_VEXT_VF(vfsgnj_vf_d, 8)
1d426b81
LZ
3968
3969static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3970{
3971 return deposit64(~b, 0, 15, a);
3972}
3973
3974static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3975{
3976 return deposit64(~b, 0, 31, a);
3977}
3978
3979static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3980{
3981 return deposit64(~b, 0, 63, a);
3982}
3983
3984RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3985RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3986RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
5eacf7d8 3987GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
3988GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
3989GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
1d426b81
LZ
3990RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3991RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3992RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
5eacf7d8 3993GEN_VEXT_VF(vfsgnjn_vf_h, 2)
3994GEN_VEXT_VF(vfsgnjn_vf_w, 4)
3995GEN_VEXT_VF(vfsgnjn_vf_d, 8)
1d426b81
LZ
3996
3997static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3998{
3999 return deposit64(b ^ a, 0, 15, a);
4000}
4001
4002static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4003{
4004 return deposit64(b ^ a, 0, 31, a);
4005}
4006
4007static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4008{
4009 return deposit64(b ^ a, 0, 63, a);
4010}
4011
4012RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4013RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4014RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
5eacf7d8 4015GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4016GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4017GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
1d426b81
LZ
4018RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4019RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4020RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
5eacf7d8 4021GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4022GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4023GEN_VEXT_VF(vfsgnjx_vf_d, 8)
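/*
 * Added illustrative note (not part of the upstream source): only the
 * sign bit is injected, so with both source operands equal these helpers
 * reduce to the classic move/negate/abs idioms, e.g. for a float32
 * element x:
 *
 *     fsgnj32(x, x)  == x                  copy
 *     fsgnjn32(x, x) == x ^ 0x80000000u    negate
 *     fsgnjx32(x, x) == x & 0x7fffffffu    absolute value
 */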
4024
/* Vector Floating-Point Compare Instructions */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env);                             \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i,                             \
                           DO_OP(s2, s1, &env->fp_status));   \
    }                                                         \
    env->vstart = 0;                                          \
    /*                                                        \
     * the mask destination register is always tail-agnostic, \
     * so set the tail elements to 1s                         \
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}
4063
4064GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4065GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4066GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4067
#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                     \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,      \
                  CPURISCVState *env, uint32_t desc)               \
{                                                                  \
    uint32_t vm = vext_vm(desc);                                   \
    uint32_t vl = env->vl;                                         \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;         \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                   \
    uint32_t vma = vext_vma(desc);                                 \
    uint32_t i;                                                    \
                                                                   \
    VSTART_CHECK_EARLY_EXIT(env);                                  \
                                                                   \
    for (i = env->vstart; i < vl; i++) {                           \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                         \
        if (!vm && !vext_elem_mask(v0, i)) {                       \
            /* set masked-off elements to 1s */                    \
            if (vma) {                                             \
                vext_set_elem_mask(vd, i, 1);                      \
            }                                                      \
            continue;                                              \
        }                                                          \
        vext_set_elem_mask(vd, i,                                  \
                           DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
    }                                                              \
    env->vstart = 0;                                               \
    /*                                                             \
     * the mask destination register is always tail-agnostic,      \
     * so set the tail elements to 1s                              \
     */                                                            \
    if (vta_all_1s) {                                              \
        for (; i < total_elems; i++) {                             \
            vext_set_elem_mask(vd, i, 1);                          \
        }                                                          \
    }                                                              \
}
4104
4105GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4106GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4107GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4108
4109static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4110{
4111 FloatRelation compare = float16_compare_quiet(a, b, s);
4112 return compare != float_relation_equal;
4113}
4114
4115static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4116{
4117 FloatRelation compare = float32_compare_quiet(a, b, s);
4118 return compare != float_relation_equal;
4119}
4120
4121static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4122{
4123 FloatRelation compare = float64_compare_quiet(a, b, s);
4124 return compare != float_relation_equal;
4125}
4126
4127GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4128GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4129GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4130GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4131GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4132GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4133
2a68e9e5
LZ
4134GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4135GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4136GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4137GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4138GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4139GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4140
2a68e9e5
LZ
4141GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4142GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4143GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4144GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4145GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4146GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4147
4148static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4149{
4150 FloatRelation compare = float16_compare(a, b, s);
4151 return compare == float_relation_greater;
4152}
4153
4154static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4155{
4156 FloatRelation compare = float32_compare(a, b, s);
4157 return compare == float_relation_greater;
4158}
4159
4160static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4161{
4162 FloatRelation compare = float64_compare(a, b, s);
4163 return compare == float_relation_greater;
4164}
4165
4166GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4167GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4168GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4169
4170static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4171{
4172 FloatRelation compare = float16_compare(a, b, s);
4173 return compare == float_relation_greater ||
4174 compare == float_relation_equal;
4175}
4176
4177static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4178{
4179 FloatRelation compare = float32_compare(a, b, s);
4180 return compare == float_relation_greater ||
4181 compare == float_relation_equal;
4182}
4183
4184static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4185{
4186 FloatRelation compare = float64_compare(a, b, s);
4187 return compare == float_relation_greater ||
4188 compare == float_relation_equal;
4189}
4190
4191GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4192GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4193GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
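/*
 * Added illustrative note (not part of the upstream source): vmfeq/vmfne
 * use the quiet comparison predicates, so a quiet NaN operand simply
 * compares as unordered without raising flags, whereas the ordering
 * comparisons (vmflt/vmfle/vmfgt/vmfge) use signalling predicates and
 * set float_flag_invalid for any NaN operand:
 *
 *     float16_eq_quiet(qnan, one, s)   false, no flag raised
 *     float16_lt(qnan, one, s)         false, float_flag_invalid raised
 */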
4194
121ddbb3 4195/* Vector Floating-Point Classify Instruction */
4196target_ulong fclass_h(uint64_t frs1)
4197{
4198 float16 f = frs1;
4199 bool sign = float16_is_neg(f);
4200
4201 if (float16_is_infinity(f)) {
4202 return sign ? 1 << 0 : 1 << 7;
4203 } else if (float16_is_zero(f)) {
4204 return sign ? 1 << 3 : 1 << 4;
4205 } else if (float16_is_zero_or_denormal(f)) {
4206 return sign ? 1 << 2 : 1 << 5;
4207 } else if (float16_is_any_nan(f)) {
4208 float_status s = { }; /* for snan_bit_is_one */
4209 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4210 } else {
4211 return sign ? 1 << 1 : 1 << 6;
4212 }
4213}
4214
4215target_ulong fclass_s(uint64_t frs1)
4216{
4217 float32 f = frs1;
4218 bool sign = float32_is_neg(f);
4219
4220 if (float32_is_infinity(f)) {
4221 return sign ? 1 << 0 : 1 << 7;
4222 } else if (float32_is_zero(f)) {
4223 return sign ? 1 << 3 : 1 << 4;
4224 } else if (float32_is_zero_or_denormal(f)) {
4225 return sign ? 1 << 2 : 1 << 5;
4226 } else if (float32_is_any_nan(f)) {
4227 float_status s = { }; /* for snan_bit_is_one */
4228 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4229 } else {
4230 return sign ? 1 << 1 : 1 << 6;
4231 }
4232}
4233
4234target_ulong fclass_d(uint64_t frs1)
4235{
4236 float64 f = frs1;
4237 bool sign = float64_is_neg(f);
4238
4239 if (float64_is_infinity(f)) {
4240 return sign ? 1 << 0 : 1 << 7;
4241 } else if (float64_is_zero(f)) {
4242 return sign ? 1 << 3 : 1 << 4;
4243 } else if (float64_is_zero_or_denormal(f)) {
4244 return sign ? 1 << 2 : 1 << 5;
4245 } else if (float64_is_any_nan(f)) {
4246 float_status s = { }; /* for snan_bit_is_one */
4247 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4248 } else {
4249 return sign ? 1 << 1 : 1 << 6;
4250 }
4251}
4252
4253RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4254RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4255RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
5eacf7d8 4256GEN_VEXT_V(vfclass_v_h, 2)
4257GEN_VEXT_V(vfclass_v_w, 4)
4258GEN_VEXT_V(vfclass_v_d, 8)
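/*
 * Added reference note (not part of the upstream source): each element
 * receives a 10-bit one-hot class mask, identical to the scalar fclass
 * encoding:
 *
 *     bit 0: -infinity      bit 5: +subnormal
 *     bit 1: -normal        bit 6: +normal
 *     bit 2: -subnormal     bit 7: +infinity
 *     bit 3: -0             bit 8: signalling NaN
 *     bit 4: +0             bit 9: quiet NaN
 */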
4259
4260/* Vector Floating-Point Merge Instruction */
5eacf7d8 4261
#define GEN_VFMERGE_VF(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    uint32_t vm = vext_vm(desc);                                  \
    uint32_t vl = env->vl;                                        \
    uint32_t esz = sizeof(ETYPE);                                 \
    uint32_t total_elems =                                        \
        vext_get_total_elems(env, desc, esz);                     \
    uint32_t vta = vext_vta(desc);                                \
    uint32_t i;                                                   \
                                                                  \
    VSTART_CHECK_EARLY_EXIT(env);                                 \
                                                                  \
    for (i = env->vstart; i < vl; i++) {                          \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                        \
        *((ETYPE *)vd + H(i)) =                                   \
            (!vm && !vext_elem_mask(v0, i) ? s2 : s1);            \
    }                                                             \
    env->vstart = 0;                                              \
    /* set tail elements to 1s */                                 \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);      \
}
4285
3479a814
FC
4286GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4287GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4288GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4289
4290/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4291/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4292RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4293RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4294RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
5eacf7d8 4295GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4296GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4297GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
92100973
LZ
4298
4299/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4300RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4301RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4302RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
5eacf7d8 4303GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4304GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4305GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
92100973
LZ
4306
4307/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4308RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4309RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4310RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
5eacf7d8 4311GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4312GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4313GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
92100973
LZ
4314
4315/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4316RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4317RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4318RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
5eacf7d8 4319GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4320GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4321GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4322
4323/* Widening Floating-Point/Integer Type-Convert Instructions */
4324/* (TD, T2, TX2) */
3ce4c09d 4325#define WOP_UU_B uint16_t, uint8_t, uint8_t
4514b7b1
LZ
4326#define WOP_UU_H uint32_t, uint16_t, uint16_t
4327#define WOP_UU_W uint64_t, uint32_t, uint32_t
4328/*
4329 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
4330 */
4514b7b1
LZ
4331RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4332RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
5eacf7d8 4333GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4334GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4514b7b1
LZ
4335
4336/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4337RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4338RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
5eacf7d8 4339GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4340GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4514b7b1 4341
4342/*
4343 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
4344 */
3ce4c09d 4345RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4514b7b1
LZ
4346RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4347RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
5eacf7d8 4348GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4349GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4350GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4514b7b1
LZ
4351
4352/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3ce4c09d 4353RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4514b7b1
LZ
4354RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4355RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
5eacf7d8 4356GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4357GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4358GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4514b7b1
LZ
4359
4360/*
246f8796 4361 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4514b7b1
LZ
4362 */
4363static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4364{
4365 return float16_to_float32(a, true, s);
4366}
4367
4368RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4369RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
5eacf7d8 4370GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4371GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
878d406e 4372
4373RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
4374GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
4375
878d406e
LZ
4376/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4377/* (TD, T2, TX2) */
ff679b58 4378#define NOP_UU_B uint8_t, uint16_t, uint32_t
878d406e
LZ
4379#define NOP_UU_H uint16_t, uint32_t, uint32_t
4380#define NOP_UU_W uint32_t, uint64_t, uint64_t
4381/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
ff679b58
FC
4382RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4383RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4384RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
5eacf7d8 4385GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4386GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4387GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
878d406e
LZ
4388
4389/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
ff679b58
FC
4390RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4391RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4392RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
5eacf7d8 4393GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4394GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4395GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
878d406e 4396
246f8796
WL
4397/*
4398 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
4399 */
ff679b58
FC
4400RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4401RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
5eacf7d8 4402GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4403GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
878d406e
LZ
4404
4405/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
ff679b58
FC
4406RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4407RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
5eacf7d8 4408GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4409GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
878d406e
LZ
4410
4411/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
4412static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4413{
4414 return float32_to_float16(a, true, s);
4415}
4416
ff679b58
FC
4417RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4418RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
5eacf7d8 4419GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4420GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
fe5c9ab1 4421
87b27bfc
WL
4422RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
4423GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
4424
fe5c9ab1 4425/*
3b57254d 4426 * Vector Reduction Operations
4427 */
4428/* Vector Single-Width Integer Reduction Instructions */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t esz = sizeof(TD);                            \
    uint32_t vlenb = simd_maxsz(desc);                    \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t i;                                           \
    TD s1 = *((TD *)vs1 + HD(0));                         \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        s1 = OP(s1, (TD)s2);                              \
    }                                                     \
    *((TD *)vd + HD(0)) = s1;                             \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, esz, vlenb);               \
}
4454
4455/* vd[0] = sum(vs1[0], vs2[*]) */
3479a814
FC
4456GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4457GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4458GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4459GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
fe5c9ab1
LZ
4460
4461/* vd[0] = maxu(vs1[0], vs2[*]) */
3479a814
FC
4462GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4463GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4464GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4465GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4466
4467/* vd[0] = max(vs1[0], vs2[*]) */
3479a814
FC
4468GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4469GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4470GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4471GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4472
4473/* vd[0] = minu(vs1[0], vs2[*]) */
3479a814
FC
4474GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4475GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4476GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4477GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4478
4479/* vd[0] = min(vs1[0], vs2[*]) */
3479a814
FC
4480GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4481GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4482GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4483GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4484
4485/* vd[0] = and(vs1[0], vs2[*]) */
3479a814
FC
4486GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4487GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4488GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4489GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
fe5c9ab1
LZ
4490
4491/* vd[0] = or(vs1[0], vs2[*]) */
3479a814
FC
4492GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4493GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4494GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4495GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
fe5c9ab1
LZ
4496
4497/* vd[0] = xor(vs1[0], vs2[*]) */
3479a814
FC
4498GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4499GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4500GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4501GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
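/*
 * Added illustrative note (not part of the upstream source): every
 * reduction folds the active body elements of vs2 into a scalar seeded
 * from vs1[0] and writes the result to vd[0] only, e.g. for vredsum.vs
 * with vl = 3 and no masking:
 *
 *     vd[0] = ((vs1[0] + vs2[0]) + vs2[1]) + vs2[2];
 */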
4502
4503/* Vector Widening Integer Reduction Instructions */
4504/* signed sum reduction into double-width accumulator */
3479a814
FC
4505GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4506GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4507GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
bba71820
LZ
4508
4509/* Unsigned sum reduction into double-width accumulator */
3479a814
FC
4510GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4511GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4512GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4513
4514/* Vector Single-Width Floating-Point Reduction Instructions */
3479a814 4515#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
523547f1
LZ
4516void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4517 void *vs2, CPURISCVState *env, \
4518 uint32_t desc) \
4519{ \
523547f1
LZ
4520 uint32_t vm = vext_vm(desc); \
4521 uint32_t vl = env->vl; \
df4f52a7 4522 uint32_t esz = sizeof(TD); \
4523 uint32_t vlenb = simd_maxsz(desc); \
4524 uint32_t vta = vext_vta(desc); \
523547f1 4525 uint32_t i; \
523547f1
LZ
4526 TD s1 = *((TD *)vs1 + HD(0)); \
4527 \
f714361e 4528 for (i = env->vstart; i < vl; i++) { \
523547f1 4529 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4530 if (!vm && !vext_elem_mask(v0, i)) { \
523547f1
LZ
4531 continue; \
4532 } \
4533 s1 = OP(s1, (TD)s2, &env->fp_status); \
4534 } \
4535 *((TD *)vd + HD(0)) = s1; \
f714361e 4536 env->vstart = 0; \
df4f52a7 4537 /* set tail elements to 1s */ \
4538 vext_set_elems_1s(vd, vta, esz, vlenb); \
523547f1
LZ
4539}
4540
4541/* Unordered sum */
a3ab69f9
YL
4542GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4543GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4544GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4545
4546/* Ordered sum */
4547GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4548GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4549GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
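/*
 * Note: as instantiated here, the unordered sum (vfredusum) uses the
 * same sequential, element-order accumulation as the ordered sum
 * (vfredosum). This is a legal implementation choice, since the
 * unordered form does not mandate any particular association order;
 * it merely permits faster reduction trees, it does not require them.
 */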
523547f1
LZ
4550
4551/* Maximum value */
246f8796
WL
4552GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
4553 float16_maximum_number)
4554GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
4555 float32_maximum_number)
4556GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
4557 float64_maximum_number)
523547f1
LZ
4558
4559/* Minimum value */
246f8796
WL
4560GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
4561 float16_minimum_number)
4562GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
4563 float32_minimum_number)
4564GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
4565 float64_minimum_number)
696b0c26 4566
5bda21c0
YL
4567/* Vector Widening Floating-Point Add Instructions */
4568static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
696b0c26 4569{
5bda21c0 4570 return float32_add(a, float16_to_float32(b, true, s), s);
696b0c26
LZ
4571}
4572
5bda21c0 4573static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
696b0c26 4574{
5bda21c0 4575 return float64_add(a, float32_to_float64(b, s), s);
696b0c26 4576}
c21f34ae 4577
5bda21c0 4578/* Vector Widening Floating-Point Reduction Instructions */
a3ab69f9
YL
4579/* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4580GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4581GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4582GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4583GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
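/*
 * Worked example (values chosen for illustration only): for
 * vfwredusum_vs_h each float16 element of vs2 is first promoted with
 * float16_to_float32() and then accumulated into the float32 scalar.
 * With vs1[0] = 1.0f and vs2 = {0.5, 0.25} (float16), the result is
 * vd[0] = 1.75f, computed entirely in single precision.
 */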
5bda21c0 4584
c21f34ae 4585/*
3b57254d 4586 * Vector Mask Operations
c21f34ae
LZ
4587 */
4588/* Vector Mask-Register Logical Instructions */
4589#define GEN_VEXT_MASK_VV(NAME, OP) \
4590void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4591 void *vs2, CPURISCVState *env, \
4592 uint32_t desc) \
4593{ \
c21f34ae 4594 uint32_t vl = env->vl; \
58bc9063 4595 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\
acc6ffd4 4596 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
c21f34ae
LZ
4597 uint32_t i; \
4598 int a, b; \
4599 \
df4252b2
DHB
4600 VSTART_CHECK_EARLY_EXIT(env); \
4601 \
f714361e 4602 for (i = env->vstart; i < vl; i++) { \
f9298de5
FC
4603 a = vext_elem_mask(vs1, i); \
4604 b = vext_elem_mask(vs2, i); \
4605 vext_set_elem_mask(vd, i, OP(b, a)); \
c21f34ae 4606 } \
f714361e 4607 env->vstart = 0; \
3b57254d
WL
4608 /*
4609 * mask destination registers are always tail-agnostic
4610 * set tail elements to 1s
acc6ffd4 4611 */ \
acc6ffd4 4612 if (vta_all_1s) { \
4613 for (; i < total_elems; i++) { \
4614 vext_set_elem_mask(vd, i, 1); \
4615 } \
4616 } \
c21f34ae
LZ
4617}
4618
4619#define DO_NAND(N, M) (!(N & M))
4620#define DO_ANDNOT(N, M) (N & !M)
4621#define DO_NOR(N, M) (!(N | M))
4622#define DO_ORNOT(N, M) (N | !M)
4623#define DO_XNOR(N, M) (!(N ^ M))
4624
4625GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4626GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
9c0d2559 4627GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
c21f34ae
LZ
4628GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4629GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4630GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
9c0d2559 4631GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
c21f34ae 4632GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
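/*
 * Illustrative expansion (operands named for clarity): the call site
 * passes OP(b, a) with a read from vs1 and b from vs2, so e.g.
 *
 *     vmandn_mm:  vd[i] = DO_ANDNOT(vs2[i], vs1[i]) = vs2[i] & !vs1[i]
 *     vmorn_mm:   vd[i] = DO_ORNOT(vs2[i], vs1[i])  = vs2[i] | !vs1[i]
 *
 * i.e. the complement is applied to the vs1 operand, as vmandn.mm and
 * vmorn.mm require. The operands are single mask bits (0 or 1), hence
 * the logical (!) rather than bitwise (~) negation in the macros.
 */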
2e88f551 4633
0014aa74
FC
4634/* Vector count population in mask (vcpop.m) */
4635target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4636 uint32_t desc)
2e88f551
LZ
4637{
4638 target_ulong cnt = 0;
2e88f551
LZ
4639 uint32_t vm = vext_vm(desc);
4640 uint32_t vl = env->vl;
4641 int i;
4642
f714361e 4643 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4644 if (vm || vext_elem_mask(v0, i)) {
4645 if (vext_elem_mask(vs2, i)) {
2e88f551
LZ
4646 cnt++;
4647 }
4648 }
4649 }
f714361e 4650 env->vstart = 0;
2e88f551
LZ
4651 return cnt;
4652}
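/*
 * Worked example (values chosen for illustration only): with vm = 1,
 * vl = 8 and vs2 mask bits 1,0,1,1,0,1,0,0 for elements 0..7,
 * vcpop.m returns 4. When vm = 0, only elements whose v0 bit is set
 * are counted.
 */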
0db67e1c 4653
3b57254d 4654/* vfirst find-first-set mask bit */
d71a24fc
FC
4655target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4656 uint32_t desc)
0db67e1c 4657{
0db67e1c
LZ
4658 uint32_t vm = vext_vm(desc);
4659 uint32_t vl = env->vl;
4660 int i;
4661
f714361e 4662 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4663 if (vm || vext_elem_mask(v0, i)) {
4664 if (vext_elem_mask(vs2, i)) {
0db67e1c
LZ
4665 return i;
4666 }
4667 }
4668 }
f714361e 4669 env->vstart = 0;
0db67e1c
LZ
4670 return -1LL;
4671}
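/*
 * Worked example (values chosen for illustration only): with active
 * vs2 mask bits 0,0,1,0,1 for elements 0..4, vfirst.m returns 2, the
 * index of the first active set bit; if no active bit is set, the
 * helper returns -1.
 */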
81fbf7da
LZ
4672
4673enum set_mask_type {
4674 ONLY_FIRST = 1,
4675 INCLUDE_FIRST,
4676 BEFORE_FIRST,
4677};
4678
4679static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4680 uint32_t desc, enum set_mask_type type)
4681{
81fbf7da
LZ
4682 uint32_t vm = vext_vm(desc);
4683 uint32_t vl = env->vl;
58bc9063 4684 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
acc6ffd4 4685 uint32_t vta_all_1s = vext_vta_all_1s(desc);
35f2d795 4686 uint32_t vma = vext_vma(desc);
81fbf7da
LZ
4687 int i;
4688 bool first_mask_bit = false;
4689
f714361e 4690 for (i = env->vstart; i < vl; i++) {
f9298de5 4691 if (!vm && !vext_elem_mask(v0, i)) {
35f2d795
YTC
4692 /* set masked-off elements to 1s */
4693 if (vma) {
4694 vext_set_elem_mask(vd, i, 1);
4695 }
81fbf7da
LZ
4696 continue;
4697 }
4698 /* write a zero to all following active elements */
4699 if (first_mask_bit) {
f9298de5 4700 vext_set_elem_mask(vd, i, 0);
81fbf7da
LZ
4701 continue;
4702 }
f9298de5 4703 if (vext_elem_mask(vs2, i)) {
81fbf7da
LZ
4704 first_mask_bit = true;
4705 if (type == BEFORE_FIRST) {
f9298de5 4706 vext_set_elem_mask(vd, i, 0);
81fbf7da 4707 } else {
f9298de5 4708 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4709 }
4710 } else {
4711 if (type == ONLY_FIRST) {
f9298de5 4712 vext_set_elem_mask(vd, i, 0);
81fbf7da 4713 } else {
f9298de5 4714 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4715 }
4716 }
4717 }
f714361e 4718 env->vstart = 0;
3b57254d
WL
4719 /*
4720 * mask destination registers are always tail-agnostic
4721 * set tail elements to 1s
4722 */
acc6ffd4 4723 if (vta_all_1s) {
4724 for (; i < total_elems; i++) {
4725 vext_set_elem_mask(vd, i, 1);
4726 }
4727 }
81fbf7da
LZ
4728}
4729
4730void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4731 uint32_t desc)
4732{
4733 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4734}
4735
4736void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4737 uint32_t desc)
4738{
4739 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4740}
4741
4742void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4743 uint32_t desc)
4744{
4745 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4746}
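/*
 * Worked example (values chosen for illustration only): with all
 * elements active, vl = 5 and vs2 mask bits 0,0,1,0,1, the three
 * helpers produce
 *
 *     vmsbf.m -> 1,1,0,0,0   (set-before-first)
 *     vmsif.m -> 1,1,1,0,0   (set-including-first)
 *     vmsof.m -> 0,0,1,0,0   (set-only-first)
 *
 * i.e. they differ only in how the element holding the first set bit
 * and the elements before it are written.
 */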
78d90cfe
LZ
4747
4748/* Vector Iota Instruction */
3479a814 4749#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
78d90cfe
LZ
4750void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4751 uint32_t desc) \
4752{ \
78d90cfe
LZ
4753 uint32_t vm = vext_vm(desc); \
4754 uint32_t vl = env->vl; \
acc6ffd4 4755 uint32_t esz = sizeof(ETYPE); \
4756 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4757 uint32_t vta = vext_vta(desc); \
35f2d795 4758 uint32_t vma = vext_vma(desc); \
78d90cfe
LZ
4759 uint32_t sum = 0; \
4760 int i; \
4761 \
f714361e 4762 for (i = env->vstart; i < vl; i++) { \
f9298de5 4763 if (!vm && !vext_elem_mask(v0, i)) { \
35f2d795
YTC
4764 /* set masked-off elements to 1s */ \
4765 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
78d90cfe
LZ
4766 continue; \
4767 } \
4768 *((ETYPE *)vd + H(i)) = sum; \
f9298de5 4769 if (vext_elem_mask(vs2, i)) { \
78d90cfe
LZ
4770 sum++; \
4771 } \
4772 } \
f714361e 4773 env->vstart = 0; \
acc6ffd4 4774 /* set tail elements to 1s */ \
4775 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
78d90cfe
LZ
4776}
4777
3479a814
FC
4778GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4779GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4780GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4781GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
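/*
 * Worked example (values chosen for illustration only): viota.m writes,
 * for each active element, the number of set vs2 mask bits at lower
 * indices (an exclusive prefix sum). With vl = 5 and vs2 mask bits
 * 1,0,1,1,0, the destination becomes {0, 1, 1, 2, 3}.
 */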
126bec3f
LZ
4782
4783/* Vector Element Index Instruction */
3479a814 4784#define GEN_VEXT_VID_V(NAME, ETYPE, H) \
126bec3f
LZ
4785void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4786{ \
126bec3f
LZ
4787 uint32_t vm = vext_vm(desc); \
4788 uint32_t vl = env->vl; \
acc6ffd4 4789 uint32_t esz = sizeof(ETYPE); \
4790 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4791 uint32_t vta = vext_vta(desc); \
35f2d795 4792 uint32_t vma = vext_vma(desc); \
126bec3f
LZ
4793 int i; \
4794 \
df4252b2
DHB
4795 VSTART_CHECK_EARLY_EXIT(env); \
4796 \
f714361e 4797 for (i = env->vstart; i < vl; i++) { \
f9298de5 4798 if (!vm && !vext_elem_mask(v0, i)) { \
35f2d795
YTC
4799 /* set masked-off elements to 1s */ \
4800 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
126bec3f
LZ
4801 continue; \
4802 } \
4803 *((ETYPE *)vd + H(i)) = i; \
4804 } \
f714361e 4805 env->vstart = 0; \
acc6ffd4 4806 /* set tail elements to 1s */ \
4807 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
126bec3f
LZ
4808}
4809
3479a814
FC
4810GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4811GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4812GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4813GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
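/*
 * Worked example (values chosen for illustration only): vid.v writes
 * each active element's own index, so with vl = 4 the destination
 * becomes {0, 1, 2, 3}; masked-off elements follow the mask-agnostic
 * handling above.
 */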
ec17e036
LZ
4814
4815/*
3b57254d 4816 * Vector Permutation Instructions
ec17e036
LZ
4817 */
4818
4819/* Vector Slide Instructions */
3479a814 4820#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
ec17e036
LZ
4821void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4822 CPURISCVState *env, uint32_t desc) \
4823{ \
ec17e036
LZ
4824 uint32_t vm = vext_vm(desc); \
4825 uint32_t vl = env->vl; \
803963f7 4826 uint32_t esz = sizeof(ETYPE); \
4827 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4828 uint32_t vta = vext_vta(desc); \
edabcd0e 4829 uint32_t vma = vext_vma(desc); \
f714361e 4830 target_ulong offset = s1, i_min, i; \
ec17e036 4831 \
df4252b2
DHB
4832 VSTART_CHECK_EARLY_EXIT(env); \
4833 \
f714361e
FC
4834 i_min = MAX(env->vstart, offset); \
4835 for (i = i_min; i < vl; i++) { \
f9298de5 4836 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
4837 /* set masked-off elements to 1s */ \
4838 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
ec17e036
LZ
4839 continue; \
4840 } \
4841 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
4842 } \
d3646e31 4843 env->vstart = 0; \
803963f7 4844 /* set tail elements to 1s */ \
4845 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
ec17e036
LZ
4846}
4847
4848/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
3479a814
FC
4849GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
4850GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4851GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4852GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
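/*
 * Worked example (values chosen for illustration only): with
 * offset = s1 = 2, vl = 6 and vs2 = {a, b, c, d, e, f}, vslideup.vx
 * writes vd[2..5] = {a, b, c, d}; elements 0 and 1 lie below the
 * offset and are left untouched, since the loop starts at
 * MAX(vstart, offset).
 */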
ec17e036 4853
3479a814 4854#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
ec17e036
LZ
4855void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4856 CPURISCVState *env, uint32_t desc) \
4857{ \
6438ed61 4858 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
ec17e036
LZ
4859 uint32_t vm = vext_vm(desc); \
4860 uint32_t vl = env->vl; \
803963f7 4861 uint32_t esz = sizeof(ETYPE); \
4862 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4863 uint32_t vta = vext_vta(desc); \
edabcd0e 4864 uint32_t vma = vext_vma(desc); \
f3f65c40 4865 target_ulong i_max, i_min, i; \
ec17e036 4866 \
df4252b2
DHB
4867 VSTART_CHECK_EARLY_EXIT(env); \
4868 \
f3f65c40
AF
4869 i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl); \
4870 i_max = MAX(i_min, env->vstart); \
f714361e 4871 for (i = env->vstart; i < i_max; ++i) { \
edabcd0e
YTC
4872 if (!vm && !vext_elem_mask(v0, i)) { \
4873 /* set masked-off elements to 1s */ \
4874 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4875 continue; \
6438ed61 4876 } \
edabcd0e 4877 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
6438ed61
FC
4878 } \
4879 \
4880 for (i = i_max; i < vl; ++i) { \
4881 if (vm || vext_elem_mask(v0, i)) { \
4882 *((ETYPE *)vd + H(i)) = 0; \
ec17e036 4883 } \
ec17e036 4884 } \
f714361e
FC
4885 \
4886 env->vstart = 0; \
803963f7 4887 /* set tail elements to 1s */ \
4888 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
ec17e036
LZ
4889}
4890
4891/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
3479a814
FC
4892GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
4893GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4894GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4895GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
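/*
 * Worked example (values chosen for illustration only): with s1 = 6,
 * vlmax = 8, vl = 6 and vs2 = {e0, ..., e7}, vslidedown.vx writes
 * vd[0] = e6 and vd[1] = e7, while the active elements vd[2..5] are
 * zeroed because their source index (i + s1) would lie beyond vlmax.
 */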
ec17e036 4896
c7b8a421 4897#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
8c89d50c 4898static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
c45eff30
WL
4899 void *vs2, CPURISCVState *env, \
4900 uint32_t desc) \
8500d4ab 4901{ \
c7b8a421 4902 typedef uint##BITWIDTH##_t ETYPE; \
8500d4ab
FC
4903 uint32_t vm = vext_vm(desc); \
4904 uint32_t vl = env->vl; \
803963f7 4905 uint32_t esz = sizeof(ETYPE); \
4906 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4907 uint32_t vta = vext_vta(desc); \
edabcd0e 4908 uint32_t vma = vext_vma(desc); \
8500d4ab
FC
4909 uint32_t i; \
4910 \
df4252b2
DHB
4911 VSTART_CHECK_EARLY_EXIT(env); \
4912 \
f714361e 4913 for (i = env->vstart; i < vl; i++) { \
8500d4ab 4914 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
4915 /* set masked-off elements to 1s */ \
4916 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
8500d4ab
FC
4917 continue; \
4918 } \
4919 if (i == 0) { \
4920 *((ETYPE *)vd + H(i)) = s1; \
4921 } else { \
4922 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
4923 } \
4924 } \
f714361e 4925 env->vstart = 0; \
803963f7 4926 /* set tail elements to 1s */ \
4927 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
8500d4ab
FC
4928}
4929
4930GEN_VEXT_VSLIE1UP(8, H1)
4931GEN_VEXT_VSLIE1UP(16, H2)
4932GEN_VEXT_VSLIE1UP(32, H4)
4933GEN_VEXT_VSLIE1UP(64, H8)
4934
c7b8a421 4935#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
8500d4ab
FC
4936void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4937 CPURISCVState *env, uint32_t desc) \
4938{ \
c7b8a421 4939 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
ec17e036
LZ
4940}
4941
4942/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
8500d4ab
FC
4943GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4944GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4945GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4946GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4947
c7b8a421 4948#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
8c89d50c 4949static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
c45eff30
WL
4950 void *vs2, CPURISCVState *env, \
4951 uint32_t desc) \
8500d4ab 4952{ \
c7b8a421 4953 typedef uint##BITWIDTH##_t ETYPE; \
8500d4ab
FC
4954 uint32_t vm = vext_vm(desc); \
4955 uint32_t vl = env->vl; \
803963f7 4956 uint32_t esz = sizeof(ETYPE); \
4957 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4958 uint32_t vta = vext_vta(desc); \
edabcd0e 4959 uint32_t vma = vext_vma(desc); \
8500d4ab
FC
4960 uint32_t i; \
4961 \
df4252b2
DHB
4962 VSTART_CHECK_EARLY_EXIT(env); \
4963 \
f714361e 4964 for (i = env->vstart; i < vl; i++) { \
8500d4ab 4965 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
4966 /* set masked-off elements to 1s */ \
4967 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
8500d4ab
FC
4968 continue; \
4969 } \
4970 if (i == vl - 1) { \
4971 *((ETYPE *)vd + H(i)) = s1; \
4972 } else { \
4973 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
4974 } \
4975 } \
f714361e 4976 env->vstart = 0; \
803963f7 4977 /* set tail elements to 1s */ \
4978 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
8500d4ab
FC
4979}
4980
4981GEN_VEXT_VSLIDE1DOWN(8, H1)
4982GEN_VEXT_VSLIDE1DOWN(16, H2)
4983GEN_VEXT_VSLIDE1DOWN(32, H4)
4984GEN_VEXT_VSLIDE1DOWN(64, H8)
4985
c7b8a421 4986#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
8500d4ab
FC
4987void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4988 CPURISCVState *env, uint32_t desc) \
4989{ \
c7b8a421 4990 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
ec17e036
LZ
4991}
4992
4993/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
8500d4ab
FC
4994GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4995GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4996GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4997GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
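/*
 * Worked example (values chosen for illustration only): with vl = 4,
 * vs2 = {a, b, c, d} and x[rs1] = s,
 *
 *     vslide1up.vx   -> vd = {s, a, b, c}
 *     vslide1down.vx -> vd = {b, c, d, s}
 *
 * i.e. the scalar enters at element 0 for the up variant and at
 * element vl-1 for the down variant. The vfslide1up/vfslide1down
 * helpers below reuse exactly the same routines with a floating-point
 * scalar.
 */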
4998
4999/* Vector Floating-Point Slide Instructions */
c7b8a421 5000#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
8500d4ab
FC
5001void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5002 CPURISCVState *env, uint32_t desc) \
5003{ \
c7b8a421 5004 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
8500d4ab
FC
5005}
5006
5007/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
5008GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
5009GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
5010GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5011
c7b8a421 5012#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
8500d4ab
FC
5013void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5014 CPURISCVState *env, uint32_t desc) \
5015{ \
c7b8a421 5016 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
8500d4ab
FC
5017}
5018
5019/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
5020GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
5021GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
5022GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
e4b83d5c
LZ
5023
5024/* Vector Register Gather Instruction */
50bfb45b 5025#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
e4b83d5c
LZ
5026void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5027 CPURISCVState *env, uint32_t desc) \
5028{ \
f714361e 5029 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
e4b83d5c
LZ
5030 uint32_t vm = vext_vm(desc); \
5031 uint32_t vl = env->vl; \
803963f7 5032 uint32_t esz = sizeof(TS2); \
5033 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5034 uint32_t vta = vext_vta(desc); \
edabcd0e 5035 uint32_t vma = vext_vma(desc); \
b11e84b8
FC
5036 uint64_t index; \
5037 uint32_t i; \
e4b83d5c 5038 \
df4252b2
DHB
5039 VSTART_CHECK_EARLY_EXIT(env); \
5040 \
f714361e 5041 for (i = env->vstart; i < vl; i++) { \
f9298de5 5042 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5043 /* set masked-off elements to 1s */ \
5044 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
e4b83d5c
LZ
5045 continue; \
5046 } \
50bfb45b 5047 index = *((TS1 *)vs1 + HS1(i)); \
e4b83d5c 5048 if (index >= vlmax) { \
50bfb45b 5049 *((TS2 *)vd + HS2(i)) = 0; \
e4b83d5c 5050 } else { \
50bfb45b 5051 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
e4b83d5c
LZ
5052 } \
5053 } \
f714361e 5054 env->vstart = 0; \
803963f7 5055 /* set tail elements to 1s */ \
5056 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
e4b83d5c
LZ
5057}
5058
5059/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
50bfb45b
FC
5060GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5061GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5062GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5063GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5064
5065GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5066GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5067GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5068GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
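/*
 * Worked example (values chosen for illustration only): with
 * vlmax = 4, vs2 = {10, 20, 30, 40} and index vector vs1 = {3, 0, 2, 7},
 * vrgather.vv yields vd = {40, 10, 30, 0}; the out-of-range index 7
 * selects 0. The vrgatherei16 variants read their indices as uint16_t
 * regardless of SEW.
 */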
e4b83d5c 5069
3479a814 5070#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
e4b83d5c
LZ
5071void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5072 CPURISCVState *env, uint32_t desc) \
5073{ \
5a9f8e15 5074 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
e4b83d5c
LZ
5075 uint32_t vm = vext_vm(desc); \
5076 uint32_t vl = env->vl; \
803963f7 5077 uint32_t esz = sizeof(ETYPE); \
5078 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5079 uint32_t vta = vext_vta(desc); \
edabcd0e 5080 uint32_t vma = vext_vma(desc); \
b11e84b8
FC
5081 uint64_t index = s1; \
5082 uint32_t i; \
e4b83d5c 5083 \
df4252b2
DHB
5084 VSTART_CHECK_EARLY_EXIT(env); \
5085 \
f714361e 5086 for (i = env->vstart; i < vl; i++) { \
f9298de5 5087 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5088 /* set masked-off elements to 1s */ \
5089 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
e4b83d5c
LZ
5090 continue; \
5091 } \
5092 if (index >= vlmax) { \
5093 *((ETYPE *)vd + H(i)) = 0; \
5094 } else { \
5095 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5096 } \
5097 } \
f714361e 5098 env->vstart = 0; \
803963f7 5099 /* set tail elements to 1s */ \
5100 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
e4b83d5c
LZ
5101}
5102
5103/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
3479a814
FC
5104GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5105GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5106GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5107GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
31bf42a2
LZ
5108
5109/* Vector Compress Instruction */
3479a814 5110#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
31bf42a2
LZ
5111void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5112 CPURISCVState *env, uint32_t desc) \
5113{ \
31bf42a2 5114 uint32_t vl = env->vl; \
803963f7 5115 uint32_t esz = sizeof(ETYPE); \
5116 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5117 uint32_t vta = vext_vta(desc); \
31bf42a2
LZ
5118 uint32_t num = 0, i; \
5119 \
f714361e 5120 for (i = env->vstart; i < vl; i++) { \
f9298de5 5121 if (!vext_elem_mask(vs1, i)) { \
31bf42a2
LZ
5122 continue; \
5123 } \
5124 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5125 num++; \
5126 } \
f714361e 5127 env->vstart = 0; \
803963f7 5128 /* set tail elements to 1s */ \
5129 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
31bf42a2
LZ
5130}
5131
5132/* Compress the elements of vs2 whose vs1 mask bit is set into vd */
3479a814
FC
5133GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5134GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5135GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5136GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
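/*
 * Worked example (values chosen for illustration only): with vl = 5,
 * vs2 = {a, b, c, d, e} and vs1 mask bits 1,0,1,1,0, vcompress.vm
 * packs the selected elements into the lowest destination slots, so
 * vd[0..2] become {a, c, d}.
 */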
cd01340e 5137
f714361e 5138/* Vector Whole Register Move */
f32d82f6
WL
5139void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5140{
f06193c4 5141 /* EEW = SEW */
f32d82f6 5142 uint32_t maxsz = simd_maxsz(desc);
f06193c4
WL
5143 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5144 uint32_t startb = env->vstart * sewb;
5145 uint32_t i = startb;
f32d82f6 5146
7e53e3dd
DHB
5147 if (startb >= maxsz) {
5148 env->vstart = 0;
5149 return;
5150 }
5151
768e7b32
DHB
5152 if (HOST_BIG_ENDIAN && i % 8 != 0) {
5153 uint32_t j = ROUND_UP(i, 8);
5154 memcpy((uint8_t *)vd + H1(j - 1),
5155 (uint8_t *)vs2 + H1(j - 1),
5156 j - i);
5157 i = j;
5158 }
5159
f32d82f6
WL
5160 memcpy((uint8_t *)vd + H1(i),
5161 (uint8_t *)vs2 + H1(i),
768e7b32 5162 maxsz - i);
f714361e 5163
f32d82f6
WL
5164 env->vstart = 0;
5165}
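/*
 * Note (illustrative): the move is performed in bytes so that an
 * interrupted whole-register move can resume at env->vstart; e.g.
 * with vstart = 3 and SEW = 16 only the bytes from offset 6 up to
 * maxsz are (re)copied. On big-endian hosts the first step completes
 * the remainder of the current 8-byte unit, because the H1()
 * addressing swizzles byte order within 8-byte units.
 */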
f714361e 5166
cd01340e
FC
5167/* Vector Integer Extension */
5168#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5169void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5170 CPURISCVState *env, uint32_t desc) \
5171{ \
5172 uint32_t vl = env->vl; \
5173 uint32_t vm = vext_vm(desc); \
803963f7 5174 uint32_t esz = sizeof(ETYPE); \
5175 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5176 uint32_t vta = vext_vta(desc); \
edabcd0e 5177 uint32_t vma = vext_vma(desc); \
cd01340e
FC
5178 uint32_t i; \
5179 \
df4252b2
DHB
5180 VSTART_CHECK_EARLY_EXIT(env); \
5181 \
f714361e 5182 for (i = env->vstart; i < vl; i++) { \
cd01340e 5183 if (!vm && !vext_elem_mask(v0, i)) { \
edabcd0e
YTC
5184 /* set masked-off elements to 1s */ \
5185 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
cd01340e
FC
5186 continue; \
5187 } \
5188 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5189 } \
f714361e 5190 env->vstart = 0; \
803963f7 5191 /* set tail elements to 1s */ \
5192 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
cd01340e
FC
5193}
5194
5195GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5196GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5197GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5198GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5199GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5200GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5201
5202GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5203GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5204GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5205GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5206GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5207GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
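/*
 * Worked example (values chosen for illustration only): the vf2/vf4/vf8
 * helpers widen each source element to 2x/4x/8x its width, so a source
 * byte 0x80 becomes 0x0080 with vzext_vf2_h (zero-extension) but
 * 0xff80 with vsext_vf2_h (sign-extension).
 */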