/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include "vector_internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
    uint16_t sew = 8 << vsew;
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);
    uint16_t vlen = cpu->cfg.vlenb << 3;
    int8_t lmul;

    if (vlmul & 4) {
        /*
         * Fractional LMUL, check:
         *
         * VLEN * LMUL >= SEW
         * VLEN >> (8 - lmul) >= sew
         * (vlenb << 3) >> (8 - lmul) >= sew
         */
        if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    /* lmul encoded as in DisasContext::lmul */
    lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
    vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}
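/*
 * Illustrative example (not part of the original source): with VLEN = 128
 * (vlenb = 16), a requested vtype of SEW = 32 (vsew = 2) and LMUL = 1
 * (vlmul = 0) gives VLMAX = VLEN * LMUL / SEW = 4, so vsetvl with s1 = 10
 * returns vl = 4 while s1 = 3 returns vl = 3.  Requesting SEW = 64 with
 * LMUL = 1/4 (vlmul = 6) fails the fractional LMUL check above
 * (128 >> 2 = 32 < 64) and only sets vill.
 */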

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
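/*
 * Illustrative example (not part of the original source): for VLEN = 128
 * (vlenb = 16), EEW = 32 (log2_esz = 2) and LMUL = 1 (vext_lmul() == 0),
 * scale = -2 and the helper returns 16 >> 2 = 4 elements per register;
 * with LMUL = 8 (vext_lmul() == 3) it returns 16 << 1 = 32.
 */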

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & ~env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In system mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);
    int mmu_index = riscv_env_mmu_index(env, false);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 mmu_index, ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     mmu_index, ra);
    }
}
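/*
 * Illustrative example (not part of the original source): with 4 KiB pages,
 * probing 16 bytes starting at offset 0xffc within a page first probes the
 * 4 bytes up to the page boundary, then the remaining 12 bytes on the
 * following page, so a fault on either page is raised before any data moves.
 */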

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
                                   uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t vta = vext_vta(desc);
    int k;

    if (vta == 0) {
        return;
    }

    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
}

/*
 * stride: access vector elements from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}
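/*
 * Illustrative layout (not part of the original source): a plain vlse32.v
 * (nf = 1) loads element i from base + stride * i, while a segment variant
 * such as vlsseg3e32.v (nf = 3) loads the three fields of segment i from
 * base + stride * i + 0/4/8 and places field k at buffer index
 * i + k * max_elems, i.e. in register group vd + k * EMUL.
 */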

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 * unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
}

/*
 * masked unit-stride load and store operations are handled as a special
 * case of the strided access, with stride = NF * sizeof(ETYPE)
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 * unit-stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC());
}
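/*
 * Illustrative example (not part of the original source): with vl = 17 the
 * mask occupies ceil(17 / 8) = 3 bytes, so evl = (17 + 7) >> 3 = 3 and only
 * the first three bytes of vd are transferred.
 */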

/*
 * index: access vector elements from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)      \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,       \
                  void *vs2, CPURISCVState *env, uint32_t desc)\
{                                                              \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,    \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());    \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)     \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,       \
                  void *vs2, CPURISCVState *env, uint32_t desc)\
{                                                              \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,    \
                    STORE_FN, ctzl(sizeof(ETYPE)),             \
                    GETPC());                                  \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain;
    int mmu_index = riscv_env_mmu_index(env, false);

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index);
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (!page_check_range(addr, offset, PAGE_READ)) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/*
 * load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
    uint32_t max_elems = vlenb >> log2_esz;

    if (env->vstart >= ((vlenb * nf) >> log2_esz)) {
        env->vstart = 0;
        return;
    }

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store the rest of the elements of the segment pointed to by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd,
                      ra);
        }
        k++;
    }

    /* load/store elements for the rest of the segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}
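/*
 * Illustrative example (not part of the original source): for vl2re32.v
 * with VLEN = 128 (vlenb = 16, max_elems = 4, nf = 2), resuming a trapped
 * access with vstart = 5 gives k = 1, off = 1, so only elements 5..7 of
 * the second register are re-transferred; register 0 and element 4 are
 * left as they were.
 */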

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)

/*
 * Vector Integer Arithmetic Instructions
 */
645
43740e3a
LZ
646/* (TD, T1, T2, TX1, TX2) */
647#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
648#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
649#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
650#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
958b85f3
LZ
651#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
652#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
653#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
654#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
97b1cba3
LZ
655#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
656#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
657#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
658#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
659#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
660#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
661#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
662#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
663#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
9ff3d287
LZ
664#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
665#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
666#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
667#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
668#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
669#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
43740e3a 670
43740e3a
LZ
671#define DO_SUB(N, M) (N - M)
672#define DO_RSUB(N, M) (M - N)
673
674RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
675RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
676RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
677RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
678RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
679RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
680RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
681RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
682
f1eed927 683GEN_VEXT_VV(vadd_vv_b, 1)
684GEN_VEXT_VV(vadd_vv_h, 2)
685GEN_VEXT_VV(vadd_vv_w, 4)
686GEN_VEXT_VV(vadd_vv_d, 8)
687GEN_VEXT_VV(vsub_vv_b, 1)
688GEN_VEXT_VV(vsub_vv_h, 2)
689GEN_VEXT_VV(vsub_vv_w, 4)
690GEN_VEXT_VV(vsub_vv_d, 8)
43740e3a 691
43740e3a
LZ
692
693RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
694RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
695RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
696RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
697RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
698RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
699RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
700RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
701RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
702RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
703RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
704RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
705
5c19fc15 706GEN_VEXT_VX(vadd_vx_b, 1)
707GEN_VEXT_VX(vadd_vx_h, 2)
708GEN_VEXT_VX(vadd_vx_w, 4)
709GEN_VEXT_VX(vadd_vx_d, 8)
710GEN_VEXT_VX(vsub_vx_b, 1)
711GEN_VEXT_VX(vsub_vx_h, 2)
712GEN_VEXT_VX(vsub_vx_w, 4)
713GEN_VEXT_VX(vsub_vx_d, 8)
714GEN_VEXT_VX(vrsub_vx_b, 1)
715GEN_VEXT_VX(vrsub_vx_h, 2)
716GEN_VEXT_VX(vrsub_vx_w, 4)
717GEN_VEXT_VX(vrsub_vx_d, 8)
43740e3a
LZ
718
719void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
720{
721 intptr_t oprsz = simd_oprsz(desc);
722 intptr_t i;
723
724 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
725 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
726 }
727}
728
729void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
730{
731 intptr_t oprsz = simd_oprsz(desc);
732 intptr_t i;
733
734 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
735 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
736 }
737}
738
739void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
740{
741 intptr_t oprsz = simd_oprsz(desc);
742 intptr_t i;
743
744 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
745 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
746 }
747}
748
749void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
750{
751 intptr_t oprsz = simd_oprsz(desc);
752 intptr_t i;
753
754 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
755 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
756 }
757}
8fcdf776
LZ
758
759/* Vector Widening Integer Add/Subtract */
760#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
761#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
762#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
763#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
764#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
765#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
766#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
767#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
768#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
769#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
770#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
771#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
772RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
773RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
774RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
775RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
776RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
777RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
778RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
779RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
780RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
781RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
782RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
783RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
784RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
785RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
786RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
787RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
788RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
789RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
790RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
791RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
792RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
793RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
794RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
795RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
f1eed927 796GEN_VEXT_VV(vwaddu_vv_b, 2)
797GEN_VEXT_VV(vwaddu_vv_h, 4)
798GEN_VEXT_VV(vwaddu_vv_w, 8)
799GEN_VEXT_VV(vwsubu_vv_b, 2)
800GEN_VEXT_VV(vwsubu_vv_h, 4)
801GEN_VEXT_VV(vwsubu_vv_w, 8)
802GEN_VEXT_VV(vwadd_vv_b, 2)
803GEN_VEXT_VV(vwadd_vv_h, 4)
804GEN_VEXT_VV(vwadd_vv_w, 8)
805GEN_VEXT_VV(vwsub_vv_b, 2)
806GEN_VEXT_VV(vwsub_vv_h, 4)
807GEN_VEXT_VV(vwsub_vv_w, 8)
808GEN_VEXT_VV(vwaddu_wv_b, 2)
809GEN_VEXT_VV(vwaddu_wv_h, 4)
810GEN_VEXT_VV(vwaddu_wv_w, 8)
811GEN_VEXT_VV(vwsubu_wv_b, 2)
812GEN_VEXT_VV(vwsubu_wv_h, 4)
813GEN_VEXT_VV(vwsubu_wv_w, 8)
814GEN_VEXT_VV(vwadd_wv_b, 2)
815GEN_VEXT_VV(vwadd_wv_h, 4)
816GEN_VEXT_VV(vwadd_wv_w, 8)
817GEN_VEXT_VV(vwsub_wv_b, 2)
818GEN_VEXT_VV(vwsub_wv_h, 4)
819GEN_VEXT_VV(vwsub_wv_w, 8)
8fcdf776
LZ
820
821RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
822RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
823RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
824RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
825RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
826RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
827RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
828RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
829RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
830RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
831RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
832RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
833RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
834RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
835RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
836RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
837RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
838RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
839RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
840RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
841RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
842RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
843RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
844RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
5c19fc15 845GEN_VEXT_VX(vwaddu_vx_b, 2)
846GEN_VEXT_VX(vwaddu_vx_h, 4)
847GEN_VEXT_VX(vwaddu_vx_w, 8)
848GEN_VEXT_VX(vwsubu_vx_b, 2)
849GEN_VEXT_VX(vwsubu_vx_h, 4)
850GEN_VEXT_VX(vwsubu_vx_w, 8)
851GEN_VEXT_VX(vwadd_vx_b, 2)
852GEN_VEXT_VX(vwadd_vx_h, 4)
853GEN_VEXT_VX(vwadd_vx_w, 8)
854GEN_VEXT_VX(vwsub_vx_b, 2)
855GEN_VEXT_VX(vwsub_vx_h, 4)
856GEN_VEXT_VX(vwsub_vx_w, 8)
857GEN_VEXT_VX(vwaddu_wx_b, 2)
858GEN_VEXT_VX(vwaddu_wx_h, 4)
859GEN_VEXT_VX(vwaddu_wx_w, 8)
860GEN_VEXT_VX(vwsubu_wx_b, 2)
861GEN_VEXT_VX(vwsubu_wx_h, 4)
862GEN_VEXT_VX(vwsubu_wx_w, 8)
863GEN_VEXT_VX(vwadd_wx_b, 2)
864GEN_VEXT_VX(vwadd_wx_h, 4)
865GEN_VEXT_VX(vwadd_wx_w, 8)
866GEN_VEXT_VX(vwsub_wx_b, 2)
867GEN_VEXT_VX(vwsub_wx_h, 4)
868GEN_VEXT_VX(vwsub_wx_w, 8)
3a6f8f68
LZ
869
870/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
871#define DO_VADC(N, M, C) (N + M + C)
872#define DO_VSBC(N, M, C) (N - M - C)
873
3479a814 874#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
3a6f8f68
LZ
875void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
876 CPURISCVState *env, uint32_t desc) \
877{ \
3a6f8f68 878 uint32_t vl = env->vl; \
5c19fc15 879 uint32_t esz = sizeof(ETYPE); \
880 uint32_t total_elems = \
881 vext_get_total_elems(env, desc, esz); \
882 uint32_t vta = vext_vta(desc); \
3a6f8f68
LZ
883 uint32_t i; \
884 \
f714361e 885 for (i = env->vstart; i < vl; i++) { \
3a6f8f68
LZ
886 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
887 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 888 ETYPE carry = vext_elem_mask(v0, i); \
3a6f8f68
LZ
889 \
890 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
891 } \
f714361e 892 env->vstart = 0; \
5c19fc15 893 /* set tail elements to 1s */ \
894 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
3a6f8f68
LZ
895}
896
3479a814
FC
897GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
898GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
899GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
900GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
3a6f8f68 901
3479a814
FC
902GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
903GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
904GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
905GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
3a6f8f68 906
3479a814 907#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
3a6f8f68
LZ
908void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
909 CPURISCVState *env, uint32_t desc) \
910{ \
3a6f8f68 911 uint32_t vl = env->vl; \
5c19fc15 912 uint32_t esz = sizeof(ETYPE); \
913 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
914 uint32_t vta = vext_vta(desc); \
3a6f8f68
LZ
915 uint32_t i; \
916 \
f714361e 917 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 918 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 919 ETYPE carry = vext_elem_mask(v0, i); \
3a6f8f68
LZ
920 \
921 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
922 } \
c45eff30 923 env->vstart = 0; \
5c19fc15 924 /* set tail elements to 1s */ \
925 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
3a6f8f68
LZ
926}
927
3479a814
FC
928GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
929GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
930GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
931GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
3a6f8f68 932
3479a814
FC
933GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
934GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
935GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
936GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                              (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
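/*
 * Illustrative note (not part of the original source): DO_MADC detects the
 * carry out of an unsigned add by checking for wrap-around, e.g. for
 * uint8_t N = 200, M = 100, C = 0: (uint8_t)(N + M) = 44 < 200, so the
 * carry bit is 1.  DO_MSBC likewise reports the borrow of N - M - C.
 */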

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}
969
970GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
971GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
972GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
973GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
974
975GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
976GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
977GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
978GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
979
#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)                     \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,                \
                  void *vs2, CPURISCVState *env, uint32_t desc)       \
{                                                                     \
    uint32_t vl = env->vl;                                            \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;            \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                      \
    uint32_t i;                                                       \
                                                                      \
    for (i = env->vstart; i < vl; i++) {                              \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                            \
        ETYPE carry = !vm && vext_elem_mask(v0, i);                   \
        vext_set_elem_mask(vd, i,                                     \
                           DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
    }                                                                 \
    env->vstart = 0;                                                  \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                               \
    if (vta_all_1s) {                                                 \
        for (; i < total_elems; i++) {                                \
            vext_set_elem_mask(vd, i, 1);                             \
        }                                                             \
    }                                                                 \
}
1007
1008GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1009GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1010GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1011GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1012
1013GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1014GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1015GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1016GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
d3842924
LZ
1017
1018/* Vector Bitwise Logical Instructions */
1019RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1020RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1021RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1022RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1023RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1024RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1025RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1026RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1027RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1028RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1029RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1030RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
f1eed927 1031GEN_VEXT_VV(vand_vv_b, 1)
1032GEN_VEXT_VV(vand_vv_h, 2)
1033GEN_VEXT_VV(vand_vv_w, 4)
1034GEN_VEXT_VV(vand_vv_d, 8)
1035GEN_VEXT_VV(vor_vv_b, 1)
1036GEN_VEXT_VV(vor_vv_h, 2)
1037GEN_VEXT_VV(vor_vv_w, 4)
1038GEN_VEXT_VV(vor_vv_d, 8)
1039GEN_VEXT_VV(vxor_vv_b, 1)
1040GEN_VEXT_VV(vxor_vv_h, 2)
1041GEN_VEXT_VV(vxor_vv_w, 4)
1042GEN_VEXT_VV(vxor_vv_d, 8)
d3842924
LZ
1043
1044RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1045RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1046RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1047RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1048RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1049RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1050RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1051RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1052RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1053RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1054RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1055RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
5c19fc15 1056GEN_VEXT_VX(vand_vx_b, 1)
1057GEN_VEXT_VX(vand_vx_h, 2)
1058GEN_VEXT_VX(vand_vx_w, 4)
1059GEN_VEXT_VX(vand_vx_d, 8)
1060GEN_VEXT_VX(vor_vx_b, 1)
1061GEN_VEXT_VX(vor_vx_h, 2)
1062GEN_VEXT_VX(vor_vx_w, 4)
1063GEN_VEXT_VX(vor_vx_d, 8)
1064GEN_VEXT_VX(vxor_vx_b, 1)
1065GEN_VEXT_VX(vxor_vx_h, 2)
1066GEN_VEXT_VX(vxor_vx_w, 4)
1067GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)       \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                    \
                  void *vs2, CPURISCVState *env, uint32_t desc)     \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t esz = sizeof(TS1);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);    \
    uint32_t vta = vext_vta(desc);                                  \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);     \
            continue;                                               \
        }                                                           \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                            \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                            \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                  \
    }                                                               \
    env->vstart = 0;                                                \
    /* set tail elements to 1s */                                   \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);        \
}
1100
3479a814
FC
1101GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1102GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1103GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1104GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
3277d955 1105
3479a814
FC
1106GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1107GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1108GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1109GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
3277d955 1110
3479a814
FC
1111GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1112GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1113GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1114GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
3277d955 1115
/*
 * generate the helpers for shift instructions with one vector and one
 * scalar operand
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK)     \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env,                \
                  uint32_t desc)                                \
{                                                               \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t vl = env->vl;                                      \
    uint32_t esz = sizeof(TD);                                  \
    uint32_t total_elems =                                      \
        vext_get_total_elems(env, desc, esz);                   \
    uint32_t vta = vext_vta(desc);                              \
    uint32_t vma = vext_vma(desc);                              \
    uint32_t i;                                                 \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        if (!vm && !vext_elem_mask(v0, i)) {                    \
            /* set masked-off elements to 1s */                 \
            vext_set_elems_1s(vd, vma, i * esz,                 \
                              (i + 1) * esz);                   \
            continue;                                           \
        }                                                       \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                        \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);                \
    }                                                           \
    env->vstart = 0;                                            \
    /* set tail elements to 1s */                               \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);    \
}
1147
1148GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1149GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1150GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1151GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1152
1153GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1154GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1155GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1156GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1157
1158GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1159GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1160GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1161GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
7689b028
LZ
1162
1163/* Vector Narrowing Integer Right Shift Instructions */
7daa5852
FC
1164GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1165GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1166GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1167GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1168GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1169GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1170GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1171GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1172GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1173GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1174GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1175GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}
1218
1219GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1220GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1221GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1222GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1223
1224GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1225GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1226GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1227GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1228
1229GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1230GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1231GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1232GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1233
1234GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1235GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1236GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1237GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1238
1239GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1240GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1241GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1242GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1243
1244GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1245GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1246GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1247GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1248
#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                           DO_OP(s2, (ETYPE)(target_long)s1));      \
    }                                                               \
    env->vstart = 0;                                                \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                             \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}
1283
1284GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1285GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1286GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1287GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1288
1289GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1290GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1291GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1292GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1293
1294GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1295GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1296GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1297GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1298
1299GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1300GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1301GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1302GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1303
1304GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1305GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1306GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1307GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1308
1309GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1310GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1311GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1312GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1313
1314GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1315GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1316GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1317GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1318
1319GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1320GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1321GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1322GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
558fa779
LZ
1323
1324/* Vector Integer Min/Max Instructions */
1325RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1326RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1327RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1328RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1329RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1330RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1331RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1332RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1333RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1334RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1335RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1336RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1337RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1338RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1339RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1340RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
f1eed927 1341GEN_VEXT_VV(vminu_vv_b, 1)
1342GEN_VEXT_VV(vminu_vv_h, 2)
1343GEN_VEXT_VV(vminu_vv_w, 4)
1344GEN_VEXT_VV(vminu_vv_d, 8)
1345GEN_VEXT_VV(vmin_vv_b, 1)
1346GEN_VEXT_VV(vmin_vv_h, 2)
1347GEN_VEXT_VV(vmin_vv_w, 4)
1348GEN_VEXT_VV(vmin_vv_d, 8)
1349GEN_VEXT_VV(vmaxu_vv_b, 1)
1350GEN_VEXT_VV(vmaxu_vv_h, 2)
1351GEN_VEXT_VV(vmaxu_vv_w, 4)
1352GEN_VEXT_VV(vmaxu_vv_d, 8)
1353GEN_VEXT_VV(vmax_vv_b, 1)
1354GEN_VEXT_VV(vmax_vv_h, 2)
1355GEN_VEXT_VV(vmax_vv_w, 4)
1356GEN_VEXT_VV(vmax_vv_d, 8)
558fa779
LZ
1357
1358RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1359RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1360RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1361RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1362RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1363RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1364RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1365RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1366RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1367RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1368RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1369RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1370RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1371RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1372RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1373RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
5c19fc15 1374GEN_VEXT_VX(vminu_vx_b, 1)
1375GEN_VEXT_VX(vminu_vx_h, 2)
1376GEN_VEXT_VX(vminu_vx_w, 4)
1377GEN_VEXT_VX(vminu_vx_d, 8)
1378GEN_VEXT_VX(vmin_vx_b, 1)
1379GEN_VEXT_VX(vmin_vx_h, 2)
1380GEN_VEXT_VX(vmin_vx_w, 4)
1381GEN_VEXT_VX(vmin_vx_d, 8)
1382GEN_VEXT_VX(vmaxu_vx_b, 1)
1383GEN_VEXT_VX(vmaxu_vx_h, 2)
1384GEN_VEXT_VX(vmaxu_vx_w, 4)
1385GEN_VEXT_VX(vmaxu_vx_d, 8)
1386GEN_VEXT_VX(vmax_vx_b, 1)
1387GEN_VEXT_VX(vmax_vx_h, 2)
1388GEN_VEXT_VX(vmax_vx_w, 4)
1389GEN_VEXT_VX(vmax_vx_d, 8)
958b85f3
LZ
1390
1391/* Vector Single-Width Integer Multiply Instructions */
1392#define DO_MUL(N, M) (N * M)
1393RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1394RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1395RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1396RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
f1eed927 1397GEN_VEXT_VV(vmul_vv_b, 1)
1398GEN_VEXT_VV(vmul_vv_h, 2)
1399GEN_VEXT_VV(vmul_vv_w, 4)
1400GEN_VEXT_VV(vmul_vv_d, 8)
958b85f3
LZ
1401
1402static int8_t do_mulh_b(int8_t s2, int8_t s1)
1403{
1404 return (int16_t)s2 * (int16_t)s1 >> 8;
1405}
1406
1407static int16_t do_mulh_h(int16_t s2, int16_t s1)
1408{
1409 return (int32_t)s2 * (int32_t)s1 >> 16;
1410}
1411
1412static int32_t do_mulh_w(int32_t s2, int32_t s1)
1413{
1414 return (int64_t)s2 * (int64_t)s1 >> 32;
1415}
1416
1417static int64_t do_mulh_d(int64_t s2, int64_t s1)
1418{
1419 uint64_t hi_64, lo_64;
1420
1421 muls64(&lo_64, &hi_64, s1, s2);
1422 return hi_64;
1423}
1424
1425static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1426{
1427 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1428}
1429
1430static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1431{
1432 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1433}
1434
1435static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1436{
1437 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1438}
1439
1440static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1441{
1442 uint64_t hi_64, lo_64;
1443
1444 mulu64(&lo_64, &hi_64, s2, s1);
1445 return hi_64;
1446}
1447
1448static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1449{
1450 return (int16_t)s2 * (uint16_t)s1 >> 8;
1451}
1452
1453static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1454{
1455 return (int32_t)s2 * (uint32_t)s1 >> 16;
1456}
1457
1458static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1459{
1460 return (int64_t)s2 * (uint64_t)s1 >> 32;
1461}
1462
1463/*
1464 * Let A = signed operand,
1465 * B = unsigned operand,
1466 * P = mulu64(A, B), the unsigned product of the operand bit patterns.
1467 *
1468 * If A >= 0, the signed and unsigned readings of A agree, so the
1469 * signed product SP = P.
1470 *
1471 * If A < 0, the unsigned reading of A's bits is A + 2 ** 64, so
1472 * P = (A + 2 ** 64) * B
1473 * = A * B + 2 ** 64 * B
1474 * and therefore
1475 * SP = A * B
1476 * = P - 2 ** 64 * B.
1477 *
1478 * Subtracting 2 ** 64 * B only affects the upper 64 bits of the
1479 * 128-bit product, hence: HI_P -= (A < 0 ? B : 0)
1480 */
1481
1482static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1483{
1484 uint64_t hi_64, lo_64;
1485
1486 mulu64(&lo_64, &hi_64, s2, s1);
1487
1488 hi_64 -= s2 < 0 ? s1 : 0;
1489 return hi_64;
1490}
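/*
 * Illustrative cross-check of the identity above, assuming a compiler
 * that provides __int128 (GCC/Clang).  The helper name is hypothetical
 * and nothing below uses it; it only restates the result with an exact
 * 128-bit signed * unsigned product.
 */
#ifdef __SIZEOF_INT128__
static inline int64_t ref_mulhsu_d(int64_t s2, uint64_t s1)
{
    /* exact product, then keep the high 64 bits */
    __int128 prod = (__int128)s2 * (__int128)s1;

    return (int64_t)(prod >> 64);
}
#endif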
1491
1492RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1493RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1494RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1495RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1496RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1497RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1498RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1499RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1500RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1501RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1502RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1503RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
f1eed927 1504GEN_VEXT_VV(vmulh_vv_b, 1)
1505GEN_VEXT_VV(vmulh_vv_h, 2)
1506GEN_VEXT_VV(vmulh_vv_w, 4)
1507GEN_VEXT_VV(vmulh_vv_d, 8)
1508GEN_VEXT_VV(vmulhu_vv_b, 1)
1509GEN_VEXT_VV(vmulhu_vv_h, 2)
1510GEN_VEXT_VV(vmulhu_vv_w, 4)
1511GEN_VEXT_VV(vmulhu_vv_d, 8)
1512GEN_VEXT_VV(vmulhsu_vv_b, 1)
1513GEN_VEXT_VV(vmulhsu_vv_h, 2)
1514GEN_VEXT_VV(vmulhsu_vv_w, 4)
1515GEN_VEXT_VV(vmulhsu_vv_d, 8)
1516
1517RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1518RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1519RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1520RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1521RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1522RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1523RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1524RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1525RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1526RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1527RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1528RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1529RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1530RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1531RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1532RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
5c19fc15 1533GEN_VEXT_VX(vmul_vx_b, 1)
1534GEN_VEXT_VX(vmul_vx_h, 2)
1535GEN_VEXT_VX(vmul_vx_w, 4)
1536GEN_VEXT_VX(vmul_vx_d, 8)
1537GEN_VEXT_VX(vmulh_vx_b, 1)
1538GEN_VEXT_VX(vmulh_vx_h, 2)
1539GEN_VEXT_VX(vmulh_vx_w, 4)
1540GEN_VEXT_VX(vmulh_vx_d, 8)
1541GEN_VEXT_VX(vmulhu_vx_b, 1)
1542GEN_VEXT_VX(vmulhu_vx_h, 2)
1543GEN_VEXT_VX(vmulhu_vx_w, 4)
1544GEN_VEXT_VX(vmulhu_vx_d, 8)
1545GEN_VEXT_VX(vmulhsu_vx_b, 1)
1546GEN_VEXT_VX(vmulhsu_vx_h, 2)
1547GEN_VEXT_VX(vmulhsu_vx_w, 4)
1548GEN_VEXT_VX(vmulhsu_vx_d, 8)
1549
1550/* Vector Integer Divide Instructions */
1551#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1552#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
c45eff30 1553#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \
85e6658c 1554 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
c45eff30 1555#define DO_REM(N, M) (unlikely(M == 0) ? N : \
1556 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
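/*
 * The "(N == -N)" test above is an INT_MIN check: only the minimum
 * two's-complement value equals its own negation.  A minimal sketch of
 * the same corner cases, spelled out for 32-bit signed division
 * (hypothetical helper, not used by the macros):
 */
static inline int32_t example_div32(int32_t n, int32_t m)
{
    if (m == 0) {
        return -1;                  /* division by zero: all bits set */
    }
    if (n == INT32_MIN && m == -1) {
        return n;                   /* signed overflow: return the dividend */
    }
    return n / m;
}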
1557
1558RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1559RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1560RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1561RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1562RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1563RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1564RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1565RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1566RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1567RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1568RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1569RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1570RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1571RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1572RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1573RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
f1eed927 1574GEN_VEXT_VV(vdivu_vv_b, 1)
1575GEN_VEXT_VV(vdivu_vv_h, 2)
1576GEN_VEXT_VV(vdivu_vv_w, 4)
1577GEN_VEXT_VV(vdivu_vv_d, 8)
1578GEN_VEXT_VV(vdiv_vv_b, 1)
1579GEN_VEXT_VV(vdiv_vv_h, 2)
1580GEN_VEXT_VV(vdiv_vv_w, 4)
1581GEN_VEXT_VV(vdiv_vv_d, 8)
1582GEN_VEXT_VV(vremu_vv_b, 1)
1583GEN_VEXT_VV(vremu_vv_h, 2)
1584GEN_VEXT_VV(vremu_vv_w, 4)
1585GEN_VEXT_VV(vremu_vv_d, 8)
1586GEN_VEXT_VV(vrem_vv_b, 1)
1587GEN_VEXT_VV(vrem_vv_h, 2)
1588GEN_VEXT_VV(vrem_vv_w, 4)
1589GEN_VEXT_VV(vrem_vv_d, 8)
1590
1591RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1592RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1593RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1594RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1595RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1596RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1597RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1598RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1599RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1600RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1601RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1602RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1603RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1604RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1605RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1606RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
5c19fc15 1607GEN_VEXT_VX(vdivu_vx_b, 1)
1608GEN_VEXT_VX(vdivu_vx_h, 2)
1609GEN_VEXT_VX(vdivu_vx_w, 4)
1610GEN_VEXT_VX(vdivu_vx_d, 8)
1611GEN_VEXT_VX(vdiv_vx_b, 1)
1612GEN_VEXT_VX(vdiv_vx_h, 2)
1613GEN_VEXT_VX(vdiv_vx_w, 4)
1614GEN_VEXT_VX(vdiv_vx_d, 8)
1615GEN_VEXT_VX(vremu_vx_b, 1)
1616GEN_VEXT_VX(vremu_vx_h, 2)
1617GEN_VEXT_VX(vremu_vx_w, 4)
1618GEN_VEXT_VX(vremu_vx_d, 8)
1619GEN_VEXT_VX(vrem_vx_b, 1)
1620GEN_VEXT_VX(vrem_vx_h, 2)
1621GEN_VEXT_VX(vrem_vx_w, 4)
1622GEN_VEXT_VX(vrem_vx_d, 8)
1623
1624/* Vector Widening Integer Multiply Instructions */
1625RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1626RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1627RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1628RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1629RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1630RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1631RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1632RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1633RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
f1eed927 1634GEN_VEXT_VV(vwmul_vv_b, 2)
1635GEN_VEXT_VV(vwmul_vv_h, 4)
1636GEN_VEXT_VV(vwmul_vv_w, 8)
1637GEN_VEXT_VV(vwmulu_vv_b, 2)
1638GEN_VEXT_VV(vwmulu_vv_h, 4)
1639GEN_VEXT_VV(vwmulu_vv_w, 8)
1640GEN_VEXT_VV(vwmulsu_vv_b, 2)
1641GEN_VEXT_VV(vwmulsu_vv_h, 4)
1642GEN_VEXT_VV(vwmulsu_vv_w, 8)
1643
1644RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1645RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1646RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1647RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1648RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1649RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1650RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1651RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1652RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
5c19fc15 1653GEN_VEXT_VX(vwmul_vx_b, 2)
1654GEN_VEXT_VX(vwmul_vx_h, 4)
1655GEN_VEXT_VX(vwmul_vx_w, 8)
1656GEN_VEXT_VX(vwmulu_vx_b, 2)
1657GEN_VEXT_VX(vwmulu_vx_h, 4)
1658GEN_VEXT_VX(vwmulu_vx_w, 8)
1659GEN_VEXT_VX(vwmulsu_vx_b, 2)
1660GEN_VEXT_VX(vwmulsu_vx_h, 4)
1661GEN_VEXT_VX(vwmulsu_vx_w, 8)
1662
1663/* Vector Single-Width Integer Multiply-Add Instructions */
c45eff30 1664#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1665static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1666{ \
1667 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1668 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1669 TD d = *((TD *)vd + HD(i)); \
1670 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1671}
1672
1673#define DO_MACC(N, M, D) (M * N + D)
1674#define DO_NMSAC(N, M, D) (-(M * N) + D)
1675#define DO_MADD(N, M, D) (M * D + N)
1676#define DO_NMSUB(N, M, D) (-(M * D) + N)
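/*
 * With the OP(s2, s1, d) call order used by OPIVV3, DO_MACC computes
 * vd[i] = vs1[i] * vs2[i] + vd[i] (vmacc) while DO_MADD computes
 * vd[i] = vs1[i] * vd[i] + vs2[i] (vmadd).  A minimal sketch for 32-bit
 * elements (hypothetical helpers, not used by the macros):
 */
static inline int32_t example_macc32(int32_t s2, int32_t s1, int32_t d)
{
    return s1 * s2 + d;             /* multiply the sources, add into vd */
}

static inline int32_t example_madd32(int32_t s2, int32_t s1, int32_t d)
{
    return s1 * d + s2;             /* multiply vs1 by vd, then add vs2 */
}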
1677RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1678RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1679RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1680RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1681RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1682RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1683RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1684RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1685RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1686RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1687RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1688RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1689RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1690RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1691RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1692RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
f1eed927 1693GEN_VEXT_VV(vmacc_vv_b, 1)
1694GEN_VEXT_VV(vmacc_vv_h, 2)
1695GEN_VEXT_VV(vmacc_vv_w, 4)
1696GEN_VEXT_VV(vmacc_vv_d, 8)
1697GEN_VEXT_VV(vnmsac_vv_b, 1)
1698GEN_VEXT_VV(vnmsac_vv_h, 2)
1699GEN_VEXT_VV(vnmsac_vv_w, 4)
1700GEN_VEXT_VV(vnmsac_vv_d, 8)
1701GEN_VEXT_VV(vmadd_vv_b, 1)
1702GEN_VEXT_VV(vmadd_vv_h, 2)
1703GEN_VEXT_VV(vmadd_vv_w, 4)
1704GEN_VEXT_VV(vmadd_vv_d, 8)
1705GEN_VEXT_VV(vnmsub_vv_b, 1)
1706GEN_VEXT_VV(vnmsub_vv_h, 2)
1707GEN_VEXT_VV(vnmsub_vv_w, 4)
1708GEN_VEXT_VV(vnmsub_vv_d, 8)
1709
1710#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1711static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1712{ \
1713 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1714 TD d = *((TD *)vd + HD(i)); \
1715 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1716}
1717
1718RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1719RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1720RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1721RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1722RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1723RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1724RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1725RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1726RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1727RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1728RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1729RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1730RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1731RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1732RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1733RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
5c19fc15 1734GEN_VEXT_VX(vmacc_vx_b, 1)
1735GEN_VEXT_VX(vmacc_vx_h, 2)
1736GEN_VEXT_VX(vmacc_vx_w, 4)
1737GEN_VEXT_VX(vmacc_vx_d, 8)
1738GEN_VEXT_VX(vnmsac_vx_b, 1)
1739GEN_VEXT_VX(vnmsac_vx_h, 2)
1740GEN_VEXT_VX(vnmsac_vx_w, 4)
1741GEN_VEXT_VX(vnmsac_vx_d, 8)
1742GEN_VEXT_VX(vmadd_vx_b, 1)
1743GEN_VEXT_VX(vmadd_vx_h, 2)
1744GEN_VEXT_VX(vmadd_vx_w, 4)
1745GEN_VEXT_VX(vmadd_vx_d, 8)
1746GEN_VEXT_VX(vnmsub_vx_b, 1)
1747GEN_VEXT_VX(vnmsub_vx_h, 2)
1748GEN_VEXT_VX(vnmsub_vx_w, 4)
1749GEN_VEXT_VX(vnmsub_vx_d, 8)
1750
1751/* Vector Widening Integer Multiply-Add Instructions */
1752RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1753RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1754RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1755RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1756RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1757RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1758RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1759RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1760RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
f1eed927 1761GEN_VEXT_VV(vwmaccu_vv_b, 2)
1762GEN_VEXT_VV(vwmaccu_vv_h, 4)
1763GEN_VEXT_VV(vwmaccu_vv_w, 8)
1764GEN_VEXT_VV(vwmacc_vv_b, 2)
1765GEN_VEXT_VV(vwmacc_vv_h, 4)
1766GEN_VEXT_VV(vwmacc_vv_w, 8)
1767GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1768GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1769GEN_VEXT_VV(vwmaccsu_vv_w, 8)
1770
1771RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1772RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1773RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1774RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1775RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1776RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1777RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1778RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1779RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1780RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1781RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1782RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
5c19fc15 1783GEN_VEXT_VX(vwmaccu_vx_b, 2)
1784GEN_VEXT_VX(vwmaccu_vx_h, 4)
1785GEN_VEXT_VX(vwmaccu_vx_w, 8)
1786GEN_VEXT_VX(vwmacc_vx_b, 2)
1787GEN_VEXT_VX(vwmacc_vx_h, 4)
1788GEN_VEXT_VX(vwmacc_vx_w, 8)
1789GEN_VEXT_VX(vwmaccsu_vx_b, 2)
1790GEN_VEXT_VX(vwmaccsu_vx_h, 4)
1791GEN_VEXT_VX(vwmaccsu_vx_w, 8)
1792GEN_VEXT_VX(vwmaccus_vx_b, 2)
1793GEN_VEXT_VX(vwmaccus_vx_h, 4)
1794GEN_VEXT_VX(vwmaccus_vx_w, 8)
1795
1796/* Vector Integer Merge and Move Instructions */
3479a814 1797#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
1798void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1799 uint32_t desc) \
1800{ \
1801 uint32_t vl = env->vl; \
89a32de2 1802 uint32_t esz = sizeof(ETYPE); \
1803 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1804 uint32_t vta = vext_vta(desc); \
1805 uint32_t i; \
1806 \
f714361e 1807 for (i = env->vstart; i < vl; i++) { \
1808 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1809 *((ETYPE *)vd + H(i)) = s1; \
1810 } \
f714361e 1811 env->vstart = 0; \
89a32de2 1812 /* set tail elements to 1s */ \
1813 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1814}
1815
1816GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1817GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1818GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1819GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
f020a7a1 1820
3479a814 1821#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
1822void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1823 uint32_t desc) \
1824{ \
1825 uint32_t vl = env->vl; \
89a32de2 1826 uint32_t esz = sizeof(ETYPE); \
1827 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1828 uint32_t vta = vext_vta(desc); \
1829 uint32_t i; \
1830 \
f714361e 1831 for (i = env->vstart; i < vl; i++) { \
1832 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
1833 } \
f714361e 1834 env->vstart = 0; \
89a32de2 1835 /* set tail elements to 1s */ \
1836 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1837}
1838
1839GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
1840GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1841GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1842GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
f020a7a1 1843
3479a814 1844#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
1845void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1846 CPURISCVState *env, uint32_t desc) \
1847{ \
f020a7a1 1848 uint32_t vl = env->vl; \
89a32de2 1849 uint32_t esz = sizeof(ETYPE); \
1850 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1851 uint32_t vta = vext_vta(desc); \
1852 uint32_t i; \
1853 \
f714361e 1854 for (i = env->vstart; i < vl; i++) { \
f9298de5 1855 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
1856 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
1857 } \
f714361e 1858 env->vstart = 0; \
89a32de2 1859 /* set tail elements to 1s */ \
1860 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1861}
1862
1863GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
1864GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1865GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1866GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
f020a7a1 1867
3479a814 1868#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
1869void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1870 void *vs2, CPURISCVState *env, uint32_t desc) \
1871{ \
f020a7a1 1872 uint32_t vl = env->vl; \
89a32de2 1873 uint32_t esz = sizeof(ETYPE); \
1874 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1875 uint32_t vta = vext_vta(desc); \
1876 uint32_t i; \
1877 \
f714361e 1878 for (i = env->vstart; i < vl; i++) { \
f020a7a1 1879 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1880 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
1881 (ETYPE)(target_long)s1); \
1882 *((ETYPE *)vd + H(i)) = d; \
1883 } \
f714361e 1884 env->vstart = 0; \
89a32de2 1885 /* set tail elements to 1s */ \
1886 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1887}
1888
1889GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
1890GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1891GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1892GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1893
1894/*
3b57254d 1895 * Vector Fixed-Point Arithmetic Instructions
1896 */
1897
1898/* Vector Single-Width Saturating Add and Subtract */
1899
1900/*
1901 * As fixed-point instructions share rounding-mode and saturation
1902 * handling, define the common macros for them here.
1903 */
1904typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1905 CPURISCVState *env, int vxrm);
1906
1907#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1908static inline void \
1909do_##NAME(void *vd, void *vs1, void *vs2, int i, \
1910 CPURISCVState *env, int vxrm) \
1911{ \
1912 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1913 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1914 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
1915}
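/*
 * Illustrative expansion (assuming OP_UUU_B is the usual all-uint8_t type
 * tuple): RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
 * further below produces roughly
 *
 *     static inline void
 *     do_vsaddu_vv_b(void *vd, void *vs1, void *vs2, int i,
 *                    CPURISCVState *env, int vxrm)
 *     {
 *         uint8_t s1 = *((uint8_t *)vs1 + H1(i));
 *         uint8_t s2 = *((uint8_t *)vs2 + H1(i));
 *         *((uint8_t *)vd + H1(i)) = saddu8(env, vxrm, s2, s1);
 *     }
 */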
1916
1917static inline void
1918vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1919 CPURISCVState *env,
f9298de5 1920 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 1921 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 1922{
f714361e 1923 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 1924 if (!vm && !vext_elem_mask(v0, i)) {
1925 /* set masked-off elements to 1s */
1926 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
1927 continue;
1928 }
1929 fn(vd, vs1, vs2, i, env, vxrm);
1930 }
f714361e 1931 env->vstart = 0;
1932}
1933
1934static inline void
1935vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1936 CPURISCVState *env,
8a085fb2 1937 uint32_t desc,
09106eed 1938 opivv2_rm_fn *fn, uint32_t esz)
eb2650e3 1939{
1940 uint32_t vm = vext_vm(desc);
1941 uint32_t vl = env->vl;
09106eed 1942 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
1943 uint32_t vta = vext_vta(desc);
72e17a9f 1944 uint32_t vma = vext_vma(desc);
1945
1946 switch (env->vxrm) {
1947 case 0: /* rnu */
1948 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1949 env, vl, vm, 0, fn, vma, esz);
1950 break;
1951 case 1: /* rne */
1952 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1953 env, vl, vm, 1, fn, vma, esz);
1954 break;
1955 case 2: /* rdn */
1956 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1957 env, vl, vm, 2, fn, vma, esz);
1958 break;
1959 default: /* rod */
1960 vext_vv_rm_1(vd, v0, vs1, vs2,
72e17a9f 1961 env, vl, vm, 3, fn, vma, esz);
1962 break;
1963 }
09106eed 1964 /* set tail elements to 1s */
1965 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
1966}
1967
1968/* generate helpers for fixed-point instructions with OPIVV format */
09106eed 1969#define GEN_VEXT_VV_RM(NAME, ESZ) \
1970void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1971 CPURISCVState *env, uint32_t desc) \
1972{ \
8a085fb2 1973 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
09106eed 1974 do_##NAME, ESZ); \
1975}
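/*
 * Illustrative expansion: GEN_VEXT_VV_RM(vsaddu_vv_b, 1) below becomes
 *
 *     void HELPER(vsaddu_vv_b)(void *vd, void *v0, void *vs1, void *vs2,
 *                              CPURISCVState *env, uint32_t desc)
 *     {
 *         vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, do_vsaddu_vv_b, 1);
 *     }
 *
 * i.e. the element-wise do_*() function runs under the current rounding
 * mode with the mask and tail policies applied by vext_vv_rm_2().
 */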
1976
1977static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
1978 uint8_t b)
1979{
1980 uint8_t res = a + b;
1981 if (res < a) {
1982 res = UINT8_MAX;
1983 env->vxsat = 0x1;
1984 }
1985 return res;
1986}
1987
1988static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1989 uint16_t b)
1990{
1991 uint16_t res = a + b;
1992 if (res < a) {
1993 res = UINT16_MAX;
1994 env->vxsat = 0x1;
1995 }
1996 return res;
1997}
1998
1999static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2000 uint32_t b)
2001{
2002 uint32_t res = a + b;
2003 if (res < a) {
2004 res = UINT32_MAX;
2005 env->vxsat = 0x1;
2006 }
2007 return res;
2008}
2009
2010static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2011 uint64_t b)
2012{
2013 uint64_t res = a + b;
2014 if (res < a) {
2015 res = UINT64_MAX;
2016 env->vxsat = 0x1;
2017 }
2018 return res;
2019}
2020
2021RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2022RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2023RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2024RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
09106eed 2025GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2026GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2027GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2028GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
2029
2030typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2031 CPURISCVState *env, int vxrm);
2032
2033#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2034static inline void \
2035do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2036 CPURISCVState *env, int vxrm) \
2037{ \
2038 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2039 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2040}
2041
2042static inline void
2043vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2044 CPURISCVState *env,
f9298de5 2045 uint32_t vl, uint32_t vm, int vxrm,
72e17a9f 2046 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
eb2650e3 2047{
f714361e 2048 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2049 if (!vm && !vext_elem_mask(v0, i)) {
2050 /* set masked-off elements to 1s */
2051 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2052 continue;
2053 }
2054 fn(vd, s1, vs2, i, env, vxrm);
2055 }
f714361e 2056 env->vstart = 0;
2057}
2058
2059static inline void
2060vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2061 CPURISCVState *env,
8a085fb2 2062 uint32_t desc,
09106eed 2063 opivx2_rm_fn *fn, uint32_t esz)
eb2650e3 2064{
2065 uint32_t vm = vext_vm(desc);
2066 uint32_t vl = env->vl;
09106eed 2067 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2068 uint32_t vta = vext_vta(desc);
72e17a9f 2069 uint32_t vma = vext_vma(desc);
2070
2071 switch (env->vxrm) {
2072 case 0: /* rnu */
2073 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2074 env, vl, vm, 0, fn, vma, esz);
2075 break;
2076 case 1: /* rne */
2077 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2078 env, vl, vm, 1, fn, vma, esz);
2079 break;
2080 case 2: /* rdn */
2081 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2082 env, vl, vm, 2, fn, vma, esz);
2083 break;
2084 default: /* rod */
2085 vext_vx_rm_1(vd, v0, s1, vs2,
72e17a9f 2086 env, vl, vm, 3, fn, vma, esz);
2087 break;
2088 }
09106eed 2089 /* set tail elements to 1s */
2090 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2091}
2092
2093/* generate helpers for fixed-point instructions with OPIVX format */
09106eed 2094#define GEN_VEXT_VX_RM(NAME, ESZ) \
eb2650e3 2095void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2096 void *vs2, CPURISCVState *env, \
2097 uint32_t desc) \
eb2650e3 2098{ \
8a085fb2 2099 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
09106eed 2100 do_##NAME, ESZ); \
2101}
2102
2103RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2104RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2105RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2106RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
09106eed 2107GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2108GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2109GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2110GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
2111
2112static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2113{
2114 int8_t res = a + b;
2115 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2116 res = a > 0 ? INT8_MAX : INT8_MIN;
2117 env->vxsat = 0x1;
2118 }
2119 return res;
2120}
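/*
 * The bit test above flags signed overflow exactly when both addends have
 * the same sign and the sum's sign differs from it.  A minimal sketch of
 * the equivalent explicit form (hypothetical helper, unused):
 */
static inline bool example_sadd_overflows(int8_t a, int8_t b, int8_t res)
{
    return ((a < 0) == (b < 0)) && ((res < 0) != (a < 0));
}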
2121
2122static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
2123 int16_t b)
2124{
2125 int16_t res = a + b;
2126 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2127 res = a > 0 ? INT16_MAX : INT16_MIN;
2128 env->vxsat = 0x1;
2129 }
2130 return res;
2131}
2132
2133static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
2134 int32_t b)
2135{
2136 int32_t res = a + b;
2137 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2138 res = a > 0 ? INT32_MAX : INT32_MIN;
2139 env->vxsat = 0x1;
2140 }
2141 return res;
2142}
2143
2144static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
2145 int64_t b)
2146{
2147 int64_t res = a + b;
2148 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2149 res = a > 0 ? INT64_MAX : INT64_MIN;
2150 env->vxsat = 0x1;
2151 }
2152 return res;
2153}
2154
2155RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2156RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2157RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2158RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
09106eed 2159GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2160GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2161GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2162GEN_VEXT_VV_RM(vsadd_vv_d, 8)
2163
2164RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2165RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2166RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2167RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
09106eed 2168GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2169GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2170GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2171GEN_VEXT_VX_RM(vsadd_vx_d, 8)
eb2650e3 2172
2173static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2174 uint8_t b)
2175{
2176 uint8_t res = a - b;
2177 if (res > a) {
2178 res = 0;
2179 env->vxsat = 0x1;
2180 }
2181 return res;
2182}
2183
2184static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2185 uint16_t b)
2186{
2187 uint16_t res = a - b;
2188 if (res > a) {
2189 res = 0;
2190 env->vxsat = 0x1;
2191 }
2192 return res;
2193}
2194
2195static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2196 uint32_t b)
2197{
2198 uint32_t res = a - b;
2199 if (res > a) {
2200 res = 0;
2201 env->vxsat = 0x1;
2202 }
2203 return res;
2204}
2205
2206static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2207 uint64_t b)
2208{
2209 uint64_t res = a - b;
2210 if (res > a) {
2211 res = 0;
2212 env->vxsat = 0x1;
2213 }
2214 return res;
2215}
2216
2217RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2218RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2219RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2220RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
09106eed 2221GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2222GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2223GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2224GEN_VEXT_VV_RM(vssubu_vv_d, 8)
2225
2226RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2227RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2228RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2229RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
09106eed 2230GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2231GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2232GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2233GEN_VEXT_VX_RM(vssubu_vx_d, 8)
2234
2235static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2236{
2237 int8_t res = a - b;
2238 if ((res ^ a) & (a ^ b) & INT8_MIN) {
65606f21 2239 res = a >= 0 ? INT8_MAX : INT8_MIN;
2240 env->vxsat = 0x1;
2241 }
2242 return res;
2243}
2244
2245static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
2246 int16_t b)
2247{
2248 int16_t res = a - b;
2249 if ((res ^ a) & (a ^ b) & INT16_MIN) {
65606f21 2250 res = a >= 0 ? INT16_MAX : INT16_MIN;
2251 env->vxsat = 0x1;
2252 }
2253 return res;
2254}
2255
2256static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
2257 int32_t b)
2258{
2259 int32_t res = a - b;
2260 if ((res ^ a) & (a ^ b) & INT32_MIN) {
65606f21 2261 res = a >= 0 ? INT32_MAX : INT32_MIN;
2262 env->vxsat = 0x1;
2263 }
2264 return res;
2265}
2266
2267static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
2268 int64_t b)
2269{
2270 int64_t res = a - b;
2271 if ((res ^ a) & (a ^ b) & INT64_MIN) {
65606f21 2272 res = a >= 0 ? INT64_MAX : INT64_MIN;
2273 env->vxsat = 0x1;
2274 }
2275 return res;
2276}
2277
2278RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2279RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2280RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2281RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
09106eed 2282GEN_VEXT_VV_RM(vssub_vv_b, 1)
2283GEN_VEXT_VV_RM(vssub_vv_h, 2)
2284GEN_VEXT_VV_RM(vssub_vv_w, 4)
2285GEN_VEXT_VV_RM(vssub_vv_d, 8)
2286
2287RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2288RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2289RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2290RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
09106eed 2291GEN_VEXT_VX_RM(vssub_vx_b, 1)
2292GEN_VEXT_VX_RM(vssub_vx_h, 2)
2293GEN_VEXT_VX_RM(vssub_vx_w, 4)
2294GEN_VEXT_VX_RM(vssub_vx_d, 8)
2295
2296/* Vector Single-Width Averaging Add and Subtract */
2297static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2298{
2299 uint8_t d = extract64(v, shift, 1);
2300 uint8_t d1;
2301 uint64_t D1, D2;
2302
2303 if (shift == 0 || shift > 64) {
2304 return 0;
2305 }
2306
2307 d1 = extract64(v, shift - 1, 1);
2308 D1 = extract64(v, 0, shift);
2309 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2310 return d1;
2311 } else if (vxrm == 1) { /* round-to-nearest-even */
2312 if (shift > 1) {
2313 D2 = extract64(v, 0, shift - 1);
2314 return d1 & ((D2 != 0) | d);
2315 } else {
2316 return d1 & d;
2317 }
2318 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2319 return !d & (D1 != 0);
2320 }
2321 return 0; /* round-down (truncate) */
2322}
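/*
 * Minimal sketch of how get_round() is consumed: shift right, then add the
 * rounding increment back.  For v = 11 (0b1011) and shift = 2 the exact
 * value is 2.75, giving 3 under rnu, 3 under rne, 2 under rdn and 3 under
 * rod.  Hypothetical helper, not used by the instructions below:
 */
static inline uint64_t example_round_shift(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t round = get_round(vxrm, v, shift);

    return (v >> shift) + round;
}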
2323
2324static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
2325 int32_t b)
2326{
2327 int64_t res = (int64_t)a + b;
2328 uint8_t round = get_round(vxrm, res, 1);
2329
2330 return (res >> 1) + round;
2331}
2332
2333static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
2334 int64_t b)
2335{
2336 int64_t res = a + b;
2337 uint8_t round = get_round(vxrm, res, 1);
2338 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2339
2340 /* With signed overflow, bit 64 is inverse of bit 63. */
2341 return ((res >> 1) ^ over) + round;
2342}
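/*
 * Worked case for the overflow fix-up above: with a = b = INT64_MAX the
 * sum wraps to res = -2 and over = INT64_MIN.  Then (res >> 1) = -1 and
 * (-1 ^ INT64_MIN) = INT64_MAX, which is the true average; get_round()
 * returns 0 for this value in all four rounding modes.
 */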
2343
2344RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2345RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2346RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2347RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
09106eed 2348GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2349GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2350GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2351GEN_VEXT_VV_RM(vaadd_vv_d, 8)
2352
2353RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2354RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2355RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2356RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
09106eed 2357GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2358GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2359GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2360GEN_VEXT_VX_RM(vaadd_vx_d, 8)
b7aee481 2361
2362static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2363 uint32_t a, uint32_t b)
2364{
2365 uint64_t res = (uint64_t)a + b;
2366 uint8_t round = get_round(vxrm, res, 1);
2367
2368 return (res >> 1) + round;
2369}
2370
2371static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2372 uint64_t a, uint64_t b)
2373{
2374 uint64_t res = a + b;
2375 uint8_t round = get_round(vxrm, res, 1);
2376 uint64_t over = (uint64_t)(res < a) << 63;
2377
2378 return ((res >> 1) | over) + round;
2379}
2380
2381RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2382RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2383RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2384RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
09106eed 2385GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2386GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2387GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2388GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
2389
2390RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2391RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2392RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2393RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
09106eed 2394GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2395GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2396GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2397GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
8b99a110 2398
2399static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2400 int32_t b)
2401{
2402 int64_t res = (int64_t)a - b;
2403 uint8_t round = get_round(vxrm, res, 1);
2404
2405 return (res >> 1) + round;
2406}
2407
2408static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
2409 int64_t b)
2410{
2411 int64_t res = (int64_t)a - b;
2412 uint8_t round = get_round(vxrm, res, 1);
2413 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2414
2415 /* With signed overflow, bit 64 is inverse of bit 63. */
2416 return ((res >> 1) ^ over) + round;
2417}
2418
2419RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2420RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2421RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2422RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
09106eed 2423GEN_VEXT_VV_RM(vasub_vv_b, 1)
2424GEN_VEXT_VV_RM(vasub_vv_h, 2)
2425GEN_VEXT_VV_RM(vasub_vv_w, 4)
2426GEN_VEXT_VV_RM(vasub_vv_d, 8)
2427
2428RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2429RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2430RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2431RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
09106eed 2432GEN_VEXT_VX_RM(vasub_vx_b, 1)
2433GEN_VEXT_VX_RM(vasub_vx_h, 2)
2434GEN_VEXT_VX_RM(vasub_vx_w, 4)
2435GEN_VEXT_VX_RM(vasub_vx_d, 8)
9f0ff9e5 2436
2437static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2438 uint32_t a, uint32_t b)
2439{
2440 int64_t res = (int64_t)a - b;
2441 uint8_t round = get_round(vxrm, res, 1);
2442
2443 return (res >> 1) + round;
2444}
2445
2446static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2447 uint64_t a, uint64_t b)
2448{
2449 uint64_t res = (uint64_t)a - b;
2450 uint8_t round = get_round(vxrm, res, 1);
2451 uint64_t over = (uint64_t)(res > a) << 63;
2452
2453 return ((res >> 1) | over) + round;
2454}
2455
2456RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2457RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2458RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2459RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
09106eed 2460GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2461GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2462GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2463GEN_VEXT_VV_RM(vasubu_vv_d, 8)
2464
2465RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2466RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2467RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2468RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
09106eed 2469GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2470GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2471GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2472GEN_VEXT_VX_RM(vasubu_vx_d, 8)
8b99a110 2473
2474/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2475static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2476{
2477 uint8_t round;
2478 int16_t res;
2479
2480 res = (int16_t)a * (int16_t)b;
2481 round = get_round(vxrm, res, 7);
c45eff30 2482 res = (res >> 7) + round;
2483
2484 if (res > INT8_MAX) {
2485 env->vxsat = 0x1;
2486 return INT8_MAX;
2487 } else if (res < INT8_MIN) {
2488 env->vxsat = 0x1;
2489 return INT8_MIN;
2490 } else {
2491 return res;
2492 }
2493}
2494
2495static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2496{
2497 uint8_t round;
2498 int32_t res;
2499
2500 res = (int32_t)a * (int32_t)b;
2501 round = get_round(vxrm, res, 15);
c45eff30 2502 res = (res >> 15) + round;
2503
2504 if (res > INT16_MAX) {
2505 env->vxsat = 0x1;
2506 return INT16_MAX;
2507 } else if (res < INT16_MIN) {
2508 env->vxsat = 0x1;
2509 return INT16_MIN;
2510 } else {
2511 return res;
2512 }
2513}
2514
2515static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2516{
2517 uint8_t round;
2518 int64_t res;
2519
2520 res = (int64_t)a * (int64_t)b;
2521 round = get_round(vxrm, res, 31);
c45eff30 2522 res = (res >> 31) + round;
2523
2524 if (res > INT32_MAX) {
2525 env->vxsat = 0x1;
2526 return INT32_MAX;
2527 } else if (res < INT32_MIN) {
2528 env->vxsat = 0x1;
2529 return INT32_MIN;
2530 } else {
2531 return res;
2532 }
2533}
2534
2535static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2536{
2537 uint8_t round;
2538 uint64_t hi_64, lo_64;
2539 int64_t res;
2540
2541 if (a == INT64_MIN && b == INT64_MIN) {
2542 env->vxsat = 1;
2543 return INT64_MAX;
2544 }
2545
2546 muls64(&lo_64, &hi_64, a, b);
2547 round = get_round(vxrm, lo_64, 63);
2548 /*
2549 * Cannot overflow, as there are always
2550 * 2 sign bits after multiply.
2551 */
2552 res = (hi_64 << 1) | (lo_64 >> 63);
2553 if (round) {
2554 if (res == INT64_MAX) {
2555 env->vxsat = 1;
2556 } else {
2557 res += 1;
2558 }
2559 }
2560 return res;
2561}
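/*
 * Why the shift above cannot overflow: excluding the INT64_MIN * INT64_MIN
 * case handled earlier, |a * b| <= 2^63 * (2^63 - 1) < 2^126, so bits 127
 * and 126 of the 128-bit product are both sign bits and dropping one of
 * them in (hi_64 << 1) | (lo_64 >> 63) keeps the value intact.  Only the
 * +1 rounding step can push the result past INT64_MAX, which is what the
 * saturation check guards.
 */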
2562
2563RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2564RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2565RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2566RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
09106eed 2567GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2568GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2569GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2570GEN_VEXT_VV_RM(vsmul_vv_d, 8)
2571
2572RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2573RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2574RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2575RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
09106eed 2576GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2577GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2578GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2579GEN_VEXT_VX_RM(vsmul_vx_d, 8)
0a1eaf00 2580
2581/* Vector Single-Width Scaling Shift Instructions */
2582static inline uint8_t
2583vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2584{
2585 uint8_t round, shift = b & 0x7;
2586 uint8_t res;
2587
2588 round = get_round(vxrm, a, shift);
c45eff30 2589 res = (a >> shift) + round;
2590 return res;
2591}
2592static inline uint16_t
2593vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2594{
2595 uint8_t round, shift = b & 0xf;
2596
2597 round = get_round(vxrm, a, shift);
66997c42 2598 return (a >> shift) + round;
2599}
2600static inline uint32_t
2601vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2602{
2603 uint8_t round, shift = b & 0x1f;
2604
2605 round = get_round(vxrm, a, shift);
66997c42 2606 return (a >> shift) + round;
2607}
2608static inline uint64_t
2609vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2610{
2611 uint8_t round, shift = b & 0x3f;
2612
2613 round = get_round(vxrm, a, shift);
66997c42 2614 return (a >> shift) + round;
2615}
2616RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2617RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2618RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2619RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
09106eed 2620GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2621GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2622GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2623GEN_VEXT_VV_RM(vssrl_vv_d, 8)
2624
2625RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2626RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2627RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2628RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
09106eed 2629GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2630GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2631GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2632GEN_VEXT_VX_RM(vssrl_vx_d, 8)
2633
2634static inline int8_t
2635vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2636{
2637 uint8_t round, shift = b & 0x7;
2638
2639 round = get_round(vxrm, a, shift);
66997c42 2640 return (a >> shift) + round;
2641}
2642static inline int16_t
2643vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2644{
2645 uint8_t round, shift = b & 0xf;
2646
2647 round = get_round(vxrm, a, shift);
66997c42 2648 return (a >> shift) + round;
2649}
2650static inline int32_t
2651vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2652{
2653 uint8_t round, shift = b & 0x1f;
2654
2655 round = get_round(vxrm, a, shift);
66997c42 2656 return (a >> shift) + round;
2657}
2658static inline int64_t
2659vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2660{
2661 uint8_t round, shift = b & 0x3f;
2662
2663 round = get_round(vxrm, a, shift);
66997c42 2664 return (a >> shift) + round;
04a61406 2665}
9ff3d287 2666
2667RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2668RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2669RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2670RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
09106eed 2671GEN_VEXT_VV_RM(vssra_vv_b, 1)
2672GEN_VEXT_VV_RM(vssra_vv_h, 2)
2673GEN_VEXT_VV_RM(vssra_vv_w, 4)
2674GEN_VEXT_VV_RM(vssra_vv_d, 8)
2675
2676RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2677RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2678RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2679RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
09106eed 2680GEN_VEXT_VX_RM(vssra_vx_b, 1)
2681GEN_VEXT_VX_RM(vssra_vx_h, 2)
2682GEN_VEXT_VX_RM(vssra_vx_w, 4)
2683GEN_VEXT_VX_RM(vssra_vx_d, 8)
2684
2685/* Vector Narrowing Fixed-Point Clip Instructions */
2686static inline int8_t
2687vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2688{
2689 uint8_t round, shift = b & 0xf;
2690 int16_t res;
2691
2692 round = get_round(vxrm, a, shift);
c45eff30 2693 res = (a >> shift) + round;
2694 if (res > INT8_MAX) {
2695 env->vxsat = 0x1;
2696 return INT8_MAX;
2697 } else if (res < INT8_MIN) {
2698 env->vxsat = 0x1;
2699 return INT8_MIN;
2700 } else {
2701 return res;
2702 }
2703}
2704
2705static inline int16_t
2706vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2707{
2708 uint8_t round, shift = b & 0x1f;
2709 int32_t res;
2710
2711 round = get_round(vxrm, a, shift);
c45eff30 2712 res = (a >> shift) + round;
2713 if (res > INT16_MAX) {
2714 env->vxsat = 0x1;
2715 return INT16_MAX;
2716 } else if (res < INT16_MIN) {
2717 env->vxsat = 0x1;
2718 return INT16_MIN;
2719 } else {
2720 return res;
2721 }
2722}
2723
2724static inline int32_t
2725vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2726{
2727 uint8_t round, shift = b & 0x3f;
2728 int64_t res;
2729
2730 round = get_round(vxrm, a, shift);
c45eff30 2731 res = (a >> shift) + round;
2732 if (res > INT32_MAX) {
2733 env->vxsat = 0x1;
2734 return INT32_MAX;
2735 } else if (res < INT32_MIN) {
2736 env->vxsat = 0x1;
2737 return INT32_MIN;
2738 } else {
2739 return res;
2740 }
2741}
2742
2743RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2744RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2745RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
09106eed 2746GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2747GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2748GEN_VEXT_VV_RM(vnclip_wv_w, 4)
2749
2750RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2751RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2752RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
09106eed 2753GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2754GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2755GEN_VEXT_VX_RM(vnclip_wx_w, 4)
2756
2757static inline uint8_t
2758vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2759{
2760 uint8_t round, shift = b & 0xf;
2761 uint16_t res;
2762
2763 round = get_round(vxrm, a, shift);
c45eff30 2764 res = (a >> shift) + round;
2765 if (res > UINT8_MAX) {
2766 env->vxsat = 0x1;
2767 return UINT8_MAX;
2768 } else {
2769 return res;
2770 }
2771}
2772
2773static inline uint16_t
2774vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2775{
2776 uint8_t round, shift = b & 0x1f;
2777 uint32_t res;
2778
2779 round = get_round(vxrm, a, shift);
c45eff30 2780 res = (a >> shift) + round;
2781 if (res > UINT16_MAX) {
2782 env->vxsat = 0x1;
2783 return UINT16_MAX;
2784 } else {
2785 return res;
2786 }
2787}
2788
2789static inline uint32_t
2790vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2791{
2792 uint8_t round, shift = b & 0x3f;
a70b3a73 2793 uint64_t res;
2794
2795 round = get_round(vxrm, a, shift);
c45eff30 2796 res = (a >> shift) + round;
2797 if (res > UINT32_MAX) {
2798 env->vxsat = 0x1;
2799 return UINT32_MAX;
2800 } else {
2801 return res;
2802 }
2803}
2804
2805RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2806RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2807RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
09106eed 2808GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
2809GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
2810GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
9ff3d287 2811
2812RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2813RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2814RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
09106eed 2815GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
2816GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
2817GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
2818
2819/*
3b57254d 2820 * Vector Floating-Point Arithmetic Instructions
2821 */
2822/* Vector Single-Width Floating-Point Add/Subtract Instructions */
2823#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2824static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2825 CPURISCVState *env) \
2826{ \
2827 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2828 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2829 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
2830}
2831
5eacf7d8 2832#define GEN_VEXT_VV_ENV(NAME, ESZ) \
2833void HELPER(NAME)(void *vd, void *v0, void *vs1, \
2834 void *vs2, CPURISCVState *env, \
2835 uint32_t desc) \
2836{ \
2837 uint32_t vm = vext_vm(desc); \
2838 uint32_t vl = env->vl; \
5eacf7d8 2839 uint32_t total_elems = \
2840 vext_get_total_elems(env, desc, ESZ); \
2841 uint32_t vta = vext_vta(desc); \
5b448f44 2842 uint32_t vma = vext_vma(desc); \
2843 uint32_t i; \
2844 \
f714361e 2845 for (i = env->vstart; i < vl; i++) { \
f9298de5 2846 if (!vm && !vext_elem_mask(v0, i)) { \
2847 /* set masked-off elements to 1s */ \
2848 vext_set_elems_1s(vd, vma, i * ESZ, \
2849 (i + 1) * ESZ); \
2850 continue; \
2851 } \
2852 do_##NAME(vd, vs1, vs2, i, env); \
2853 } \
f714361e 2854 env->vstart = 0; \
5eacf7d8 2855 /* set tail elements to 1s */ \
2856 vext_set_elems_1s(vd, vta, vl * ESZ, \
2857 total_elems * ESZ); \
2858}
2859
2860RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2861RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2862RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
5eacf7d8 2863GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
2864GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
2865GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
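/*
 * Note on the type tuples: in this tree float16/float32/float64 are plain
 * integer containers for the IEEE encodings, so OP_UUU_H/W/D merely move
 * the raw bit patterns around while float16_add() and friends perform the
 * arithmetic using env->fp_status.
 */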
2866
2867#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2868static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2869 CPURISCVState *env) \
2870{ \
2871 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2872 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2873}
2874
5eacf7d8 2875#define GEN_VEXT_VF(NAME, ESZ) \
2876void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
2877 void *vs2, CPURISCVState *env, \
2878 uint32_t desc) \
2879{ \
2880 uint32_t vm = vext_vm(desc); \
2881 uint32_t vl = env->vl; \
5eacf7d8 2882 uint32_t total_elems = \
c45eff30 2883 vext_get_total_elems(env, desc, ESZ); \
5eacf7d8 2884 uint32_t vta = vext_vta(desc); \
5b448f44 2885 uint32_t vma = vext_vma(desc); \
2886 uint32_t i; \
2887 \
f714361e 2888 for (i = env->vstart; i < vl; i++) { \
f9298de5 2889 if (!vm && !vext_elem_mask(v0, i)) { \
2890 /* set masked-off elements to 1s */ \
2891 vext_set_elems_1s(vd, vma, i * ESZ, \
2892 (i + 1) * ESZ); \
2893 continue; \
2894 } \
2895 do_##NAME(vd, s1, vs2, i, env); \
2896 } \
f714361e 2897 env->vstart = 0; \
5eacf7d8 2898 /* set tail elements to 1s */ \
2899 vext_set_elems_1s(vd, vta, vl * ESZ, \
2900 total_elems * ESZ); \
2901}
2902
2903RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2904RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2905RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
5eacf7d8 2906GEN_VEXT_VF(vfadd_vf_h, 2)
2907GEN_VEXT_VF(vfadd_vf_w, 4)
2908GEN_VEXT_VF(vfadd_vf_d, 8)
2909
2910RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2911RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2912RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
5eacf7d8 2913GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
2914GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
2915GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
2916RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2917RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2918RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
5eacf7d8 2919GEN_VEXT_VF(vfsub_vf_h, 2)
2920GEN_VEXT_VF(vfsub_vf_w, 4)
2921GEN_VEXT_VF(vfsub_vf_d, 8)
2922
2923static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2924{
2925 return float16_sub(b, a, s);
2926}
2927
2928static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2929{
2930 return float32_sub(b, a, s);
2931}
2932
2933static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2934{
2935 return float64_sub(b, a, s);
2936}
2937
2938RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2939RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2940RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
5eacf7d8 2941GEN_VEXT_VF(vfrsub_vf_h, 2)
2942GEN_VEXT_VF(vfrsub_vf_w, 4)
2943GEN_VEXT_VF(vfrsub_vf_d, 8)
2944
2945/* Vector Widening Floating-Point Add/Subtract Instructions */
2946static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2947{
2948 return float32_add(float16_to_float32(a, true, s),
c45eff30 2949 float16_to_float32(b, true, s), s);
2950}
2951
2952static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2953{
2954 return float64_add(float32_to_float64(a, s),
c45eff30 2955 float32_to_float64(b, s), s);
2956
2957}
2958
2959RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2960RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
5eacf7d8 2961GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
2962GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
2963RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2964RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
5eacf7d8 2965GEN_VEXT_VF(vfwadd_vf_h, 4)
2966GEN_VEXT_VF(vfwadd_vf_w, 8)
2967
2968static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2969{
2970 return float32_sub(float16_to_float32(a, true, s),
c45eff30 2971 float16_to_float32(b, true, s), s);
2972}
2973
2974static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2975{
2976 return float64_sub(float32_to_float64(a, s),
c45eff30 2977 float32_to_float64(b, s), s);
2978
2979}
2980
2981RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2982RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
5eacf7d8 2983GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
2984GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
eeffab2e
LZ
2985RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2986RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
5eacf7d8 2987GEN_VEXT_VF(vfwsub_vf_h, 4)
2988GEN_VEXT_VF(vfwsub_vf_w, 8)
2989
2990static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2991{
2992 return float32_add(a, float16_to_float32(b, true, s), s);
2993}
2994
2995static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2996{
2997 return float64_add(a, float32_to_float64(b, s), s);
2998}
2999
3000RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3001RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
5eacf7d8 3002GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3003GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
3004RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3005RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
5eacf7d8 3006GEN_VEXT_VF(vfwadd_wf_h, 4)
3007GEN_VEXT_VF(vfwadd_wf_w, 8)
3008
3009static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3010{
3011 return float32_sub(a, float16_to_float32(b, true, s), s);
3012}
3013
3014static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3015{
3016 return float64_sub(a, float32_to_float64(b, s), s);
3017}
3018
3019RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3020RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
5eacf7d8 3021GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3022GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
3023RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3024RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
5eacf7d8 3025GEN_VEXT_VF(vfwsub_wf_h, 4)
3026GEN_VEXT_VF(vfwsub_wf_w, 8)
3027
3028/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3029RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3030RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3031RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
5eacf7d8 3032GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3033GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3034GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
0e0057cb
LZ
3035RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3036RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3037RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
5eacf7d8 3038GEN_VEXT_VF(vfmul_vf_h, 2)
3039GEN_VEXT_VF(vfmul_vf_w, 4)
3040GEN_VEXT_VF(vfmul_vf_d, 8)
0e0057cb
LZ
3041
3042RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3043RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3044RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
5eacf7d8 3045GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3046GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3047GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
0e0057cb
LZ
3048RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3049RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3050RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
5eacf7d8 3051GEN_VEXT_VF(vfdiv_vf_h, 2)
3052GEN_VEXT_VF(vfdiv_vf_w, 4)
3053GEN_VEXT_VF(vfdiv_vf_d, 8)
0e0057cb
LZ
3054
3055static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3056{
3057 return float16_div(b, a, s);
3058}
3059
3060static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3061{
3062 return float32_div(b, a, s);
3063}
3064
3065static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3066{
3067 return float64_div(b, a, s);
3068}
3069
3070RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3071RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3072RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
5eacf7d8 3073GEN_VEXT_VF(vfrdiv_vf_h, 2)
3074GEN_VEXT_VF(vfrdiv_vf_w, 4)
3075GEN_VEXT_VF(vfrdiv_vf_d, 8)
f7c7b7cd
LZ
3076
3077/* Vector Widening Floating-Point Multiply */
3078static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3079{
3080 return float32_mul(float16_to_float32(a, true, s),
c45eff30 3081 float16_to_float32(b, true, s), s);
f7c7b7cd
LZ
3082}
3083
3084static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3085{
3086 return float64_mul(float32_to_float64(a, s),
c45eff30 3087 float32_to_float64(b, s), s);
f7c7b7cd
LZ
3088
3089}
3090RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3091RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
5eacf7d8 3092GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3093GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
f7c7b7cd
LZ
3094RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3095RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
5eacf7d8 3096GEN_VEXT_VF(vfwmul_vf_h, 4)
3097GEN_VEXT_VF(vfwmul_vf_w, 8)
4aa5a8fe
LZ
3098
3099/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3100#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3101static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
c45eff30 3102 CPURISCVState *env) \
4aa5a8fe
LZ
3103{ \
3104 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3105 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3106 TD d = *((TD *)vd + HD(i)); \
3107 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3108}
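/*
 * OPFVV3 evaluates OP(s2, s1, d), so with fmacc16/32/64 below the
 * destination element becomes (vs2[i] * vs1[i]) + vd[i], the vfmacc
 * form that overwrites the addend.  The fmadd/fmsub helpers further
 * down pass the old destination as a product operand instead
 * (d * b + a), which yields vd[i] = (vd[i] * vs1[i]) + vs2[i], the
 * vfmadd form that overwrites a multiplicand, from the same macro.
 */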
3109
3110static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3111{
3112 return float16_muladd(a, b, d, 0, s);
3113}
3114
3115static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3116{
3117 return float32_muladd(a, b, d, 0, s);
3118}
3119
3120static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3121{
3122 return float64_muladd(a, b, d, 0, s);
3123}
3124
3125RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3126RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3127RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
5eacf7d8 3128GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3129GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3130GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
4aa5a8fe
LZ
3131
3132#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3133static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
c45eff30 3134 CPURISCVState *env) \
4aa5a8fe
LZ
3135{ \
3136 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3137 TD d = *((TD *)vd + HD(i)); \
3138 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3139}
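/*
 * In the vector-scalar forms the f-register operand arrives as a raw
 * uint64_t; the (TX1)(T1)s1 cast truncates it to the element type, so a
 * single helper prototype serves every SEW.  A half-precision
 * vfmacc.vf, for instance, only consumes the low 16 bits of s1 here.
 */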
3140
3141RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3142RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3143RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
5eacf7d8 3144GEN_VEXT_VF(vfmacc_vf_h, 2)
3145GEN_VEXT_VF(vfmacc_vf_w, 4)
3146GEN_VEXT_VF(vfmacc_vf_d, 8)
4aa5a8fe
LZ
3147
3148static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3149{
c45eff30
WL
3150 return float16_muladd(a, b, d, float_muladd_negate_c |
3151 float_muladd_negate_product, s);
4aa5a8fe
LZ
3152}
3153
3154static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3155{
c45eff30
WL
3156 return float32_muladd(a, b, d, float_muladd_negate_c |
3157 float_muladd_negate_product, s);
4aa5a8fe
LZ
3158}
3159
3160static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3161{
c45eff30
WL
3162 return float64_muladd(a, b, d, float_muladd_negate_c |
3163 float_muladd_negate_product, s);
4aa5a8fe
LZ
3164}
3165
3166RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3167RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3168RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
5eacf7d8 3169GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3170GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3171GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
4aa5a8fe
LZ
3172RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3173RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3174RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
5eacf7d8 3175GEN_VEXT_VF(vfnmacc_vf_h, 2)
3176GEN_VEXT_VF(vfnmacc_vf_w, 4)
3177GEN_VEXT_VF(vfnmacc_vf_d, 8)
4aa5a8fe
LZ
3178
3179static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3180{
3181 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3182}
3183
3184static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3185{
3186 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3187}
3188
3189static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3190{
3191 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3192}
3193
3194RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3195RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3196RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
5eacf7d8 3197GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3198GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3199GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
4aa5a8fe
LZ
3200RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3201RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3202RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
5eacf7d8 3203GEN_VEXT_VF(vfmsac_vf_h, 2)
3204GEN_VEXT_VF(vfmsac_vf_w, 4)
3205GEN_VEXT_VF(vfmsac_vf_d, 8)
4aa5a8fe
LZ
3206
3207static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3208{
3209 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3210}
3211
3212static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3213{
3214 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3215}
3216
3217static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3218{
3219 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3220}
3221
3222RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3223RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3224RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
5eacf7d8 3225GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3226GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3227GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
4aa5a8fe
LZ
3228RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3229RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3230RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
5eacf7d8 3231GEN_VEXT_VF(vfnmsac_vf_h, 2)
3232GEN_VEXT_VF(vfnmsac_vf_w, 4)
3233GEN_VEXT_VF(vfnmsac_vf_d, 8)
4aa5a8fe
LZ
3234
3235static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3236{
3237 return float16_muladd(d, b, a, 0, s);
3238}
3239
3240static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3241{
3242 return float32_muladd(d, b, a, 0, s);
3243}
3244
3245static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3246{
3247 return float64_muladd(d, b, a, 0, s);
3248}
3249
3250RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3251RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3252RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
5eacf7d8 3253GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3254GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3255GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
4aa5a8fe
LZ
3256RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3257RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3258RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
5eacf7d8 3259GEN_VEXT_VF(vfmadd_vf_h, 2)
3260GEN_VEXT_VF(vfmadd_vf_w, 4)
3261GEN_VEXT_VF(vfmadd_vf_d, 8)
4aa5a8fe
LZ
3262
3263static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3264{
c45eff30
WL
3265 return float16_muladd(d, b, a, float_muladd_negate_c |
3266 float_muladd_negate_product, s);
4aa5a8fe
LZ
3267}
3268
3269static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3270{
c45eff30
WL
3271 return float32_muladd(d, b, a, float_muladd_negate_c |
3272 float_muladd_negate_product, s);
4aa5a8fe
LZ
3273}
3274
3275static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3276{
c45eff30
WL
3277 return float64_muladd(d, b, a, float_muladd_negate_c |
3278 float_muladd_negate_product, s);
4aa5a8fe
LZ
3279}
3280
3281RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3282RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3283RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
5eacf7d8 3284GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3285GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3286GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
4aa5a8fe
LZ
3287RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3288RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3289RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
5eacf7d8 3290GEN_VEXT_VF(vfnmadd_vf_h, 2)
3291GEN_VEXT_VF(vfnmadd_vf_w, 4)
3292GEN_VEXT_VF(vfnmadd_vf_d, 8)
4aa5a8fe
LZ
3293
3294static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3295{
3296 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3297}
3298
3299static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3300{
3301 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3302}
3303
3304static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3305{
3306 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3307}
3308
3309RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3310RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3311RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
5eacf7d8 3312GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3313GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3314GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
4aa5a8fe
LZ
3315RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3316RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3317RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
5eacf7d8 3318GEN_VEXT_VF(vfmsub_vf_h, 2)
3319GEN_VEXT_VF(vfmsub_vf_w, 4)
3320GEN_VEXT_VF(vfmsub_vf_d, 8)
4aa5a8fe
LZ
3321
3322static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3323{
3324 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3325}
3326
3327static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3328{
3329 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3330}
3331
3332static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3333{
3334 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3335}
3336
3337RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3338RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3339RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
5eacf7d8 3340GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3341GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3342GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
4aa5a8fe
LZ
3343RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3344RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3345RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
5eacf7d8 3346GEN_VEXT_VF(vfnmsub_vf_h, 2)
3347GEN_VEXT_VF(vfnmsub_vf_w, 4)
3348GEN_VEXT_VF(vfnmsub_vf_d, 8)
0dd50959
LZ
3349
3350/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3351static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3352{
3353 return float32_muladd(float16_to_float32(a, true, s),
c45eff30 3354 float16_to_float32(b, true, s), d, 0, s);
0dd50959
LZ
3355}
3356
3357static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3358{
3359 return float64_muladd(float32_to_float64(a, s),
c45eff30 3360 float32_to_float64(b, s), d, 0, s);
0dd50959
LZ
3361}
3362
3363RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3364RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
5eacf7d8 3365GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3366GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
0dd50959
LZ
3367RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3368RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
5eacf7d8 3369GEN_VEXT_VF(vfwmacc_vf_h, 4)
3370GEN_VEXT_VF(vfwmacc_vf_w, 8)
0dd50959 3371
adf772b0
WL
3372static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3373{
3374 return float32_muladd(bfloat16_to_float32(a, s),
3375 bfloat16_to_float32(b, s), d, 0, s);
3376}
3377
3378RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
3379GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
837570ce 3380RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
adf772b0
WL
3381GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3382
0dd50959
LZ
3383static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3384{
3385 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3386 float16_to_float32(b, true, s), d,
3387 float_muladd_negate_c | float_muladd_negate_product,
3388 s);
0dd50959
LZ
3389}
3390
3391static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3392{
c45eff30
WL
3393 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
3394 d, float_muladd_negate_c |
3395 float_muladd_negate_product, s);
0dd50959
LZ
3396}
3397
3398RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3399RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
5eacf7d8 3400GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3401GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
0dd50959
LZ
3402RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3403RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
5eacf7d8 3404GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3405GEN_VEXT_VF(vfwnmacc_vf_w, 8)
0dd50959
LZ
3406
3407static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3408{
3409 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3410 float16_to_float32(b, true, s), d,
3411 float_muladd_negate_c, s);
0dd50959
LZ
3412}
3413
3414static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3415{
3416 return float64_muladd(float32_to_float64(a, s),
c45eff30
WL
3417 float32_to_float64(b, s), d,
3418 float_muladd_negate_c, s);
0dd50959
LZ
3419}
3420
3421RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3422RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
5eacf7d8 3423GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3424GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
0dd50959
LZ
3425RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3426RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
5eacf7d8 3427GEN_VEXT_VF(vfwmsac_vf_h, 4)
3428GEN_VEXT_VF(vfwmsac_vf_w, 8)
0dd50959
LZ
3429
3430static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3431{
3432 return float32_muladd(float16_to_float32(a, true, s),
c45eff30
WL
3433 float16_to_float32(b, true, s), d,
3434 float_muladd_negate_product, s);
0dd50959
LZ
3435}
3436
3437static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3438{
3439 return float64_muladd(float32_to_float64(a, s),
c45eff30
WL
3440 float32_to_float64(b, s), d,
3441 float_muladd_negate_product, s);
0dd50959
LZ
3442}
3443
3444RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3445RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
5eacf7d8 3446GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3447GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
0dd50959
LZ
3448RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3449RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
5eacf7d8 3450GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3451GEN_VEXT_VF(vfwnmsac_vf_w, 8)
d9e4ce72
LZ
3452
3453/* Vector Floating-Point Square-Root Instruction */
c45eff30 3454#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
d9e4ce72 3455static void do_##NAME(void *vd, void *vs2, int i, \
c45eff30 3456 CPURISCVState *env) \
d9e4ce72
LZ
3457{ \
3458 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3459 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3460}
3461
5eacf7d8 3462#define GEN_VEXT_V_ENV(NAME, ESZ) \
d9e4ce72 3463void HELPER(NAME)(void *vd, void *v0, void *vs2, \
c45eff30 3464 CPURISCVState *env, uint32_t desc) \
d9e4ce72 3465{ \
d9e4ce72
LZ
3466 uint32_t vm = vext_vm(desc); \
3467 uint32_t vl = env->vl; \
5eacf7d8 3468 uint32_t total_elems = \
3469 vext_get_total_elems(env, desc, ESZ); \
3470 uint32_t vta = vext_vta(desc); \
5b448f44 3471 uint32_t vma = vext_vma(desc); \
d9e4ce72
LZ
3472 uint32_t i; \
3473 \
3474 if (vl == 0) { \
3475 return; \
3476 } \
f714361e 3477 for (i = env->vstart; i < vl; i++) { \
f9298de5 3478 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3479 /* set masked-off elements to 1s */ \
3480 vext_set_elems_1s(vd, vma, i * ESZ, \
3481 (i + 1) * ESZ); \
d9e4ce72
LZ
3482 continue; \
3483 } \
3484 do_##NAME(vd, vs2, i, env); \
3485 } \
f714361e 3486 env->vstart = 0; \
5eacf7d8 3487 vext_set_elems_1s(vd, vta, vl * ESZ, \
3488 total_elems * ESZ); \
d9e4ce72
LZ
3489}
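/*
 * Like the other generators in this file, GEN_VEXT_V_ENV starts at
 * env->vstart, writes all-1s to masked-off elements only under the
 * mask-agnostic policy, clears vstart when the loop finishes, and then
 * fills the tail bytes [vl * ESZ, total_elems * ESZ) with 1s when the
 * tail-agnostic policy is selected.
 */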
3490
3491RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3492RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3493RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
5eacf7d8 3494GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3495GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3496GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
230b53dd 3497
e848a1e5
FC
3498/*
3499 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3500 *
3501 * Adapted from riscv-v-spec recip.c:
3502 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3503 */
3504static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3505{
3506 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3507 uint64_t exp = extract64(f, frac_size, exp_size);
3508 uint64_t frac = extract64(f, 0, frac_size);
3509
3510 const uint8_t lookup_table[] = {
3511 52, 51, 50, 48, 47, 46, 44, 43,
3512 42, 41, 40, 39, 38, 36, 35, 34,
3513 33, 32, 31, 30, 30, 29, 28, 27,
3514 26, 25, 24, 23, 23, 22, 21, 20,
3515 19, 19, 18, 17, 16, 16, 15, 14,
3516 14, 13, 12, 12, 11, 10, 10, 9,
3517 9, 8, 7, 7, 6, 6, 5, 4,
3518 4, 3, 3, 2, 2, 1, 1, 0,
3519 127, 125, 123, 121, 119, 118, 116, 114,
3520 113, 111, 109, 108, 106, 105, 103, 102,
3521 100, 99, 97, 96, 95, 93, 92, 91,
3522 90, 88, 87, 86, 85, 84, 83, 82,
3523 80, 79, 78, 77, 76, 75, 74, 73,
3524 72, 71, 70, 70, 69, 68, 67, 66,
3525 65, 64, 63, 63, 62, 61, 60, 59,
3526 59, 58, 57, 56, 56, 55, 54, 53
3527 };
3528 const int precision = 7;
3529
3530 if (exp == 0 && frac != 0) { /* subnormal */
3531 /* Normalize the subnormal. */
3532 while (extract64(frac, frac_size - 1, 1) == 0) {
3533 exp--;
3534 frac <<= 1;
3535 }
3536
3537 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3538 }
3539
3540 int idx = ((exp & 1) << (precision - 1)) |
c45eff30 3541 (frac >> (frac_size - precision + 1));
e848a1e5 3542 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
c45eff30 3543 (frac_size - precision);
e848a1e5
FC
3544 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3545
3546 uint64_t val = 0;
3547 val = deposit64(val, 0, frac_size, out_frac);
3548 val = deposit64(val, frac_size, exp_size, out_exp);
3549 val = deposit64(val, frac_size + exp_size, 1, sign);
3550 return val;
3551}
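/*
 * A worked binary32 example of the estimate: for an input of 1.0f
 * (exp = 127, frac = 0) the index is ((127 & 1) << 6) | 0 = 64,
 * lookup_table[64] = 127, so out_frac = 127 << 16 and
 * out_exp = (3 * 127 - 1 - 127) / 2 = 126 (the ~exp term relies on
 * unsigned wrap-around).  The packed result is 2^-1 * (1 + 127/128),
 * roughly 0.996, i.e. 1/sqrt(1.0) to the 7 bits of precision the
 * instruction provides.
 */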
3552
3553static float16 frsqrt7_h(float16 f, float_status *s)
3554{
3555 int exp_size = 5, frac_size = 10;
3556 bool sign = float16_is_neg(f);
3557
3558 /*
3559 * frsqrt7(sNaN) = canonical NaN
3560 * frsqrt7(-inf) = canonical NaN
3561 * frsqrt7(-normal) = canonical NaN
3562 * frsqrt7(-subnormal) = canonical NaN
3563 */
3564 if (float16_is_signaling_nan(f, s) ||
c45eff30
WL
3565 (float16_is_infinity(f) && sign) ||
3566 (float16_is_normal(f) && sign) ||
3567 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
e848a1e5
FC
3568 s->float_exception_flags |= float_flag_invalid;
3569 return float16_default_nan(s);
3570 }
3571
3572 /* frsqrt7(qNaN) = canonical NaN */
3573 if (float16_is_quiet_nan(f, s)) {
3574 return float16_default_nan(s);
3575 }
3576
3577 /* frsqrt7(+-0) = +-inf */
3578 if (float16_is_zero(f)) {
3579 s->float_exception_flags |= float_flag_divbyzero;
3580 return float16_set_sign(float16_infinity, sign);
3581 }
3582
3583 /* frsqrt7(+inf) = +0 */
3584 if (float16_is_infinity(f) && !sign) {
3585 return float16_set_sign(float16_zero, sign);
3586 }
3587
3588 /* +normal, +subnormal */
3589 uint64_t val = frsqrt7(f, exp_size, frac_size);
3590 return make_float16(val);
3591}
3592
3593static float32 frsqrt7_s(float32 f, float_status *s)
3594{
3595 int exp_size = 8, frac_size = 23;
3596 bool sign = float32_is_neg(f);
3597
3598 /*
3599 * frsqrt7(sNaN) = canonical NaN
3600 * frsqrt7(-inf) = canonical NaN
3601 * frsqrt7(-normal) = canonical NaN
3602 * frsqrt7(-subnormal) = canonical NaN
3603 */
3604 if (float32_is_signaling_nan(f, s) ||
c45eff30
WL
3605 (float32_is_infinity(f) && sign) ||
3606 (float32_is_normal(f) && sign) ||
3607 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
e848a1e5
FC
3608 s->float_exception_flags |= float_flag_invalid;
3609 return float32_default_nan(s);
3610 }
3611
3612 /* frsqrt7(qNaN) = canonical NaN */
3613 if (float32_is_quiet_nan(f, s)) {
3614 return float32_default_nan(s);
3615 }
3616
3617 /* frsqrt7(+-0) = +-inf */
3618 if (float32_is_zero(f)) {
3619 s->float_exception_flags |= float_flag_divbyzero;
3620 return float32_set_sign(float32_infinity, sign);
3621 }
3622
3623 /* frsqrt7(+inf) = +0 */
3624 if (float32_is_infinity(f) && !sign) {
3625 return float32_set_sign(float32_zero, sign);
3626 }
3627
3628 /* +normal, +subnormal */
3629 uint64_t val = frsqrt7(f, exp_size, frac_size);
3630 return make_float32(val);
3631}
3632
3633static float64 frsqrt7_d(float64 f, float_status *s)
3634{
3635 int exp_size = 11, frac_size = 52;
3636 bool sign = float64_is_neg(f);
3637
3638 /*
3639 * frsqrt7(sNaN) = canonical NaN
3640 * frsqrt7(-inf) = canonical NaN
3641 * frsqrt7(-normal) = canonical NaN
3642 * frsqrt7(-subnormal) = canonical NaN
3643 */
3644 if (float64_is_signaling_nan(f, s) ||
c45eff30
WL
3645 (float64_is_infinity(f) && sign) ||
3646 (float64_is_normal(f) && sign) ||
3647 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
e848a1e5
FC
3648 s->float_exception_flags |= float_flag_invalid;
3649 return float64_default_nan(s);
3650 }
3651
3652 /* frsqrt7(qNaN) = canonical NaN */
3653 if (float64_is_quiet_nan(f, s)) {
3654 return float64_default_nan(s);
3655 }
3656
3657 /* frsqrt7(+-0) = +-inf */
3658 if (float64_is_zero(f)) {
3659 s->float_exception_flags |= float_flag_divbyzero;
3660 return float64_set_sign(float64_infinity, sign);
3661 }
3662
3663 /* frsqrt7(+inf) = +0 */
3664 if (float64_is_infinity(f) && !sign) {
3665 return float64_set_sign(float64_zero, sign);
3666 }
3667
3668 /* +normal, +subnormal */
3669 uint64_t val = frsqrt7(f, exp_size, frac_size);
3670 return make_float64(val);
3671}
3672
3673RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3674RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3675RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
5eacf7d8 3676GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3677GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3678GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
e848a1e5 3679
55c35407
FC
3680/*
3681 * Vector Floating-Point Reciprocal Estimate Instruction
3682 *
3683 * Adapted from riscv-v-spec recip.c:
3684 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3685 */
3686static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3687 float_status *s)
3688{
3689 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3690 uint64_t exp = extract64(f, frac_size, exp_size);
3691 uint64_t frac = extract64(f, 0, frac_size);
3692
3693 const uint8_t lookup_table[] = {
3694 127, 125, 123, 121, 119, 117, 116, 114,
3695 112, 110, 109, 107, 105, 104, 102, 100,
3696 99, 97, 96, 94, 93, 91, 90, 88,
3697 87, 85, 84, 83, 81, 80, 79, 77,
3698 76, 75, 74, 72, 71, 70, 69, 68,
3699 66, 65, 64, 63, 62, 61, 60, 59,
3700 58, 57, 56, 55, 54, 53, 52, 51,
3701 50, 49, 48, 47, 46, 45, 44, 43,
3702 42, 41, 40, 40, 39, 38, 37, 36,
3703 35, 35, 34, 33, 32, 31, 31, 30,
3704 29, 28, 28, 27, 26, 25, 25, 24,
3705 23, 23, 22, 21, 21, 20, 19, 19,
3706 18, 17, 17, 16, 15, 15, 14, 14,
3707 13, 12, 12, 11, 11, 10, 9, 9,
3708 8, 8, 7, 7, 6, 5, 5, 4,
3709 4, 3, 3, 2, 2, 1, 1, 0
3710 };
3711 const int precision = 7;
3712
3713 if (exp == 0 && frac != 0) { /* subnormal */
3714 /* Normalize the subnormal. */
3715 while (extract64(frac, frac_size - 1, 1) == 0) {
3716 exp--;
3717 frac <<= 1;
3718 }
3719
3720 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3721
3722 if (exp != 0 && exp != UINT64_MAX) {
3723 /*
3724 * Overflow to inf or max value of same sign,
3725 * depending on sign and rounding mode.
3726 */
3727 s->float_exception_flags |= (float_flag_inexact |
3728 float_flag_overflow);
3729
3730 if ((s->float_rounding_mode == float_round_to_zero) ||
3731 ((s->float_rounding_mode == float_round_down) && !sign) ||
3732 ((s->float_rounding_mode == float_round_up) && sign)) {
3733 /* Return greatest/negative finite value. */
3734 return (sign << (exp_size + frac_size)) |
c45eff30 3735 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
55c35407
FC
3736 } else {
3737 /* Return +-inf. */
3738 return (sign << (exp_size + frac_size)) |
c45eff30 3739 MAKE_64BIT_MASK(frac_size, exp_size);
55c35407
FC
3740 }
3741 }
3742 }
3743
3744 int idx = frac >> (frac_size - precision);
3745 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
c45eff30 3746 (frac_size - precision);
55c35407
FC
3747 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3748
3749 if (out_exp == 0 || out_exp == UINT64_MAX) {
3750 /*
3751 * The result is subnormal, but don't raise the underflow exception,
3752 * because there's no additional loss of precision.
3753 */
3754 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3755 if (out_exp == UINT64_MAX) {
3756 out_frac >>= 1;
3757 out_exp = 0;
3758 }
3759 }
3760
3761 uint64_t val = 0;
3762 val = deposit64(val, 0, frac_size, out_frac);
3763 val = deposit64(val, frac_size, exp_size, out_exp);
3764 val = deposit64(val, frac_size + exp_size, 1, sign);
3765 return val;
3766}
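/*
 * A worked binary32 example of frec7(): for an input of 2.0f
 * (exp = 128, frac = 0) the index is the top seven fraction bits (0),
 * lookup_table[0] = 127, out_frac = 127 << 16 and
 * out_exp = 2 * 127 - 1 - 128 = 125, so the packed result is
 * 2^-2 * (1 + 127/128), roughly 0.498, a 7-bit estimate of 1/2.0.
 * Tiny subnormal inputs whose reciprocal overflows are resolved in the
 * branch above (to +-inf or the largest finite value, depending on the
 * rounding mode), and estimates that fall below the normal range are
 * shifted into a subnormal encoding just before packing.
 */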
3767
3768static float16 frec7_h(float16 f, float_status *s)
3769{
3770 int exp_size = 5, frac_size = 10;
3771 bool sign = float16_is_neg(f);
3772
3773 /* frec7(+-inf) = +-0 */
3774 if (float16_is_infinity(f)) {
3775 return float16_set_sign(float16_zero, sign);
3776 }
3777
3778 /* frec7(+-0) = +-inf */
3779 if (float16_is_zero(f)) {
3780 s->float_exception_flags |= float_flag_divbyzero;
3781 return float16_set_sign(float16_infinity, sign);
3782 }
3783
3784 /* frec7(sNaN) = canonical NaN */
3785 if (float16_is_signaling_nan(f, s)) {
3786 s->float_exception_flags |= float_flag_invalid;
3787 return float16_default_nan(s);
3788 }
3789
3790 /* frec7(qNaN) = canonical NaN */
3791 if (float16_is_quiet_nan(f, s)) {
3792 return float16_default_nan(s);
3793 }
3794
3795 /* +-normal, +-subnormal */
3796 uint64_t val = frec7(f, exp_size, frac_size, s);
3797 return make_float16(val);
3798}
3799
3800static float32 frec7_s(float32 f, float_status *s)
3801{
3802 int exp_size = 8, frac_size = 23;
3803 bool sign = float32_is_neg(f);
3804
3805 /* frec7(+-inf) = +-0 */
3806 if (float32_is_infinity(f)) {
3807 return float32_set_sign(float32_zero, sign);
3808 }
3809
3810 /* frec7(+-0) = +-inf */
3811 if (float32_is_zero(f)) {
3812 s->float_exception_flags |= float_flag_divbyzero;
3813 return float32_set_sign(float32_infinity, sign);
3814 }
3815
3816 /* frec7(sNaN) = canonical NaN */
3817 if (float32_is_signaling_nan(f, s)) {
3818 s->float_exception_flags |= float_flag_invalid;
3819 return float32_default_nan(s);
3820 }
3821
3822 /* frec7(qNaN) = canonical NaN */
3823 if (float32_is_quiet_nan(f, s)) {
3824 return float32_default_nan(s);
3825 }
3826
3827 /* +-normal, +-subnormal */
3828 uint64_t val = frec7(f, exp_size, frac_size, s);
3829 return make_float32(val);
3830}
3831
3832static float64 frec7_d(float64 f, float_status *s)
3833{
3834 int exp_size = 11, frac_size = 52;
3835 bool sign = float64_is_neg(f);
3836
3837 /* frec7(+-inf) = +-0 */
3838 if (float64_is_infinity(f)) {
3839 return float64_set_sign(float64_zero, sign);
3840 }
3841
3842 /* frec7(+-0) = +-inf */
3843 if (float64_is_zero(f)) {
3844 s->float_exception_flags |= float_flag_divbyzero;
3845 return float64_set_sign(float64_infinity, sign);
3846 }
3847
3848 /* frec7(sNaN) = canonical NaN */
3849 if (float64_is_signaling_nan(f, s)) {
3850 s->float_exception_flags |= float_flag_invalid;
3851 return float64_default_nan(s);
3852 }
3853
3854 /* frec7(qNaN) = canonical NaN */
3855 if (float64_is_quiet_nan(f, s)) {
3856 return float64_default_nan(s);
3857 }
3858
3859 /* +-normal, +-subnormal */
3860 uint64_t val = frec7(f, exp_size, frac_size, s);
3861 return make_float64(val);
3862}
3863
3864RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3865RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3866RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
5eacf7d8 3867GEN_VEXT_V_ENV(vfrec7_v_h, 2)
3868GEN_VEXT_V_ENV(vfrec7_v_w, 4)
3869GEN_VEXT_V_ENV(vfrec7_v_d, 8)
55c35407 3870
230b53dd 3871/* Vector Floating-Point MIN/MAX Instructions */
49c5611a
FC
3872RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3873RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3874RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
5eacf7d8 3875GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
3876GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
3877GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
49c5611a
FC
3878RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3879RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3880RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
5eacf7d8 3881GEN_VEXT_VF(vfmin_vf_h, 2)
3882GEN_VEXT_VF(vfmin_vf_w, 4)
3883GEN_VEXT_VF(vfmin_vf_d, 8)
230b53dd 3884
49c5611a
FC
3885RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3886RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3887RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
5eacf7d8 3888GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
3889GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
3890GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
49c5611a
FC
3891RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3892RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3893RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
5eacf7d8 3894GEN_VEXT_VF(vfmax_vf_h, 2)
3895GEN_VEXT_VF(vfmax_vf_w, 4)
3896GEN_VEXT_VF(vfmax_vf_d, 8)
1d426b81
LZ
3897
3898/* Vector Floating-Point Sign-Injection Instructions */
3899static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3900{
3901 return deposit64(b, 0, 15, a);
3902}
3903
3904static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3905{
3906 return deposit64(b, 0, 31, a);
3907}
3908
3909static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3910{
3911 return deposit64(b, 0, 63, a);
3912}
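/*
 * Given the OP(s2, s1) argument order used by OPFVV2/OPFVF2 earlier in
 * this file, deposit64(b, 0, N, a) keeps the sign bit of vs1 (or rs1)
 * and the exponent/mantissa bits of vs2, so vfsgnj produces
 * vd[i] = {sign(vs1[i]), magnitude(vs2[i])}; the fsgnjn and fsgnjx
 * variants below invert or XOR that sign bit in the same way.
 */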
3913
3914RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3915RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3916RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
5eacf7d8 3917GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
3918GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
3919GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
1d426b81
LZ
3920RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3921RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3922RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
5eacf7d8 3923GEN_VEXT_VF(vfsgnj_vf_h, 2)
3924GEN_VEXT_VF(vfsgnj_vf_w, 4)
3925GEN_VEXT_VF(vfsgnj_vf_d, 8)
1d426b81
LZ
3926
3927static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3928{
3929 return deposit64(~b, 0, 15, a);
3930}
3931
3932static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3933{
3934 return deposit64(~b, 0, 31, a);
3935}
3936
3937static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3938{
3939 return deposit64(~b, 0, 63, a);
3940}
3941
3942RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3943RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3944RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
5eacf7d8 3945GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
3946GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
3947GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
1d426b81
LZ
3948RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3949RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3950RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
5eacf7d8 3951GEN_VEXT_VF(vfsgnjn_vf_h, 2)
3952GEN_VEXT_VF(vfsgnjn_vf_w, 4)
3953GEN_VEXT_VF(vfsgnjn_vf_d, 8)
1d426b81
LZ
3954
3955static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3956{
3957 return deposit64(b ^ a, 0, 15, a);
3958}
3959
3960static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3961{
3962 return deposit64(b ^ a, 0, 31, a);
3963}
3964
3965static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3966{
3967 return deposit64(b ^ a, 0, 63, a);
3968}
3969
3970RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3971RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3972RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
5eacf7d8 3973GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
3974GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
3975GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
1d426b81
LZ
3976RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3977RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3978RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
5eacf7d8 3979GEN_VEXT_VF(vfsgnjx_vf_h, 2)
3980GEN_VEXT_VF(vfsgnjx_vf_w, 4)
3981GEN_VEXT_VF(vfsgnjx_vf_d, 8)
2a68e9e5
LZ
3982
3983/* Vector Floating-Point Compare Instructions */
3984#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
3985void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
3986 CPURISCVState *env, uint32_t desc) \
3987{ \
2a68e9e5
LZ
3988 uint32_t vm = vext_vm(desc); \
3989 uint32_t vl = env->vl; \
58bc9063 3990 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
5eacf7d8 3991 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 3992 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
3993 uint32_t i; \
3994 \
f714361e 3995 for (i = env->vstart; i < vl; i++) { \
2a68e9e5
LZ
3996 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
3997 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 3998 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
3999 /* set masked-off elements to 1s */ \
4000 if (vma) { \
4001 vext_set_elem_mask(vd, i, 1); \
4002 } \
2a68e9e5
LZ
4003 continue; \
4004 } \
f9298de5 4005 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4006 DO_OP(s2, s1, &env->fp_status)); \
4007 } \
f714361e 4008 env->vstart = 0; \
3b57254d
WL
4009 /*
4010 * mask destination register is always tail-agnostic
4011 * set tail elements to 1s
4012 */ \
5eacf7d8 4013 if (vta_all_1s) { \
4014 for (; i < total_elems; i++) { \
4015 vext_set_elem_mask(vd, i, 1); \
4016 } \
4017 } \
2a68e9e5
LZ
4018}
4019
2a68e9e5
LZ
4020GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4021GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4022GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4023
4024#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4025void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4026 CPURISCVState *env, uint32_t desc) \
4027{ \
2a68e9e5
LZ
4028 uint32_t vm = vext_vm(desc); \
4029 uint32_t vl = env->vl; \
58bc9063 4030 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
5eacf7d8 4031 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
5b448f44 4032 uint32_t vma = vext_vma(desc); \
2a68e9e5
LZ
4033 uint32_t i; \
4034 \
f714361e 4035 for (i = env->vstart; i < vl; i++) { \
2a68e9e5 4036 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4037 if (!vm && !vext_elem_mask(v0, i)) { \
5b448f44
YTC
4038 /* set masked-off elements to 1s */ \
4039 if (vma) { \
4040 vext_set_elem_mask(vd, i, 1); \
4041 } \
2a68e9e5
LZ
4042 continue; \
4043 } \
f9298de5 4044 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4045 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4046 } \
f714361e 4047 env->vstart = 0; \
3b57254d
WL
4048 /*
4049 * mask destination register is always tail-agnostic
4050 * set tail elements to 1s
4051 */ \
5eacf7d8 4052 if (vta_all_1s) { \
4053 for (; i < total_elems; i++) { \
4054 vext_set_elem_mask(vd, i, 1); \
4055 } \
4056 } \
2a68e9e5
LZ
4057}
4058
4059GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4060GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4061GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4062
4063static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4064{
4065 FloatRelation compare = float16_compare_quiet(a, b, s);
4066 return compare != float_relation_equal;
4067}
4068
4069static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4070{
4071 FloatRelation compare = float32_compare_quiet(a, b, s);
4072 return compare != float_relation_equal;
4073}
4074
4075static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4076{
4077 FloatRelation compare = float64_compare_quiet(a, b, s);
4078 return compare != float_relation_equal;
4079}
4080
4081GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4082GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4083GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4084GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4085GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4086GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4087
2a68e9e5
LZ
4088GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4089GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4090GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4091GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4092GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4093GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4094
2a68e9e5
LZ
4095GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4096GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4097GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4098GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4099GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4100GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4101
4102static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4103{
4104 FloatRelation compare = float16_compare(a, b, s);
4105 return compare == float_relation_greater;
4106}
4107
4108static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4109{
4110 FloatRelation compare = float32_compare(a, b, s);
4111 return compare == float_relation_greater;
4112}
4113
4114static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4115{
4116 FloatRelation compare = float64_compare(a, b, s);
4117 return compare == float_relation_greater;
4118}
4119
4120GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4121GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4122GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4123
4124static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4125{
4126 FloatRelation compare = float16_compare(a, b, s);
4127 return compare == float_relation_greater ||
4128 compare == float_relation_equal;
4129}
4130
4131static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4132{
4133 FloatRelation compare = float32_compare(a, b, s);
4134 return compare == float_relation_greater ||
4135 compare == float_relation_equal;
4136}
4137
4138static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4139{
4140 FloatRelation compare = float64_compare(a, b, s);
4141 return compare == float_relation_greater ||
4142 compare == float_relation_equal;
4143}
4144
4145GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4146GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4147GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
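/*
 * Note the softfloat primitives chosen above: vmfeq/vmfne use the quiet
 * comparisons, which raise invalid only for signaling NaNs, while
 * vmflt/vmfle/vmfgt/vmfge use the signaling float*_lt/le/compare
 * routines, which raise invalid for any NaN operand and then compare
 * false.  That matches the IEEE 754 distinction between the equality
 * and the ordered relational predicates.
 */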
4148
121ddbb3 4149/* Vector Floating-Point Classify Instruction */
121ddbb3
LZ
4150target_ulong fclass_h(uint64_t frs1)
4151{
4152 float16 f = frs1;
4153 bool sign = float16_is_neg(f);
4154
4155 if (float16_is_infinity(f)) {
4156 return sign ? 1 << 0 : 1 << 7;
4157 } else if (float16_is_zero(f)) {
4158 return sign ? 1 << 3 : 1 << 4;
4159 } else if (float16_is_zero_or_denormal(f)) {
4160 return sign ? 1 << 2 : 1 << 5;
4161 } else if (float16_is_any_nan(f)) {
4162 float_status s = { }; /* for snan_bit_is_one */
4163 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4164 } else {
4165 return sign ? 1 << 1 : 1 << 6;
4166 }
4167}
4168
4169target_ulong fclass_s(uint64_t frs1)
4170{
4171 float32 f = frs1;
4172 bool sign = float32_is_neg(f);
4173
4174 if (float32_is_infinity(f)) {
4175 return sign ? 1 << 0 : 1 << 7;
4176 } else if (float32_is_zero(f)) {
4177 return sign ? 1 << 3 : 1 << 4;
4178 } else if (float32_is_zero_or_denormal(f)) {
4179 return sign ? 1 << 2 : 1 << 5;
4180 } else if (float32_is_any_nan(f)) {
4181 float_status s = { }; /* for snan_bit_is_one */
4182 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4183 } else {
4184 return sign ? 1 << 1 : 1 << 6;
4185 }
4186}
4187
4188target_ulong fclass_d(uint64_t frs1)
4189{
4190 float64 f = frs1;
4191 bool sign = float64_is_neg(f);
4192
4193 if (float64_is_infinity(f)) {
4194 return sign ? 1 << 0 : 1 << 7;
4195 } else if (float64_is_zero(f)) {
4196 return sign ? 1 << 3 : 1 << 4;
4197 } else if (float64_is_zero_or_denormal(f)) {
4198 return sign ? 1 << 2 : 1 << 5;
4199 } else if (float64_is_any_nan(f)) {
4200 float_status s = { }; /* for snan_bit_is_one */
4201 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4202 } else {
4203 return sign ? 1 << 1 : 1 << 6;
4204 }
4205}
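/*
 * The returned mask uses the same encoding as the scalar fclass
 * instruction: bit 0 = -inf, 1 = negative normal, 2 = negative
 * subnormal, 3 = -0, 4 = +0, 5 = positive subnormal, 6 = positive
 * normal, 7 = +inf, 8 = signaling NaN, 9 = quiet NaN.  For example,
 * fclass_s() of +infinity (0x7f800000) returns 1 << 7 = 0x80.
 */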
4206
4207RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4208RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4209RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
5eacf7d8 4210GEN_VEXT_V(vfclass_v_h, 2)
4211GEN_VEXT_V(vfclass_v_w, 4)
4212GEN_VEXT_V(vfclass_v_d, 8)
64ab5846
LZ
4213
4214/* Vector Floating-Point Merge Instruction */
5eacf7d8 4215
3479a814 4216#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
64ab5846
LZ
4217void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4218 CPURISCVState *env, uint32_t desc) \
4219{ \
64ab5846
LZ
4220 uint32_t vm = vext_vm(desc); \
4221 uint32_t vl = env->vl; \
5eacf7d8 4222 uint32_t esz = sizeof(ETYPE); \
4223 uint32_t total_elems = \
4224 vext_get_total_elems(env, desc, esz); \
4225 uint32_t vta = vext_vta(desc); \
64ab5846
LZ
4226 uint32_t i; \
4227 \
f714361e 4228 for (i = env->vstart; i < vl; i++) { \
64ab5846 4229 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
c45eff30
WL
4230 *((ETYPE *)vd + H(i)) = \
4231 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
64ab5846 4232 } \
f714361e 4233 env->vstart = 0; \
5eacf7d8 4234 /* set tail elements to 1s */ \
4235 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
64ab5846
LZ
4236}
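/*
 * The expansion above implements vfmerge.vfm element-wise as
 * vd[i] = v0.mask[i] ? rs1 : vs2[i] (when vm is clear), with the 64-bit
 * scalar truncated to the element width on store; tail elements are
 * then overwritten with 1s under the tail-agnostic policy.
 */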
4237
3479a814
FC
4238GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4239GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4240GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
92100973
LZ
4241
4242/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4243/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4244RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4245RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4246RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
5eacf7d8 4247GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4248GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4249GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
92100973
LZ
4250
4251/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4252RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4253RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4254RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
5eacf7d8 4255GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4256GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4257GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
92100973
LZ
4258
4259/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4260RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4261RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4262RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
5eacf7d8 4263GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4264GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4265GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
92100973
LZ
4266
4267/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4268RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4269RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4270RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
5eacf7d8 4271GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4272GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4273GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4514b7b1
LZ
4274
4275/* Widening Floating-Point/Integer Type-Convert Instructions */
4276/* (TD, T2, TX2) */
3ce4c09d 4277#define WOP_UU_B uint16_t, uint8_t, uint8_t
4514b7b1
LZ
4278#define WOP_UU_H uint32_t, uint16_t, uint16_t
4279#define WOP_UU_W uint64_t, uint32_t, uint32_t
3b57254d
WL
4280/*
4281 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
4282 */
4514b7b1
LZ
4283RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4284RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
5eacf7d8 4285GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4286GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4514b7b1
LZ
4287
4288/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4289RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4290RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
5eacf7d8 4291GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4292GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4514b7b1 4293
246f8796
WL
4294/*
4295 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
4296 */
3ce4c09d 4297RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4514b7b1
LZ
4298RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4299RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
5eacf7d8 4300GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4301GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4302GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4514b7b1
LZ
4303
4304/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3ce4c09d 4305RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4514b7b1
LZ
4306RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4307RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
5eacf7d8 4308GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4309GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4310GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4514b7b1
LZ
4311
4312/*
246f8796 4313 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4514b7b1
LZ
4314 */
4315static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4316{
4317 return float16_to_float32(a, true, s);
4318}
4319
4320RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4321RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
5eacf7d8 4322GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4323GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
878d406e 4324
87b27bfc
WL
4325RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
4326GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
4327
878d406e
LZ
4328/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4329/* (TD, T2, TX2) */
ff679b58 4330#define NOP_UU_B uint8_t, uint16_t, uint32_t
878d406e
LZ
4331#define NOP_UU_H uint16_t, uint32_t, uint32_t
4332#define NOP_UU_W uint32_t, uint64_t, uint64_t
4333/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
ff679b58
FC
4334RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4335RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4336RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
5eacf7d8 4337GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4338GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4339GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
878d406e
LZ
4340
4341/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
ff679b58
FC
4342RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4343RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4344RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
5eacf7d8 4345GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4346GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4347GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
878d406e 4348
246f8796
WL
4349/*
4350 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
4351 */
ff679b58
FC
4352RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4353RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
5eacf7d8 4354GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4355GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
878d406e
LZ
4356
4357/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
ff679b58
FC
4358RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4359RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
5eacf7d8 4360GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4361GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
878d406e
LZ
4362
4363/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4364static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4365{
4366 return float32_to_float16(a, true, s);
4367}
4368
ff679b58
FC
4369RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4370RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
5eacf7d8 4371GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4372GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
fe5c9ab1 4373
87b27bfc
WL
4374RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
4375GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
4376
fe5c9ab1 4377/*
3b57254d 4378 * Vector Reduction Operations
fe5c9ab1
LZ
4379 */
4380/* Vector Single-Width Integer Reduction Instructions */
3479a814 4381#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
fe5c9ab1 4382void HELPER(NAME)(void *vd, void *v0, void *vs1, \
c45eff30
WL
4383 void *vs2, CPURISCVState *env, \
4384 uint32_t desc) \
fe5c9ab1 4385{ \
fe5c9ab1
LZ
4386 uint32_t vm = vext_vm(desc); \
4387 uint32_t vl = env->vl; \
df4f52a7 4388 uint32_t esz = sizeof(TD); \
4389 uint32_t vlenb = simd_maxsz(desc); \
4390 uint32_t vta = vext_vta(desc); \
fe5c9ab1 4391 uint32_t i; \
fe5c9ab1
LZ
4392 TD s1 = *((TD *)vs1 + HD(0)); \
4393 \
f714361e 4394 for (i = env->vstart; i < vl; i++) { \
fe5c9ab1 4395 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4396 if (!vm && !vext_elem_mask(v0, i)) { \
fe5c9ab1
LZ
4397 continue; \
4398 } \
4399 s1 = OP(s1, (TD)s2); \
4400 } \
4401 *((TD *)vd + HD(0)) = s1; \
f714361e 4402 env->vstart = 0; \
df4f52a7 4403 /* set tail elements to 1s */ \
4404 vext_set_elems_1s(vd, vta, esz, vlenb); \
fe5c9ab1
LZ
4405}
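/*
 * Each reduction seeds the accumulator with vs1[0], folds in every
 * active element of vs2 in element order, and writes only element 0 of
 * vd.  Everything past that first element counts as tail, so bytes from
 * esz up to vlenb are filled with 1s when the tail-agnostic policy is
 * in effect.
 */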
4406
4407/* vd[0] = sum(vs1[0], vs2[*]) */
3479a814
FC
4408GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4409GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4410GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4411GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
fe5c9ab1
LZ
4412
4413/* vd[0] = maxu(vs1[0], vs2[*]) */
3479a814
FC
4414GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4415GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4416GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4417GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4418
4419/* vd[0] = max(vs1[0], vs2[*]) */
3479a814
FC
4420GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4421GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4422GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4423GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4424
4425/* vd[0] = minu(vs1[0], vs2[*]) */
3479a814
FC
4426GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4427GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4428GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4429GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4430
4431/* vd[0] = min(vs1[0], vs2[*]) */
3479a814
FC
4432GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4433GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4434GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4435GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4436
4437/* vd[0] = and(vs1[0], vs2[*]) */
3479a814
FC
4438GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4439GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4440GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4441GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
fe5c9ab1
LZ
4442
4443/* vd[0] = or(vs1[0], vs2[*]) */
3479a814
FC
4444GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4445GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4446GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4447GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
fe5c9ab1
LZ
4448
4449/* vd[0] = xor(vs1[0], vs2[*]) */
3479a814
FC
4450GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4451GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4452GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4453GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
bba71820
LZ
4454
4455/* Vector Widening Integer Reduction Instructions */
4456/* signed sum reduction into double-width accumulator */
3479a814
FC
4457GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4458GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4459GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
bba71820
LZ
4460
4461/* Unsigned sum reduction into double-width accumulator */
3479a814
FC
4462GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4463GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4464GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
523547f1
LZ
4465
4466/* Vector Single-Width Floating-Point Reduction Instructions */
3479a814 4467#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
523547f1
LZ
4468void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4469 void *vs2, CPURISCVState *env, \
4470 uint32_t desc) \
4471{ \
523547f1
LZ
4472 uint32_t vm = vext_vm(desc); \
4473 uint32_t vl = env->vl; \
df4f52a7 4474 uint32_t esz = sizeof(TD); \
4475 uint32_t vlenb = simd_maxsz(desc); \
4476 uint32_t vta = vext_vta(desc); \
523547f1 4477 uint32_t i; \
523547f1
LZ
4478 TD s1 = *((TD *)vs1 + HD(0)); \
4479 \
f714361e 4480 for (i = env->vstart; i < vl; i++) { \
523547f1 4481 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4482 if (!vm && !vext_elem_mask(v0, i)) { \
523547f1
LZ
4483 continue; \
4484 } \
4485 s1 = OP(s1, (TD)s2, &env->fp_status); \
4486 } \
4487 *((TD *)vd + HD(0)) = s1; \
f714361e 4488 env->vstart = 0; \
df4f52a7 4489 /* set tail elements to 1s */ \
4490 vext_set_elems_1s(vd, vta, esz, vlenb); \
523547f1
LZ
4491}
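/*
 * The same sequential fold serves both the ordered (vfredosum) and the
 * unordered (vfredusum) sums below: evaluating strictly in element
 * order is one of the orderings the specification permits for the
 * unordered form, so no separate implementation is needed.
 */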
4492
4493/* Unordered sum */
a3ab69f9
YL
4494GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4495GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4496GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4497
4498/* Ordered sum */
4499GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4500GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4501GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Maximum value */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
              float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
              float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
              float64_maximum_number)

/* Minimum value */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
              float16_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
              float32_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
              float64_minimum_number)

/* Vector Widening Floating-Point Add Instructions */
static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
{
    return float32_add(a, float16_to_float32(b, true, s), s);
}

static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
{
    return float64_add(a, float32_to_float64(b, s), s);
}

/* Vector Widening Floating-Point Reduction Instructions */
/* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)

/*
 * Vector Mask Operations
 */
/* Vector Mask-Register Logical Instructions */
#define GEN_VEXT_MASK_VV(NAME, OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\
    uint32_t vta_all_1s = vext_vta_all_1s(desc);          \
    uint32_t i;                                           \
    int a, b;                                             \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        a = vext_elem_mask(vs1, i);                       \
        b = vext_elem_mask(vs2, i);                       \
        vext_set_elem_mask(vd, i, OP(b, a));              \
    }                                                     \
    env->vstart = 0;                                      \
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */                                                   \
    if (vta_all_1s) {                                     \
        for (; i < total_elems; i++) {                    \
            vext_set_elem_mask(vd, i, 1);                 \
        }                                                 \
    }                                                     \
}

#define DO_NAND(N, M)  (!(N & M))
#define DO_ANDNOT(N, M)  (N & !M)
#define DO_NOR(N, M)  (!(N | M))
#define DO_ORNOT(N, M)  (N | !M)
#define DO_XNOR(N, M)  (!(N ^ M))
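
/*
 * GEN_VEXT_MASK_VV applies OP(b, a), i.e. N is the vs2 mask bit and M is the
 * vs1 mask bit, so vmandn.mm computes vs2[i] & !vs1[i] and vmorn.mm computes
 * vs2[i] | !vs1[i], matching the operand order in the spec.
 */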

GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)

/* Vector count population in mask vcpop */
target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
                             uint32_t desc)
{
    target_ulong cnt = 0;
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                cnt++;
            }
        }
    }
    env->vstart = 0;
    return cnt;
}

/* vfirst find-first-set mask bit */
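/* Returns -1 when no active element of vs2 has its mask bit set. */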
target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
                              uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                return i;
            }
        }
    }
    env->vstart = 0;
    return -1LL;
}

enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};
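
/*
 * Shared worker for vmsbf.m / vmsif.m / vmsof.m.  As an example (unmasked,
 * vl = 5), for vs2 = 0,0,1,0,1 the results are:
 *   vmsbf.m (BEFORE_FIRST):  1,1,0,0,0
 *   vmsif.m (INCLUDE_FIRST): 1,1,1,0,0
 *   vmsof.m (ONLY_FIRST):    0,0,1,0,0
 */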

static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    uint32_t vma = vext_vma(desc);
    int i;
    bool first_mask_bit = false;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            if (vma) {
                vext_set_elem_mask(vd, i, 1);
            }
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

/* Vector Iota Instruction */
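/*
 * viota.m writes to each active element the count of set mask bits of vs2 at
 * active positions before it.  E.g. (unmasked, vl = 5) vs2 = 1,0,0,1,1 gives
 * vd = 0,1,1,1,2.
 */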
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
                  uint32_t desc)                                          \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t sum = 0;                                                     \
    int i;                                                                \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = sum;                                      \
        if (vext_elem_mask(vs2, i)) {                                     \
            sum++;                                                        \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    int i;                                                                \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)

/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */
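/*
 * For vslideup.vx, destination elements with index below OFFSET (and below
 * vstart) are left untouched, which is why the copy loop below starts at
 * MAX(env->vstart, offset).
 */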
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

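/*
 * For vslidedown.vx, element i reads vs2[i + OFFSET].  Active destination
 * elements whose source index falls at or beyond vlmax have no source
 * element and are written as zero by the second loop in the macro below.
 */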
#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong i_max, i_min, i;                                         \
                                                                          \
    i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl);                         \
    i_max = MAX(i_min, env->vstart);                                      \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                    \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,         \
                                 void *vs2, CPURISCVState *env,           \
                                 uint32_t desc)                           \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIE1UP(8,  H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,       \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8,  H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
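
/*
 * vrgatherei16 always uses 16-bit index elements (TS1 above) regardless of
 * SEW, which allows indexing more than 256 elements when SEW is 8 bits.
 */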

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
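/*
 * E.g. with vl = 4, vs1 mask = 1,0,1,0 and vs2 = {a, b, c, d}, the packed
 * result is vd[0] = a and vd[1] = c.
 */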
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    if (startb >= maxsz) {
        env->vstart = 0;
        return;
    }

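    /*
     * On a big-endian host the H1() macro swizzles byte offsets within each
     * 8-byte unit, so when the restart offset is not 8-byte aligned the
     * remaining bytes of that unit form a contiguous host range starting at
     * H1(j - 1); they are copied separately before the aligned bulk memcpy.
     */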
    if (HOST_BIG_ENDIAN && i % 8 != 0) {
        uint32_t j = ROUND_UP(i, 8);
        memcpy((uint8_t *)vd + H1(j - 1),
               (uint8_t *)vs2 + H1(j - 1),
               j - i);
        i = j;
    }

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - i);

    env->vstart = 0;
}

/* Vector Integer Extension */
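/*
 * vzext_vfN zero-extends and vsext_vfN sign-extends each source element of
 * width SEW/N into a destination element of width SEW, e.g. vzext_vf4_w
 * widens uint8_t source elements to uint32_t.
 */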
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t,  int8_t,  H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t,  int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t,  int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t,  int8_t,  H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t,  int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t,  int8_t,  H8, H1)