/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}
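
/*
 * Illustrative example (added comment, not part of the upstream source):
 * with VLEN = 128, SEW = 32 and LMUL = 1, VLMAX = VLEN * LMUL / SEW = 4.
 * A vsetvli requesting AVL = 7 therefore returns vl = 4, while AVL = 3
 * returns vl = 3.  An unsupported vtype (e.g. SEW wider than ELEN) only
 * sets vill and zeroes vl/vtype, as coded above.
 */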

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif
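
/*
 * Worked example (added for illustration, not in the upstream source):
 * on a big-endian host the byte for element index 0 of a byte-wise view
 * lives at offset H1(0) = 0 ^ 7 = 7 inside the host 64-bit chunk, and a
 * 16-bit element at index 1 lives at byte offset H2(1) * 2 = (1 ^ 3) * 2 = 4.
 * On little-endian hosts the H macros are identity mappings.
 */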

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}
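
/*
 * Example (illustrative, not part of the upstream source): for a fractional
 * LMUL of 1/8 the vtype.vlmul field holds 0b101; sextract32(0b101, 0, 3)
 * sign-extends the 3-bit value to -3, so callers can scale VLMAX with a
 * plain signed shift count instead of special-casing fractional LMUL.
 */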

static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}

static inline uint32_t vext_vta_all_1s(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
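
/*
 * Worked example (added for illustration): with VLEN = 128 bits, vlenb is 16.
 * For SEW = 16 (log2_esz = 1) and LMUL = 2 (vext_lmul() = 1) the scale is
 * 1 - 1 = 0, so vext_max_elems() returns 16, matching
 * VLMAX = VLEN * LMUL / SEW = 128 * 2 / 16.
 */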

/*
 * Get number of total elements, including prestart, body and tail elements.
 * Note that when LMUL < 1, the tail includes the elements past VLMAX that
 * are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoint before real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in TLB
 * and page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}
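
/*
 * Illustrative example (not in the upstream source): with 4 KiB pages, a
 * probe of len = 32 bytes starting at addr = 0x1ff0 has pagelen = 0x10, so
 * the first probe_access() covers 0x1ff0-0x1fff and the second covers the
 * remaining 16 bytes starting at 0x2000 on the following page.
 */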

/* set agnostic elements to 1s */
static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                              uint32_t tot)
{
    if (is_agnostic == 0) {
        /* policy undisturbed */
        return;
    }
    if (tot - cnt == 0) {
        return;
    }
    memset(base + cnt, -1, tot - cnt);
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}
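
/*
 * Example (illustrative only): mask element 70 lives in the second host
 * 64-bit chunk of v0, so vext_elem_mask(v0, 70) reads bit 70 % 64 = 6 of
 * ((uint64_t *)v0)[1], and vext_set_elem_mask() updates the same bit with
 * deposit64().
 */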

/* elements operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
    if (nf * max_elems % total_elems != 0) {
        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
        uint32_t registers_used =
            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}
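
/*
 * Illustrative example (not in the upstream source): a segment load with
 * nf = 2, SEW = 32 (log2_esz = 2) and stride = 64 reads element i of field 0
 * from base + 64 * i and element i of field 1 from base + 64 * i + 4,
 * storing them at indices i and i + max_elems of vd respectively.
 */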

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void * v0, target_ulong base,               \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                         \
{                                                                        \
    uint32_t vm = vext_vm(desc);                                         \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,       \
                     ctzl(sizeof(ETYPE)), GETPC());                      \
}

GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
    if (nf * max_elems % total_elems != 0) {
        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
        uint32_t registers_used =
            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}

/*
 * masked unit-stride load and store operation will be a special case of
 * stride, stride = NF * sizeof (MTYPE)
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 *** unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC());
}
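
/*
 * Worked example (added for illustration): vlm.v/vsm.v operate on mask bytes,
 * so for vl = 17 mask bits the effective length is evl = (17 + 7) >> 3 = 3
 * bytes, i.e. ceil(17 / 8), loaded or stored with an element width of one
 * byte regardless of the SEW currently configured in vtype.
 */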

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
    if (nf * max_elems % total_elems != 0) {
        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
        uint32_t registers_used =
            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
                  void *vs2, CPURISCVState *env, uint32_t desc)             \
{                                                                           \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                 \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());                 \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC());                                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
    if (nf * max_elems % total_elems != 0) {
        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
        uint32_t registers_used =
            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M) (N & M)
#define DO_XOR(N, M) (N ^ M)
#define DO_OR(N, M) (N | M)
#define DO_ADD(N, M) (N + M)

/* Signed min/max */
#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

/*
 *** load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
    uint32_t max_elems = vlenb >> log2_esz;

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store rest of elements of current segment pointed by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
        }
        k++;
    }

    /* load/store elements for rest of segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}
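
/*
 * Illustrative example (not in the upstream source): for vl2re32.v with
 * VLEN = 128, vlenb = 16 and max_elems = 4 elements per register.  If the
 * access was interrupted with env->vstart = 6, the restart resumes in
 * register segment k = 6 / 4 = 1 at element offset off = 6 % 4 = 2,
 * finishes that register, then processes any remaining registers before
 * clearing vstart.
 */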

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)

/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...) macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivv2_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ)                            \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vv(vd, v0, vs1, vs2, env, desc,               \
               do_##NAME, ESZ);                           \
}

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)
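
/*
 * Expansion example (added for illustration): RVVCALL(OPIVV2, vadd_vv_b,
 * OP_SSS_B, H1, H1, H1, DO_ADD) above expands to
 *
 *   static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(i));
 *       int8_t s2 = *((int8_t *)vs2 + H1(i));
 *       *((int8_t *)vd + H1(i)) = (s2 + s1);
 *   }
 *
 * and GEN_VEXT_VV(vadd_vv_b, 1) wraps it into helper_vadd_vv_b() via
 * do_vext_vv(), which applies the mask and tail-agnostic handling.
 */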

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operator type.
 * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
}

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivx2_fn fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, s1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vx(vd, v0, s1, vs2, env, desc,                \
               do_##NAME, ESZ);                           \
}

GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t i;                                                          \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
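
/*
 * Worked example (added for illustration): for 8-bit elements, DO_MADC
 * detects the carry out of an addition by checking for wrap-around, e.g.
 * N = 200, M = 100, C = 0 gives (uint8_t)(200 + 100) = 44 < 200, so the
 * carry-out bit written by vmadc is 1; with N = 10, M = 20 the sum does
 * not wrap and the result bit is 0.
 */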

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = env_archcpu(env)->cfg.vlen;        \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /* mask destination register is always tail-agnostic */  \
    /* set tail elements to 1s */                             \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vl = env->vl;                                      \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t total_elems = env_archcpu(env)->cfg.vlen;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                \
    uint32_t i;                                                 \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
        ETYPE carry = !vm && vext_elem_mask(v0, i);             \
        vext_set_elem_mask(vd, i,                               \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
    }                                                           \
    env->vstart = 0;                                            \
    /* mask destination register is always tail-agnostic */    \
    /* set tail elements to 1s */                               \
    if (vta_all_1s) {                                           \
        for (; i < total_elems; i++) {                          \
            vext_set_elem_mask(vd, i, 1);                       \
        }                                                       \
    }                                                           \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M) (N << (M))
#define DO_SRL(N, M) (N >> (M))

/* generate the helpers for shift instructions with two vector operators */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
                  void *vs2, CPURISCVState *env, uint32_t desc)           \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS1);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* generate the helpers for shift instructions with one vector and one scalar */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
                  void *vs2, CPURISCVState *env,            \
                  uint32_t desc)                            \
{                                                           \
    uint32_t vm = vext_vm(desc);                            \
    uint32_t vl = env->vl;                                  \
    uint32_t esz = sizeof(TD);                              \
    uint32_t total_elems =                                  \
        vext_get_total_elems(env, desc, esz);               \
    uint32_t vta = vext_vta(desc);                          \
    uint32_t i;                                             \
                                                            \
    for (i = env->vstart; i < vl; i++) {                    \
        if (!vm && !vext_elem_mask(v0, i)) {                \
            continue;                                       \
        }                                                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
    }                                                       \
    env->vstart = 0;                                        \
    /* set tail elements to 1s */                           \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
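
/*
 * Note added for clarity (not an upstream comment): the arithmetic shifts
 * (vsra*) and the narrowing shifts reuse DO_SRL; they become arithmetic
 * because the source type TS2 is signed there, relying on ">>" of a signed
 * value compiling to an arithmetic shift, as QEMU assumes elsewhere.  For
 * the narrowing forms the source type is twice as wide as the destination
 * and the shift-amount mask covers the wider type (e.g. 0xf for a 16-bit
 * source narrowed to 8 bits).
 */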
1366fc79
LZ
1360
1361/* Vector Integer Comparison Instructions */
1362#define DO_MSEQ(N, M) (N == M)
1363#define DO_MSNE(N, M) (N != M)
1364#define DO_MSLT(N, M) (N < M)
1365#define DO_MSLE(N, M) (N <= M)
1366#define DO_MSGT(N, M) (N > M)
1367
1368#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1369void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1370 CPURISCVState *env, uint32_t desc) \
1371{ \
1366fc79
LZ
1372 uint32_t vm = vext_vm(desc); \
1373 uint32_t vl = env->vl; \
38581e5c 1374 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1375 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1366fc79
LZ
1376 uint32_t i; \
1377 \
f714361e 1378 for (i = env->vstart; i < vl; i++) { \
1366fc79
LZ
1379 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1380 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1381 if (!vm && !vext_elem_mask(v0, i)) { \
1366fc79
LZ
1382 continue; \
1383 } \
f9298de5 1384 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1366fc79 1385 } \
f714361e 1386 env->vstart = 0; \
38581e5c 1387 /* mask destination register are always tail-agnostic */ \
1388 /* set tail elements to 1s */ \
1389 if (vta_all_1s) { \
1390 for (; i < total_elems; i++) { \
1391 vext_set_elem_mask(vd, i, 1); \
1392 } \
1393 } \
1366fc79
LZ
1394}
1395
1396GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1397GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1398GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1399GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1400
1401GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1402GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1403GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1404GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1405
1406GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1407GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1408GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1409GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1410
1411GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1412GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1413GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1414GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1415
1416GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1417GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1418GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1419GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1420
1421GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1422GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1423GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1424GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1425
1426#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1427void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1428 CPURISCVState *env, uint32_t desc) \
1429{ \
1366fc79
LZ
1430 uint32_t vm = vext_vm(desc); \
1431 uint32_t vl = env->vl; \
38581e5c 1432 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1433 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1366fc79
LZ
1434 uint32_t i; \
1435 \
f714361e 1436 for (i = env->vstart; i < vl; i++) { \
1366fc79 1437 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1438 if (!vm && !vext_elem_mask(v0, i)) { \
1366fc79
LZ
1439 continue; \
1440 } \
f9298de5 1441 vext_set_elem_mask(vd, i, \
1366fc79
LZ
1442 DO_OP(s2, (ETYPE)(target_long)s1)); \
1443 } \
f714361e 1444 env->vstart = 0; \
38581e5c 1445 /* mask destination register are always tail-agnostic */ \
1446 /* set tail elements to 1s */ \
1447 if (vta_all_1s) { \
1448 for (; i < total_elems; i++) { \
1449 vext_set_elem_mask(vd, i, 1); \
1450 } \
1451 } \
1366fc79
LZ
1452}
1453
1454GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1455GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1456GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1457GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1458
1459GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1460GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1461GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1462GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1463
1464GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1465GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1466GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1467GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1468
1469GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1470GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1471GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1472GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1473
1474GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1475GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1476GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1477GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1478
1479GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1480GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1481GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1482GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1483
1484GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1485GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1486GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1487GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1488
1489GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1490GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1491GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1492GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
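/*
 * Note that the greater-than compares only exist in the vector-scalar
 * (_vx/_vi) forms above: per the RVV spec, a vector-vector greater-than
 * is obtained by swapping the operands of vmslt/vmsltu instead.
 */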
558fa779
LZ
1493
1494/* Vector Integer Min/Max Instructions */
1495RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1496RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1497RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1498RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1499RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1500RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1501RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1502RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1503RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1504RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1505RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1506RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1507RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1508RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1509RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1510RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
f1eed927 1511GEN_VEXT_VV(vminu_vv_b, 1)
1512GEN_VEXT_VV(vminu_vv_h, 2)
1513GEN_VEXT_VV(vminu_vv_w, 4)
1514GEN_VEXT_VV(vminu_vv_d, 8)
1515GEN_VEXT_VV(vmin_vv_b, 1)
1516GEN_VEXT_VV(vmin_vv_h, 2)
1517GEN_VEXT_VV(vmin_vv_w, 4)
1518GEN_VEXT_VV(vmin_vv_d, 8)
1519GEN_VEXT_VV(vmaxu_vv_b, 1)
1520GEN_VEXT_VV(vmaxu_vv_h, 2)
1521GEN_VEXT_VV(vmaxu_vv_w, 4)
1522GEN_VEXT_VV(vmaxu_vv_d, 8)
1523GEN_VEXT_VV(vmax_vv_b, 1)
1524GEN_VEXT_VV(vmax_vv_h, 2)
1525GEN_VEXT_VV(vmax_vv_w, 4)
1526GEN_VEXT_VV(vmax_vv_d, 8)
558fa779
LZ
1527
1528RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1529RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1530RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1531RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1532RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1533RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1534RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1535RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1536RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1537RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1538RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1539RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1540RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1541RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1542RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1543RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
5c19fc15 1544GEN_VEXT_VX(vminu_vx_b, 1)
1545GEN_VEXT_VX(vminu_vx_h, 2)
1546GEN_VEXT_VX(vminu_vx_w, 4)
1547GEN_VEXT_VX(vminu_vx_d, 8)
1548GEN_VEXT_VX(vmin_vx_b, 1)
1549GEN_VEXT_VX(vmin_vx_h, 2)
1550GEN_VEXT_VX(vmin_vx_w, 4)
1551GEN_VEXT_VX(vmin_vx_d, 8)
1552GEN_VEXT_VX(vmaxu_vx_b, 1)
1553GEN_VEXT_VX(vmaxu_vx_h, 2)
1554GEN_VEXT_VX(vmaxu_vx_w, 4)
1555GEN_VEXT_VX(vmaxu_vx_d, 8)
1556GEN_VEXT_VX(vmax_vx_b, 1)
1557GEN_VEXT_VX(vmax_vx_h, 2)
1558GEN_VEXT_VX(vmax_vx_w, 4)
1559GEN_VEXT_VX(vmax_vx_d, 8)
958b85f3
LZ
1560
1561/* Vector Single-Width Integer Multiply Instructions */
1562#define DO_MUL(N, M) (N * M)
1563RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1564RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1565RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1566RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
f1eed927 1567GEN_VEXT_VV(vmul_vv_b, 1)
1568GEN_VEXT_VV(vmul_vv_h, 2)
1569GEN_VEXT_VV(vmul_vv_w, 4)
1570GEN_VEXT_VV(vmul_vv_d, 8)
958b85f3
LZ
1571
1572static int8_t do_mulh_b(int8_t s2, int8_t s1)
1573{
1574 return (int16_t)s2 * (int16_t)s1 >> 8;
1575}
1576
1577static int16_t do_mulh_h(int16_t s2, int16_t s1)
1578{
1579 return (int32_t)s2 * (int32_t)s1 >> 16;
1580}
1581
1582static int32_t do_mulh_w(int32_t s2, int32_t s1)
1583{
1584 return (int64_t)s2 * (int64_t)s1 >> 32;
1585}
1586
1587static int64_t do_mulh_d(int64_t s2, int64_t s1)
1588{
1589 uint64_t hi_64, lo_64;
1590
1591 muls64(&lo_64, &hi_64, s1, s2);
1592 return hi_64;
1593}
1594
1595static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1596{
1597 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1598}
1599
1600static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1601{
1602 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1603}
1604
1605static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1606{
1607 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1608}
1609
1610static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1611{
1612 uint64_t hi_64, lo_64;
1613
1614 mulu64(&lo_64, &hi_64, s2, s1);
1615 return hi_64;
1616}
1617
1618static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1619{
1620 return (int16_t)s2 * (uint16_t)s1 >> 8;
1621}
1622
1623static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1624{
1625 return (int32_t)s2 * (uint32_t)s1 >> 16;
1626}
1627
1628static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1629{
1630 return (int64_t)s2 * (uint64_t)s1 >> 32;
1631}
1632
1633/*
1634 * Let A = signed operand,
1635 * B = unsigned operand,
1636 * P = mulu64(A, B), the product of the raw (unsigned) bit patterns
1637 *
1638 * IF A < 0, the bit pattern of A read as an unsigned number is A + 2 ** 64,
1639 * SO
1640 * P = (A + 2 ** 64) * B
1641 * = A * B + 2 ** 64 * B
1642 * and the desired signed * unsigned product is
1643 * SP = A * B
1644 * = P - 2 ** 64 * B
1645 * i.e. the high 64 bits of P are too large by exactly B.
1646 * ELSE (A >= 0)
1647 * SP = P, no correction needed.
1648 * THEN
1649 * HI_P -= (A < 0 ? B : 0)
1650 */
1651
1652static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1653{
1654 uint64_t hi_64, lo_64;
1655
1656 mulu64(&lo_64, &hi_64, s2, s1);
1657
1658 hi_64 -= s2 < 0 ? s1 : 0;
1659 return hi_64;
1660}
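/*
 * Worked example with an 8-bit analogue of the correction above:
 * A = -2 (bit pattern 0xFE = 254), B = 3.
 * P = 254 * 3 = 0x02FA, so HI_P = 0x02.
 * The true signed * unsigned product is -6 = 0xFFFA, whose high byte
 * is 0xFF = 0x02 - B, i.e. HI_P corrected by subtracting B.
 */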
1661
1662RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1663RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1664RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1665RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1666RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1667RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1668RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1669RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1670RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1671RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1672RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1673RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
f1eed927 1674GEN_VEXT_VV(vmulh_vv_b, 1)
1675GEN_VEXT_VV(vmulh_vv_h, 2)
1676GEN_VEXT_VV(vmulh_vv_w, 4)
1677GEN_VEXT_VV(vmulh_vv_d, 8)
1678GEN_VEXT_VV(vmulhu_vv_b, 1)
1679GEN_VEXT_VV(vmulhu_vv_h, 2)
1680GEN_VEXT_VV(vmulhu_vv_w, 4)
1681GEN_VEXT_VV(vmulhu_vv_d, 8)
1682GEN_VEXT_VV(vmulhsu_vv_b, 1)
1683GEN_VEXT_VV(vmulhsu_vv_h, 2)
1684GEN_VEXT_VV(vmulhsu_vv_w, 4)
1685GEN_VEXT_VV(vmulhsu_vv_d, 8)
958b85f3
LZ
1686
1687RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1688RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1689RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1690RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1691RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1692RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1693RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1694RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1695RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1696RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1697RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1698RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1699RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1700RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1701RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1702RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
5c19fc15 1703GEN_VEXT_VX(vmul_vx_b, 1)
1704GEN_VEXT_VX(vmul_vx_h, 2)
1705GEN_VEXT_VX(vmul_vx_w, 4)
1706GEN_VEXT_VX(vmul_vx_d, 8)
1707GEN_VEXT_VX(vmulh_vx_b, 1)
1708GEN_VEXT_VX(vmulh_vx_h, 2)
1709GEN_VEXT_VX(vmulh_vx_w, 4)
1710GEN_VEXT_VX(vmulh_vx_d, 8)
1711GEN_VEXT_VX(vmulhu_vx_b, 1)
1712GEN_VEXT_VX(vmulhu_vx_h, 2)
1713GEN_VEXT_VX(vmulhu_vx_w, 4)
1714GEN_VEXT_VX(vmulhu_vx_d, 8)
1715GEN_VEXT_VX(vmulhsu_vx_b, 1)
1716GEN_VEXT_VX(vmulhsu_vx_h, 2)
1717GEN_VEXT_VX(vmulhsu_vx_w, 4)
1718GEN_VEXT_VX(vmulhsu_vx_d, 8)
85e6658c
LZ
1719
1720/* Vector Integer Divide Instructions */
1721#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1722#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1723#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\
1724 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1725#define DO_REM(N, M) (unlikely(M == 0) ? N :\
1726 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
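/*
 * These macros follow the scalar M-extension rules: division by zero
 * yields an all-ones quotient (-1) and leaves the dividend unchanged as
 * the remainder.  (N == -N) is true only for zero and for the most
 * negative value of the type, so together with (M == -1) it catches the
 * overflowing "most negative / -1" case, whose quotient is the dividend
 * itself and whose remainder is 0.  (N == 0 is also caught, harmlessly,
 * since 0 / -1 == 0 and 0 % -1 == 0.)
 */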
1727
1728RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1729RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1730RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1731RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1732RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1733RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1734RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1735RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1736RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1737RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1738RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1739RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1740RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1741RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1742RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1743RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
f1eed927 1744GEN_VEXT_VV(vdivu_vv_b, 1)
1745GEN_VEXT_VV(vdivu_vv_h, 2)
1746GEN_VEXT_VV(vdivu_vv_w, 4)
1747GEN_VEXT_VV(vdivu_vv_d, 8)
1748GEN_VEXT_VV(vdiv_vv_b, 1)
1749GEN_VEXT_VV(vdiv_vv_h, 2)
1750GEN_VEXT_VV(vdiv_vv_w, 4)
1751GEN_VEXT_VV(vdiv_vv_d, 8)
1752GEN_VEXT_VV(vremu_vv_b, 1)
1753GEN_VEXT_VV(vremu_vv_h, 2)
1754GEN_VEXT_VV(vremu_vv_w, 4)
1755GEN_VEXT_VV(vremu_vv_d, 8)
1756GEN_VEXT_VV(vrem_vv_b, 1)
1757GEN_VEXT_VV(vrem_vv_h, 2)
1758GEN_VEXT_VV(vrem_vv_w, 4)
1759GEN_VEXT_VV(vrem_vv_d, 8)
85e6658c
LZ
1760
1761RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1762RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1763RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1764RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1765RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1766RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1767RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1768RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1769RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1770RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1771RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1772RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1773RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1774RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1775RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1776RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
5c19fc15 1777GEN_VEXT_VX(vdivu_vx_b, 1)
1778GEN_VEXT_VX(vdivu_vx_h, 2)
1779GEN_VEXT_VX(vdivu_vx_w, 4)
1780GEN_VEXT_VX(vdivu_vx_d, 8)
1781GEN_VEXT_VX(vdiv_vx_b, 1)
1782GEN_VEXT_VX(vdiv_vx_h, 2)
1783GEN_VEXT_VX(vdiv_vx_w, 4)
1784GEN_VEXT_VX(vdiv_vx_d, 8)
1785GEN_VEXT_VX(vremu_vx_b, 1)
1786GEN_VEXT_VX(vremu_vx_h, 2)
1787GEN_VEXT_VX(vremu_vx_w, 4)
1788GEN_VEXT_VX(vremu_vx_d, 8)
1789GEN_VEXT_VX(vrem_vx_b, 1)
1790GEN_VEXT_VX(vrem_vx_h, 2)
1791GEN_VEXT_VX(vrem_vx_w, 4)
1792GEN_VEXT_VX(vrem_vx_d, 8)
97b1cba3
LZ
1793
1794/* Vector Widening Integer Multiply Instructions */
1795RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1796RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1797RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1798RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1799RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1800RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1801RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1802RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1803RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
f1eed927 1804GEN_VEXT_VV(vwmul_vv_b, 2)
1805GEN_VEXT_VV(vwmul_vv_h, 4)
1806GEN_VEXT_VV(vwmul_vv_w, 8)
1807GEN_VEXT_VV(vwmulu_vv_b, 2)
1808GEN_VEXT_VV(vwmulu_vv_h, 4)
1809GEN_VEXT_VV(vwmulu_vv_w, 8)
1810GEN_VEXT_VV(vwmulsu_vv_b, 2)
1811GEN_VEXT_VV(vwmulsu_vv_h, 4)
1812GEN_VEXT_VV(vwmulsu_vv_w, 8)
97b1cba3
LZ
1813
1814RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1815RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1816RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1817RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1818RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1819RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1820RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1821RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1822RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
5c19fc15 1823GEN_VEXT_VX(vwmul_vx_b, 2)
1824GEN_VEXT_VX(vwmul_vx_h, 4)
1825GEN_VEXT_VX(vwmul_vx_w, 8)
1826GEN_VEXT_VX(vwmulu_vx_b, 2)
1827GEN_VEXT_VX(vwmulu_vx_h, 4)
1828GEN_VEXT_VX(vwmulu_vx_w, 8)
1829GEN_VEXT_VX(vwmulsu_vx_b, 2)
1830GEN_VEXT_VX(vwmulsu_vx_h, 4)
1831GEN_VEXT_VX(vwmulsu_vx_w, 8)
54df813a
LZ
1832
1833/* Vector Single-Width Integer Multiply-Add Instructions */
1834#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1835static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1836{ \
1837 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1838 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1839 TD d = *((TD *)vd + HD(i)); \
1840 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1841}
1842
1843#define DO_MACC(N, M, D) (M * N + D)
1844#define DO_NMSAC(N, M, D) (-(M * N) + D)
1845#define DO_MADD(N, M, D) (M * D + N)
1846#define DO_NMSUB(N, M, D) (-(M * D) + N)
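/*
 * The four forms differ in which operand acts as the accumulator:
 * DO_MACC/DO_NMSAC keep vd as the addend, vd = +/-(vs1 * vs2) + vd,
 * while DO_MADD/DO_NMSUB multiply by vd and take the addend from vs2,
 * vd = +/-(vs1 * vd) + vs2.
 */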
1847RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1848RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1849RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1850RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1851RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1852RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1853RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1854RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1855RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1856RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1857RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1858RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1859RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1860RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1861RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1862RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
f1eed927 1863GEN_VEXT_VV(vmacc_vv_b, 1)
1864GEN_VEXT_VV(vmacc_vv_h, 2)
1865GEN_VEXT_VV(vmacc_vv_w, 4)
1866GEN_VEXT_VV(vmacc_vv_d, 8)
1867GEN_VEXT_VV(vnmsac_vv_b, 1)
1868GEN_VEXT_VV(vnmsac_vv_h, 2)
1869GEN_VEXT_VV(vnmsac_vv_w, 4)
1870GEN_VEXT_VV(vnmsac_vv_d, 8)
1871GEN_VEXT_VV(vmadd_vv_b, 1)
1872GEN_VEXT_VV(vmadd_vv_h, 2)
1873GEN_VEXT_VV(vmadd_vv_w, 4)
1874GEN_VEXT_VV(vmadd_vv_d, 8)
1875GEN_VEXT_VV(vnmsub_vv_b, 1)
1876GEN_VEXT_VV(vnmsub_vv_h, 2)
1877GEN_VEXT_VV(vnmsub_vv_w, 4)
1878GEN_VEXT_VV(vnmsub_vv_d, 8)
54df813a
LZ
1879
1880#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1881static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1882{ \
1883 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1884 TD d = *((TD *)vd + HD(i)); \
1885 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1886}
1887
1888RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1889RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1890RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1891RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1892RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1893RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1894RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1895RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1896RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1897RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1898RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1899RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1900RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1901RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1902RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1903RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
5c19fc15 1904GEN_VEXT_VX(vmacc_vx_b, 1)
1905GEN_VEXT_VX(vmacc_vx_h, 2)
1906GEN_VEXT_VX(vmacc_vx_w, 4)
1907GEN_VEXT_VX(vmacc_vx_d, 8)
1908GEN_VEXT_VX(vnmsac_vx_b, 1)
1909GEN_VEXT_VX(vnmsac_vx_h, 2)
1910GEN_VEXT_VX(vnmsac_vx_w, 4)
1911GEN_VEXT_VX(vnmsac_vx_d, 8)
1912GEN_VEXT_VX(vmadd_vx_b, 1)
1913GEN_VEXT_VX(vmadd_vx_h, 2)
1914GEN_VEXT_VX(vmadd_vx_w, 4)
1915GEN_VEXT_VX(vmadd_vx_d, 8)
1916GEN_VEXT_VX(vnmsub_vx_b, 1)
1917GEN_VEXT_VX(vnmsub_vx_h, 2)
1918GEN_VEXT_VX(vnmsub_vx_w, 4)
1919GEN_VEXT_VX(vnmsub_vx_d, 8)
2b587b33
LZ
1920
1921/* Vector Widening Integer Multiply-Add Instructions */
1922RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1923RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1924RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1925RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1926RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1927RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1928RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1929RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1930RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
f1eed927 1931GEN_VEXT_VV(vwmaccu_vv_b, 2)
1932GEN_VEXT_VV(vwmaccu_vv_h, 4)
1933GEN_VEXT_VV(vwmaccu_vv_w, 8)
1934GEN_VEXT_VV(vwmacc_vv_b, 2)
1935GEN_VEXT_VV(vwmacc_vv_h, 4)
1936GEN_VEXT_VV(vwmacc_vv_w, 8)
1937GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1938GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1939GEN_VEXT_VV(vwmaccsu_vv_w, 8)
2b587b33
LZ
1940
1941RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1942RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1943RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1944RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1945RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1946RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1947RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1948RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1949RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1950RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1951RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1952RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
5c19fc15 1953GEN_VEXT_VX(vwmaccu_vx_b, 2)
1954GEN_VEXT_VX(vwmaccu_vx_h, 4)
1955GEN_VEXT_VX(vwmaccu_vx_w, 8)
1956GEN_VEXT_VX(vwmacc_vx_b, 2)
1957GEN_VEXT_VX(vwmacc_vx_h, 4)
1958GEN_VEXT_VX(vwmacc_vx_w, 8)
1959GEN_VEXT_VX(vwmaccsu_vx_b, 2)
1960GEN_VEXT_VX(vwmaccsu_vx_h, 4)
1961GEN_VEXT_VX(vwmaccsu_vx_w, 8)
1962GEN_VEXT_VX(vwmaccus_vx_b, 2)
1963GEN_VEXT_VX(vwmaccus_vx_h, 4)
1964GEN_VEXT_VX(vwmaccus_vx_w, 8)
f020a7a1
LZ
1965
1966/* Vector Integer Merge and Move Instructions */
3479a814 1967#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
f020a7a1
LZ
1968void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1969 uint32_t desc) \
1970{ \
1971 uint32_t vl = env->vl; \
89a32de2 1972 uint32_t esz = sizeof(ETYPE); \
1973 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1974 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
1975 uint32_t i; \
1976 \
f714361e 1977 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
1978 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1979 *((ETYPE *)vd + H(i)) = s1; \
1980 } \
f714361e 1981 env->vstart = 0; \
89a32de2 1982 /* set tail elements to 1s */ \
1983 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
1984}
1985
3479a814
FC
1986GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1987GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1988GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1989GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
f020a7a1 1990
3479a814 1991#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
f020a7a1
LZ
1992void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1993 uint32_t desc) \
1994{ \
1995 uint32_t vl = env->vl; \
89a32de2 1996 uint32_t esz = sizeof(ETYPE); \
1997 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1998 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
1999 uint32_t i; \
2000 \
f714361e 2001 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
2002 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
2003 } \
f714361e 2004 env->vstart = 0; \
89a32de2 2005 /* set tail elements to 1s */ \
2006 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2007}
2008
3479a814
FC
2009GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
2010GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
2011GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
2012GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
f020a7a1 2013
3479a814 2014#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
f020a7a1
LZ
2015void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2016 CPURISCVState *env, uint32_t desc) \
2017{ \
f020a7a1 2018 uint32_t vl = env->vl; \
89a32de2 2019 uint32_t esz = sizeof(ETYPE); \
2020 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2021 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2022 uint32_t i; \
2023 \
f714361e 2024 for (i = env->vstart; i < vl; i++) { \
f9298de5 2025 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
f020a7a1
LZ
2026 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
2027 } \
f714361e 2028 env->vstart = 0; \
89a32de2 2029 /* set tail elements to 1s */ \
2030 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2031}
2032
3479a814
FC
2033GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
2034GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
2035GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
2036GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
f020a7a1 2037
3479a814 2038#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
f020a7a1
LZ
2039void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2040 void *vs2, CPURISCVState *env, uint32_t desc) \
2041{ \
f020a7a1 2042 uint32_t vl = env->vl; \
89a32de2 2043 uint32_t esz = sizeof(ETYPE); \
2044 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2045 uint32_t vta = vext_vta(desc); \
f020a7a1
LZ
2046 uint32_t i; \
2047 \
f714361e 2048 for (i = env->vstart; i < vl; i++) { \
f020a7a1 2049 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 2050 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
f020a7a1
LZ
2051 (ETYPE)(target_long)s1); \
2052 *((ETYPE *)vd + H(i)) = d; \
2053 } \
f714361e 2054 env->vstart = 0; \
89a32de2 2055 /* set tail elements to 1s */ \
2056 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
f020a7a1
LZ
2057}
2058
3479a814
FC
2059GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
2060GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
2061GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
2062GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
eb2650e3
LZ
2063
2064/*
2065 *** Vector Fixed-Point Arithmetic Instructions
2066 */
2067
2068/* Vector Single-Width Saturating Add and Subtract */
2069
2070/*
2071 * As fixed-point instructions generally need a rounding mode and saturation,
2072 * define common macros for fixed-point arithmetic here.
2073 */
2074typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2075 CPURISCVState *env, int vxrm);
2076
2077#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2078static inline void \
2079do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2080 CPURISCVState *env, int vxrm) \
2081{ \
2082 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2083 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2084 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2085}
2086
2087static inline void
2088vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2089 CPURISCVState *env,
f9298de5 2090 uint32_t vl, uint32_t vm, int vxrm,
eb2650e3
LZ
2091 opivv2_rm_fn *fn)
2092{
f714361e 2093 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2094 if (!vm && !vext_elem_mask(v0, i)) {
eb2650e3
LZ
2095 continue;
2096 }
2097 fn(vd, vs1, vs2, i, env, vxrm);
2098 }
f714361e 2099 env->vstart = 0;
eb2650e3
LZ
2100}
2101
2102static inline void
2103vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2104 CPURISCVState *env,
8a085fb2 2105 uint32_t desc,
09106eed 2106 opivv2_rm_fn *fn, uint32_t esz)
eb2650e3 2107{
eb2650e3
LZ
2108 uint32_t vm = vext_vm(desc);
2109 uint32_t vl = env->vl;
09106eed 2110 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2111 uint32_t vta = vext_vta(desc);
eb2650e3
LZ
2112
2113 switch (env->vxrm) {
2114 case 0: /* rnu */
2115 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 2116 env, vl, vm, 0, fn);
eb2650e3
LZ
2117 break;
2118 case 1: /* rne */
2119 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 2120 env, vl, vm, 1, fn);
eb2650e3
LZ
2121 break;
2122 case 2: /* rdn */
2123 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 2124 env, vl, vm, 2, fn);
eb2650e3
LZ
2125 break;
2126 default: /* rod */
2127 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 2128 env, vl, vm, 3, fn);
eb2650e3
LZ
2129 break;
2130 }
09106eed 2131 /* set tail elements to 1s */
2132 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2133}
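/*
 * The switch on env->vxrm dispatches once per helper call and passes the
 * rounding mode down as a compile-time constant, presumably so that
 * get_round() can be folded into each specialised loop instead of
 * re-examining vxrm for every element.
 */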
2134
2135/* generate helpers for fixed point instructions with OPIVV format */
09106eed 2136#define GEN_VEXT_VV_RM(NAME, ESZ) \
eb2650e3
LZ
2137void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2138 CPURISCVState *env, uint32_t desc) \
2139{ \
8a085fb2 2140 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
09106eed 2141 do_##NAME, ESZ); \
eb2650e3
LZ
2142}
2143
2144static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2145{
2146 uint8_t res = a + b;
2147 if (res < a) {
2148 res = UINT8_MAX;
2149 env->vxsat = 0x1;
2150 }
2151 return res;
2152}
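/*
 * Unsigned overflow wraps, so the sum becomes smaller than either operand:
 * e.g. for 8 bits, 200 + 100 wraps to 44 < 200, triggering saturation to
 * UINT8_MAX.
 */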
2153
2154static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2155 uint16_t b)
2156{
2157 uint16_t res = a + b;
2158 if (res < a) {
2159 res = UINT16_MAX;
2160 env->vxsat = 0x1;
2161 }
2162 return res;
2163}
2164
2165static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2166 uint32_t b)
2167{
2168 uint32_t res = a + b;
2169 if (res < a) {
2170 res = UINT32_MAX;
2171 env->vxsat = 0x1;
2172 }
2173 return res;
2174}
2175
2176static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2177 uint64_t b)
2178{
2179 uint64_t res = a + b;
2180 if (res < a) {
2181 res = UINT64_MAX;
2182 env->vxsat = 0x1;
2183 }
2184 return res;
2185}
2186
2187RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2188RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2189RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2190RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
09106eed 2191GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2192GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2193GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2194GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
eb2650e3
LZ
2195
2196typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2197 CPURISCVState *env, int vxrm);
2198
2199#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2200static inline void \
2201do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2202 CPURISCVState *env, int vxrm) \
2203{ \
2204 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2205 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2206}
2207
2208static inline void
2209vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2210 CPURISCVState *env,
f9298de5 2211 uint32_t vl, uint32_t vm, int vxrm,
eb2650e3
LZ
2212 opivx2_rm_fn *fn)
2213{
f714361e 2214 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2215 if (!vm && !vext_elem_mask(v0, i)) {
eb2650e3
LZ
2216 continue;
2217 }
2218 fn(vd, s1, vs2, i, env, vxrm);
2219 }
f714361e 2220 env->vstart = 0;
eb2650e3
LZ
2221}
2222
2223static inline void
2224vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2225 CPURISCVState *env,
8a085fb2 2226 uint32_t desc,
09106eed 2227 opivx2_rm_fn *fn, uint32_t esz)
eb2650e3 2228{
eb2650e3
LZ
2229 uint32_t vm = vext_vm(desc);
2230 uint32_t vl = env->vl;
09106eed 2231 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2232 uint32_t vta = vext_vta(desc);
eb2650e3
LZ
2233
2234 switch (env->vxrm) {
2235 case 0: /* rnu */
2236 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2237 env, vl, vm, 0, fn);
eb2650e3
LZ
2238 break;
2239 case 1: /* rne */
2240 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2241 env, vl, vm, 1, fn);
eb2650e3
LZ
2242 break;
2243 case 2: /* rdn */
2244 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2245 env, vl, vm, 2, fn);
eb2650e3
LZ
2246 break;
2247 default: /* rod */
2248 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2249 env, vl, vm, 3, fn);
eb2650e3
LZ
2250 break;
2251 }
09106eed 2252 /* set tail elements to 1s */
2253 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
eb2650e3
LZ
2254}
2255
2256/* generate helpers for fixed point instructions with OPIVX format */
09106eed 2257#define GEN_VEXT_VX_RM(NAME, ESZ) \
eb2650e3
LZ
2258void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2259 void *vs2, CPURISCVState *env, uint32_t desc) \
2260{ \
8a085fb2 2261 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
09106eed 2262 do_##NAME, ESZ); \
eb2650e3
LZ
2263}
2264
2265RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2266RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2267RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2268RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
09106eed 2269GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2270GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2271GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2272GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
eb2650e3
LZ
2273
2274static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2275{
2276 int8_t res = a + b;
2277 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2278 res = a > 0 ? INT8_MAX : INT8_MIN;
2279 env->vxsat = 0x1;
2280 }
2281 return res;
2282}
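/*
 * Signed overflow can only happen when both operands have the same sign
 * and the result has the opposite sign, which is exactly when the sign
 * bit of (res ^ a) & (res ^ b) is set: e.g. 100 + 50 wraps to -106, and
 * the saturated result takes the sign of the operands (here INT8_MAX).
 */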
2283
2284static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2285{
2286 int16_t res = a + b;
2287 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2288 res = a > 0 ? INT16_MAX : INT16_MIN;
2289 env->vxsat = 0x1;
2290 }
2291 return res;
2292}
2293
2294static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2295{
2296 int32_t res = a + b;
2297 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2298 res = a > 0 ? INT32_MAX : INT32_MIN;
2299 env->vxsat = 0x1;
2300 }
2301 return res;
2302}
2303
2304static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2305{
2306 int64_t res = a + b;
2307 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2308 res = a > 0 ? INT64_MAX : INT64_MIN;
2309 env->vxsat = 0x1;
2310 }
2311 return res;
2312}
2313
2314RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2315RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2316RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2317RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
09106eed 2318GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2319GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2320GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2321GEN_VEXT_VV_RM(vsadd_vv_d, 8)
eb2650e3
LZ
2322
2323RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2324RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2325RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2326RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
09106eed 2327GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2328GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2329GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2330GEN_VEXT_VX_RM(vsadd_vx_d, 8)
eb2650e3
LZ
2331
2332static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2333{
2334 uint8_t res = a - b;
2335 if (res > a) {
2336 res = 0;
2337 env->vxsat = 0x1;
2338 }
2339 return res;
2340}
2341
2342static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2343 uint16_t b)
2344{
2345 uint16_t res = a - b;
2346 if (res > a) {
2347 res = 0;
2348 env->vxsat = 0x1;
2349 }
2350 return res;
2351}
2352
2353static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2354 uint32_t b)
2355{
2356 uint32_t res = a - b;
2357 if (res > a) {
2358 res = 0;
2359 env->vxsat = 0x1;
2360 }
2361 return res;
2362}
2363
2364static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2365 uint64_t b)
2366{
2367 uint64_t res = a - b;
2368 if (res > a) {
2369 res = 0;
2370 env->vxsat = 0x1;
2371 }
2372 return res;
2373}
2374
2375RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2376RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2377RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2378RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
09106eed 2379GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2380GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2381GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2382GEN_VEXT_VV_RM(vssubu_vv_d, 8)
eb2650e3
LZ
2383
2384RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2385RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2386RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2387RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
09106eed 2388GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2389GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2390GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2391GEN_VEXT_VX_RM(vssubu_vx_d, 8)
eb2650e3
LZ
2392
2393static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2394{
2395 int8_t res = a - b;
2396 if ((res ^ a) & (a ^ b) & INT8_MIN) {
65606f21 2397 res = a >= 0 ? INT8_MAX : INT8_MIN;
eb2650e3
LZ
2398 env->vxsat = 0x1;
2399 }
2400 return res;
2401}
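/*
 * For subtraction, overflow requires operands of opposite sign and a
 * result whose sign differs from the minuend, hence the
 * (res ^ a) & (a ^ b) test: e.g. -100 - 60 wraps to +96, saturating to
 * INT8_MIN because a < 0.
 */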
2402
2403static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2404{
2405 int16_t res = a - b;
2406 if ((res ^ a) & (a ^ b) & INT16_MIN) {
65606f21 2407 res = a >= 0 ? INT16_MAX : INT16_MIN;
eb2650e3
LZ
2408 env->vxsat = 0x1;
2409 }
2410 return res;
2411}
2412
2413static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2414{
2415 int32_t res = a - b;
2416 if ((res ^ a) & (a ^ b) & INT32_MIN) {
65606f21 2417 res = a >= 0 ? INT32_MAX : INT32_MIN;
eb2650e3
LZ
2418 env->vxsat = 0x1;
2419 }
2420 return res;
2421}
2422
2423static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2424{
2425 int64_t res = a - b;
2426 if ((res ^ a) & (a ^ b) & INT64_MIN) {
65606f21 2427 res = a >= 0 ? INT64_MAX : INT64_MIN;
eb2650e3
LZ
2428 env->vxsat = 0x1;
2429 }
2430 return res;
2431}
2432
2433RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2434RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2435RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2436RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
09106eed 2437GEN_VEXT_VV_RM(vssub_vv_b, 1)
2438GEN_VEXT_VV_RM(vssub_vv_h, 2)
2439GEN_VEXT_VV_RM(vssub_vv_w, 4)
2440GEN_VEXT_VV_RM(vssub_vv_d, 8)
eb2650e3
LZ
2441
2442RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2443RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2444RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2445RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
09106eed 2446GEN_VEXT_VX_RM(vssub_vx_b, 1)
2447GEN_VEXT_VX_RM(vssub_vx_h, 2)
2448GEN_VEXT_VX_RM(vssub_vx_w, 4)
2449GEN_VEXT_VX_RM(vssub_vx_d, 8)
b7aee481
LZ
2450
2451/* Vector Single-Width Averaging Add and Subtract */
2452static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2453{
2454 uint8_t d = extract64(v, shift, 1);
2455 uint8_t d1;
2456 uint64_t D1, D2;
2457
2458 if (shift == 0 || shift > 64) {
2459 return 0;
2460 }
2461
2462 d1 = extract64(v, shift - 1, 1);
2463 D1 = extract64(v, 0, shift);
2464 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2465 return d1;
2466 } else if (vxrm == 1) { /* round-to-nearest-even */
2467 if (shift > 1) {
2468 D2 = extract64(v, 0, shift - 1);
2469 return d1 & ((D2 != 0) | d);
2470 } else {
2471 return d1 & d;
2472 }
2473 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2474 return !d & (D1 != 0);
2475 }
2476 return 0; /* round-down (truncate) */
2477}
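/*
 * For example, with v = 0b10110 (22) and shift = 2 the discarded bits are
 * 0b10, so v >> shift = 5 and the exact quotient 22 / 4 = 5.5:
 *   rnu: round = 1 -> 6 (half rounds up)
 *   rne: round = 1 -> 6 (ties go to the even value)
 *   rdn: round = 0 -> 5 (truncate)
 *   rod: round = 0 -> 5 (the LSB is already odd, nothing to jam in)
 */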
2478
2479static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2480{
2481 int64_t res = (int64_t)a + b;
2482 uint8_t round = get_round(vxrm, res, 1);
2483
2484 return (res >> 1) + round;
2485}
2486
2487static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2488{
2489 int64_t res = a + b;
2490 uint8_t round = get_round(vxrm, res, 1);
2491 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2492
2493 /* With signed overflow, bit 64 is inverse of bit 63. */
2494 return ((res >> 1) ^ over) + round;
2495}
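/*
 * The exact sum of two int64_t values needs 65 bits; when the 64-bit add
 * overflows, bit 64 of the exact sum is the complement of bit 63 of res,
 * so xoring the sign bit back in after the shift reconstructs the exact
 * sum divided by two.  8-bit analogue: 100 + 100 wraps to -56, and
 * (-56 >> 1) ^ 0x80 = 100 = 200 / 2.
 */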
2496
2497RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2498RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2499RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2500RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
09106eed 2501GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2502GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2503GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2504GEN_VEXT_VV_RM(vaadd_vv_d, 8)
b7aee481
LZ
2505
2506RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2507RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2508RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2509RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
09106eed 2510GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2511GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2512GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2513GEN_VEXT_VX_RM(vaadd_vx_d, 8)
b7aee481 2514
8b99a110
FC
2515static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2516 uint32_t a, uint32_t b)
2517{
2518 uint64_t res = (uint64_t)a + b;
2519 uint8_t round = get_round(vxrm, res, 1);
2520
2521 return (res >> 1) + round;
2522}
2523
2524static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2525 uint64_t a, uint64_t b)
2526{
2527 uint64_t res = a + b;
2528 uint8_t round = get_round(vxrm, res, 1);
2529 uint64_t over = (uint64_t)(res < a) << 63;
2530
2531 return ((res >> 1) | over) + round;
2532}
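/*
 * Here 'over' is the carry out of the 64-bit add moved to bit 63, so
 * (res >> 1) | over is the full 65-bit unsigned sum shifted right by one.
 */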
2533
2534RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2535RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2536RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2537RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
09106eed 2538GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2539GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2540GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2541GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
8b99a110
FC
2542
2543RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2544RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2545RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2546RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
09106eed 2547GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2548GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2549GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2550GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
8b99a110 2551
b7aee481
LZ
2552static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2553{
2554 int64_t res = (int64_t)a - b;
2555 uint8_t round = get_round(vxrm, res, 1);
2556
2557 return (res >> 1) + round;
2558}
2559
2560static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2561{
2562 int64_t res = (int64_t)a - b;
2563 uint8_t round = get_round(vxrm, res, 1);
2564 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2565
2566 /* With signed overflow, bit 64 is inverse of bit 63. */
2567 return ((res >> 1) ^ over) + round;
2568}
2569
2570RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2571RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2572RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2573RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
09106eed 2574GEN_VEXT_VV_RM(vasub_vv_b, 1)
2575GEN_VEXT_VV_RM(vasub_vv_h, 2)
2576GEN_VEXT_VV_RM(vasub_vv_w, 4)
2577GEN_VEXT_VV_RM(vasub_vv_d, 8)
b7aee481
LZ
2578
2579RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2580RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2581RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2582RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
09106eed 2583GEN_VEXT_VX_RM(vasub_vx_b, 1)
2584GEN_VEXT_VX_RM(vasub_vx_h, 2)
2585GEN_VEXT_VX_RM(vasub_vx_w, 4)
2586GEN_VEXT_VX_RM(vasub_vx_d, 8)
9f0ff9e5 2587
8b99a110
FC
2588static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2589 uint32_t a, uint32_t b)
2590{
2591 int64_t res = (int64_t)a - b;
2592 uint8_t round = get_round(vxrm, res, 1);
2593
2594 return (res >> 1) + round;
2595}
2596
2597static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2598 uint64_t a, uint64_t b)
2599{
2600 uint64_t res = (uint64_t)a - b;
2601 uint8_t round = get_round(vxrm, res, 1);
2602 uint64_t over = (uint64_t)(res > a) << 63;
2603
2604 return ((res >> 1) | over) + round;
2605}
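/*
 * (res > a) detects the borrow, i.e. a negative 65-bit difference; placing
 * it at bit 63 sign-extends that difference into the halved result before
 * rounding.
 */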
2606
2607RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2608RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2609RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2610RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
09106eed 2611GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2612GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2613GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2614GEN_VEXT_VV_RM(vasubu_vv_d, 8)
8b99a110
FC
2615
2616RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2617RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2618RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2619RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
09106eed 2620GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2621GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2622GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2623GEN_VEXT_VX_RM(vasubu_vx_d, 8)
8b99a110 2624
9f0ff9e5
LZ
2625/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2626static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2627{
2628 uint8_t round;
2629 int16_t res;
2630
2631 res = (int16_t)a * (int16_t)b;
2632 round = get_round(vxrm, res, 7);
2633 res = (res >> 7) + round;
2634
2635 if (res > INT8_MAX) {
2636 env->vxsat = 0x1;
2637 return INT8_MAX;
2638 } else if (res < INT8_MIN) {
2639 env->vxsat = 0x1;
2640 return INT8_MIN;
2641 } else {
2642 return res;
2643 }
2644}
2645
2646static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2647{
2648 uint8_t round;
2649 int32_t res;
2650
2651 res = (int32_t)a * (int32_t)b;
2652 round = get_round(vxrm, res, 15);
2653 res = (res >> 15) + round;
2654
2655 if (res > INT16_MAX) {
2656 env->vxsat = 0x1;
2657 return INT16_MAX;
2658 } else if (res < INT16_MIN) {
2659 env->vxsat = 0x1;
2660 return INT16_MIN;
2661 } else {
2662 return res;
2663 }
2664}
2665
2666static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2667{
2668 uint8_t round;
2669 int64_t res;
2670
2671 res = (int64_t)a * (int64_t)b;
2672 round = get_round(vxrm, res, 31);
2673 res = (res >> 31) + round;
2674
2675 if (res > INT32_MAX) {
2676 env->vxsat = 0x1;
2677 return INT32_MAX;
2678 } else if (res < INT32_MIN) {
2679 env->vxsat = 0x1;
2680 return INT32_MIN;
2681 } else {
2682 return res;
2683 }
2684}
2685
2686static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2687{
2688 uint8_t round;
2689 uint64_t hi_64, lo_64;
2690 int64_t res;
2691
2692 if (a == INT64_MIN && b == INT64_MIN) {
2693 env->vxsat = 1;
2694 return INT64_MAX;
2695 }
2696
2697 muls64(&lo_64, &hi_64, a, b);
2698 round = get_round(vxrm, lo_64, 63);
2699 /*
2700 * Cannot overflow: the 128-bit product of two int64_t values always
2701 * has at least 2 sign bits (INT64_MIN * INT64_MIN is handled above).
2702 */
2703 res = (hi_64 << 1) | (lo_64 >> 63);
2704 if (round) {
2705 if (res == INT64_MAX) {
2706 env->vxsat = 1;
2707 } else {
2708 res += 1;
2709 }
2710 }
2711 return res;
2712}
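/*
 * vsmul treats the operands as signed fixed-point fractions with SEW - 1
 * fraction bits, so the result is (a * b) >> (SEW - 1) with rounding and
 * saturation.  E.g. for SEW = 8, 0x40 * 0x40 (0.5 * 0.5) yields
 * 0x1000 >> 7 = 0x20 = 0.25.
 */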
2713
2714RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2715RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2716RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2717RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
09106eed 2718GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2719GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2720GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2721GEN_VEXT_VV_RM(vsmul_vv_d, 8)
9f0ff9e5
LZ
2722
2723RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2724RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2725RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2726RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
09106eed 2727GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2728GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2729GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2730GEN_VEXT_VX_RM(vsmul_vx_d, 8)
0a1eaf00 2731
04a61406
LZ
2732/* Vector Single-Width Scaling Shift Instructions */
2733static inline uint8_t
2734vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2735{
2736 uint8_t round, shift = b & 0x7;
2737 uint8_t res;
2738
2739 round = get_round(vxrm, a, shift);
2740 res = (a >> shift) + round;
2741 return res;
2742}
2743static inline uint16_t
2744vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2745{
2746 uint8_t round, shift = b & 0xf;
2747 uint16_t res;
2748
2749 round = get_round(vxrm, a, shift);
2750 res = (a >> shift) + round;
2751 return res;
2752}
2753static inline uint32_t
2754vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2755{
2756 uint8_t round, shift = b & 0x1f;
2757 uint32_t res;
2758
2759 round = get_round(vxrm, a, shift);
2760 res = (a >> shift) + round;
2761 return res;
2762}
2763static inline uint64_t
2764vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2765{
2766 uint8_t round, shift = b & 0x3f;
2767 uint64_t res;
2768
2769 round = get_round(vxrm, a, shift);
2770 res = (a >> shift) + round;
2771 return res;
2772}
2773RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2774RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2775RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2776RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
09106eed 2777GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2778GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2779GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2780GEN_VEXT_VV_RM(vssrl_vv_d, 8)
04a61406
LZ
2781
2782RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2783RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2784RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2785RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
09106eed 2786GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2787GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2788GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2789GEN_VEXT_VX_RM(vssrl_vx_d, 8)
04a61406
LZ
2790
2791static inline int8_t
2792vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2793{
2794 uint8_t round, shift = b & 0x7;
2795 int8_t res;
2796
2797 round = get_round(vxrm, a, shift);
2798 res = (a >> shift) + round;
2799 return res;
2800}
2801static inline int16_t
2802vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2803{
2804 uint8_t round, shift = b & 0xf;
2805 int16_t res;
2806
2807 round = get_round(vxrm, a, shift);
2808 res = (a >> shift) + round;
2809 return res;
2810}
2811static inline int32_t
2812vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2813{
2814 uint8_t round, shift = b & 0x1f;
2815 int32_t res;
2816
2817 round = get_round(vxrm, a, shift);
2818 res = (a >> shift) + round;
2819 return res;
2820}
2821static inline int64_t
2822vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2823{
2824 uint8_t round, shift = b & 0x3f;
2825 int64_t res;
2826
2827 round = get_round(vxrm, a, shift);
2828 res = (a >> shift) + round;
2829 return res;
2830}
9ff3d287 2831
04a61406
LZ
2832RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2833RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2834RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2835RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
09106eed 2836GEN_VEXT_VV_RM(vssra_vv_b, 1)
2837GEN_VEXT_VV_RM(vssra_vv_h, 2)
2838GEN_VEXT_VV_RM(vssra_vv_w, 4)
2839GEN_VEXT_VV_RM(vssra_vv_d, 8)
04a61406
LZ
2840
2841RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2842RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2843RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2844RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
09106eed 2845GEN_VEXT_VX_RM(vssra_vx_b, 1)
2846GEN_VEXT_VX_RM(vssra_vx_h, 2)
2847GEN_VEXT_VX_RM(vssra_vx_w, 4)
2848GEN_VEXT_VX_RM(vssra_vx_d, 8)
9ff3d287
LZ
2849
2850/* Vector Narrowing Fixed-Point Clip Instructions */
2851static inline int8_t
2852vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2853{
2854 uint8_t round, shift = b & 0xf;
2855 int16_t res;
2856
2857 round = get_round(vxrm, a, shift);
2858 res = (a >> shift) + round;
2859 if (res > INT8_MAX) {
2860 env->vxsat = 0x1;
2861 return INT8_MAX;
2862 } else if (res < INT8_MIN) {
2863 env->vxsat = 0x1;
2864 return INT8_MIN;
2865 } else {
2866 return res;
2867 }
2868}
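/*
 * E.g. narrowing a = 0x1234 with shift = 8 gives 0x12 = 18, which fits in
 * int8_t; with shift = 4 the shifted value 0x123 exceeds INT8_MAX, so the
 * result saturates to 127 and vxsat is set.
 */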
2869
2870static inline int16_t
2871vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2872{
2873 uint8_t round, shift = b & 0x1f;
2874 int32_t res;
2875
2876 round = get_round(vxrm, a, shift);
2877 res = (a >> shift) + round;
2878 if (res > INT16_MAX) {
2879 env->vxsat = 0x1;
2880 return INT16_MAX;
2881 } else if (res < INT16_MIN) {
2882 env->vxsat = 0x1;
2883 return INT16_MIN;
2884 } else {
2885 return res;
2886 }
2887}
2888
2889static inline int32_t
2890vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2891{
2892 uint8_t round, shift = b & 0x3f;
2893 int64_t res;
2894
2895 round = get_round(vxrm, a, shift);
2896 res = (a >> shift) + round;
2897 if (res > INT32_MAX) {
2898 env->vxsat = 0x1;
2899 return INT32_MAX;
2900 } else if (res < INT32_MIN) {
2901 env->vxsat = 0x1;
2902 return INT32_MIN;
2903 } else {
2904 return res;
2905 }
2906}
2907
a70b3a73
FC
2908RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2909RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2910RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
09106eed 2911GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2912GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2913GEN_VEXT_VV_RM(vnclip_wv_w, 4)
a70b3a73
FC
2914
2915RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2916RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2917RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
09106eed 2918GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2919GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2920GEN_VEXT_VX_RM(vnclip_wx_w, 4)
9ff3d287
LZ
2921
2922static inline uint8_t
2923vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2924{
2925 uint8_t round, shift = b & 0xf;
2926 uint16_t res;
2927
2928 round = get_round(vxrm, a, shift);
2929 res = (a >> shift) + round;
2930 if (res > UINT8_MAX) {
2931 env->vxsat = 0x1;
2932 return UINT8_MAX;
2933 } else {
2934 return res;
2935 }
2936}
2937
2938static inline uint16_t
2939vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2940{
2941 uint8_t round, shift = b & 0x1f;
2942 uint32_t res;
2943
2944 round = get_round(vxrm, a, shift);
2945 res = (a >> shift) + round;
2946 if (res > UINT16_MAX) {
2947 env->vxsat = 0x1;
2948 return UINT16_MAX;
2949 } else {
2950 return res;
2951 }
2952}
2953
2954static inline uint32_t
2955vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2956{
2957 uint8_t round, shift = b & 0x3f;
a70b3a73 2958 uint64_t res;
9ff3d287
LZ
2959
2960 round = get_round(vxrm, a, shift);
2961 res = (a >> shift) + round;
2962 if (res > UINT32_MAX) {
2963 env->vxsat = 0x1;
2964 return UINT32_MAX;
2965 } else {
2966 return res;
2967 }
2968}
2969
a70b3a73
FC
2970RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2971RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2972RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
09106eed 2973GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
2974GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
2975GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
9ff3d287 2976
a70b3a73
FC
2977RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2978RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2979RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
09106eed 2980GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
2981GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
2982GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
ce2a0343
LZ
2983
2984/*
2985 *** Vector Floating-Point Arithmetic Instructions
2986 */
2987/* Vector Single-Width Floating-Point Add/Subtract Instructions */
2988#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2989static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2990 CPURISCVState *env) \
2991{ \
2992 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2993 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2994 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
2995}
2996
5eacf7d8 2997#define GEN_VEXT_VV_ENV(NAME, ESZ) \
ce2a0343
LZ
2998void HELPER(NAME)(void *vd, void *v0, void *vs1, \
2999 void *vs2, CPURISCVState *env, \
3000 uint32_t desc) \
3001{ \
ce2a0343
LZ
3002 uint32_t vm = vext_vm(desc); \
3003 uint32_t vl = env->vl; \
5eacf7d8 3004 uint32_t total_elems = \
3005 vext_get_total_elems(env, desc, ESZ); \
3006 uint32_t vta = vext_vta(desc); \
ce2a0343
LZ
3007 uint32_t i; \
3008 \
f714361e 3009 for (i = env->vstart; i < vl; i++) { \
f9298de5 3010 if (!vm && !vext_elem_mask(v0, i)) { \
ce2a0343
LZ
3011 continue; \
3012 } \
3013 do_##NAME(vd, vs1, vs2, i, env); \
3014 } \
f714361e 3015 env->vstart = 0; \
5eacf7d8 3016 /* set tail elements to 1s */ \
3017 vext_set_elems_1s(vd, vta, vl * ESZ, \
3018 total_elems * ESZ); \
ce2a0343
LZ
3019}
3020
3021RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3022RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3023RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
5eacf7d8 3024GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
3025GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
3026GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
ce2a0343
LZ
3027
3028#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3029static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3030 CPURISCVState *env) \
3031{ \
3032 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3033 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3034}
3035
5eacf7d8 3036#define GEN_VEXT_VF(NAME, ESZ) \
ce2a0343
LZ
3037void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
3038 void *vs2, CPURISCVState *env, \
3039 uint32_t desc) \
3040{ \
ce2a0343
LZ
3041 uint32_t vm = vext_vm(desc); \
3042 uint32_t vl = env->vl; \
5eacf7d8 3043 uint32_t total_elems = \
3044 vext_get_total_elems(env, desc, ESZ); \
3045 uint32_t vta = vext_vta(desc); \
ce2a0343
LZ
3046 uint32_t i; \
3047 \
f714361e 3048 for (i = env->vstart; i < vl; i++) { \
f9298de5 3049 if (!vm && !vext_elem_mask(v0, i)) { \
ce2a0343
LZ
3050 continue; \
3051 } \
3052 do_##NAME(vd, s1, vs2, i, env); \
3053 } \
f714361e 3054 env->vstart = 0; \
5eacf7d8 3055 /* set tail elements to 1s */ \
3056 vext_set_elems_1s(vd, vta, vl * ESZ, \
3057 total_elems * ESZ); \
ce2a0343
LZ
3058}
3059
3060RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3061RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3062RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
5eacf7d8 3063GEN_VEXT_VF(vfadd_vf_h, 2)
3064GEN_VEXT_VF(vfadd_vf_w, 4)
3065GEN_VEXT_VF(vfadd_vf_d, 8)
ce2a0343
LZ
3066
3067RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3068RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3069RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
5eacf7d8 3070GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
3071GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
3072GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
ce2a0343
LZ
3073RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3074RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3075RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
5eacf7d8 3076GEN_VEXT_VF(vfsub_vf_h, 2)
3077GEN_VEXT_VF(vfsub_vf_w, 4)
3078GEN_VEXT_VF(vfsub_vf_d, 8)
ce2a0343
LZ
3079
3080static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3081{
3082 return float16_sub(b, a, s);
3083}
3084
3085static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3086{
3087 return float32_sub(b, a, s);
3088}
3089
3090static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3091{
3092 return float64_sub(b, a, s);
3093}
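/*
 * vfrsub.vf computes f[rs1] - vs2[i]; the float*_rsub wrappers above simply
 * swap the operands so the generic OPFVF2 template can be reused.
 */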
3094
3095RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3096RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3097RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
5eacf7d8 3098GEN_VEXT_VF(vfrsub_vf_h, 2)
3099GEN_VEXT_VF(vfrsub_vf_w, 4)
3100GEN_VEXT_VF(vfrsub_vf_d, 8)
3101
3102/* Vector Widening Floating-Point Add/Subtract Instructions */
3103static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3104{
3105 return float32_add(float16_to_float32(a, true, s),
3106 float16_to_float32(b, true, s), s);
3107}
3108
3109static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3110{
3111 return float64_add(float32_to_float64(a, s),
3112 float32_to_float64(b, s), s);
3113
3114}
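/*
 * For the widening add/subtract both single-width inputs are converted to
 * the double-width format (an exact conversion) before the operation, so
 * each element is rounded only once, at 2*SEW.
 */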
3115
3116RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3117RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
5eacf7d8 3118GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3119GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
3120RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3121RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
5eacf7d8 3122GEN_VEXT_VF(vfwadd_vf_h, 4)
3123GEN_VEXT_VF(vfwadd_vf_w, 8)
3124
3125static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3126{
3127 return float32_sub(float16_to_float32(a, true, s),
3128 float16_to_float32(b, true, s), s);
3129}
3130
3131static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3132{
3133 return float64_sub(float32_to_float64(a, s),
3134 float32_to_float64(b, s), s);
3135
3136}
3137
3138RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3139RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
5eacf7d8 3140GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3141GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
3142RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3143RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
5eacf7d8 3144GEN_VEXT_VF(vfwsub_vf_h, 4)
3145GEN_VEXT_VF(vfwsub_vf_w, 8)
3146
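/*
 * The .wv/.wf forms (WOP_WUUU_*) take a first operand that is already
 * 2*SEW wide, so only the narrower second operand needs converting.
 */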
3147static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3148{
3149 return float32_add(a, float16_to_float32(b, true, s), s);
3150}
3151
3152static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3153{
3154 return float64_add(a, float32_to_float64(b, s), s);
3155}
3156
3157RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3158RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
5eacf7d8 3159GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3160GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
3161RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3162RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
5eacf7d8 3163GEN_VEXT_VF(vfwadd_wf_h, 4)
3164GEN_VEXT_VF(vfwadd_wf_w, 8)
3165
3166static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3167{
3168 return float32_sub(a, float16_to_float32(b, true, s), s);
3169}
3170
3171static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3172{
3173 return float64_sub(a, float32_to_float64(b, s), s);
3174}
3175
3176RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3177RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
5eacf7d8 3178GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3179GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
3180RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3181RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
5eacf7d8 3182GEN_VEXT_VF(vfwsub_wf_h, 4)
3183GEN_VEXT_VF(vfwsub_wf_w, 8)
3184
3185/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3186RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3187RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3188RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
5eacf7d8 3189GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3190GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3191GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
3192RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3193RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3194RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
5eacf7d8 3195GEN_VEXT_VF(vfmul_vf_h, 2)
3196GEN_VEXT_VF(vfmul_vf_w, 4)
3197GEN_VEXT_VF(vfmul_vf_d, 8)
3198
3199RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3200RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3201RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
5eacf7d8 3202GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3203GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3204GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
3205RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3206RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3207RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
5eacf7d8 3208GEN_VEXT_VF(vfdiv_vf_h, 2)
3209GEN_VEXT_VF(vfdiv_vf_w, 4)
3210GEN_VEXT_VF(vfdiv_vf_d, 8)
3211
3212static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3213{
3214 return float16_div(b, a, s);
3215}
3216
3217static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3218{
3219 return float32_div(b, a, s);
3220}
3221
3222static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3223{
3224 return float64_div(b, a, s);
3225}
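/* vfrdiv.vf computes f[rs1] / vs2[i], hence the swapped operands above. */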
3226
3227RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3228RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3229RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
5eacf7d8 3230GEN_VEXT_VF(vfrdiv_vf_h, 2)
3231GEN_VEXT_VF(vfrdiv_vf_w, 4)
3232GEN_VEXT_VF(vfrdiv_vf_d, 8)
3233
3234/* Vector Widening Floating-Point Multiply */
3235static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3236{
3237 return float32_mul(float16_to_float32(a, true, s),
3238 float16_to_float32(b, true, s), s);
3239}
3240
3241static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3242{
3243 return float64_mul(float32_to_float64(a, s),
3244 float32_to_float64(b, s), s);
3245
3246}
3247RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3248RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
5eacf7d8 3249GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3250GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
3251RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3252RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
5eacf7d8 3253GEN_VEXT_VF(vfwmul_vf_h, 4)
3254GEN_VEXT_VF(vfwmul_vf_w, 8)
3255
3256/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3257#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3258static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3259 CPURISCVState *env) \
3260{ \
3261 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3262 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3263 TD d = *((TD *)vd + HD(i)); \
3264 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3265}
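/*
 * The fused multiply-add helpers receive (vs2[i], vs1[i]/f[rs1], vd[i]);
 * the float_muladd_negate_* flags, together with the operand order each
 * wrapper passes to float*_muladd, select among the vfmacc/vfnmacc/vfmsac/
 * vfnmsac and vfmadd/vfnmadd/vfmsub/vfnmsub variants.
 */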
3266
3267static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3268{
3269 return float16_muladd(a, b, d, 0, s);
3270}
3271
3272static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3273{
3274 return float32_muladd(a, b, d, 0, s);
3275}
3276
3277static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3278{
3279 return float64_muladd(a, b, d, 0, s);
3280}
3281
3282RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3283RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3284RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
5eacf7d8 3285GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3286GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3287GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
3288
3289#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3290static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3291 CPURISCVState *env) \
3292{ \
3293 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3294 TD d = *((TD *)vd + HD(i)); \
3295 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3296}
3297
3298RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3299RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3300RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
5eacf7d8 3301GEN_VEXT_VF(vfmacc_vf_h, 2)
3302GEN_VEXT_VF(vfmacc_vf_w, 4)
3303GEN_VEXT_VF(vfmacc_vf_d, 8)
3304
3305static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3306{
3307 return float16_muladd(a, b, d,
3308 float_muladd_negate_c | float_muladd_negate_product, s);
3309}
3310
3311static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3312{
3313 return float32_muladd(a, b, d,
3314 float_muladd_negate_c | float_muladd_negate_product, s);
3315}
3316
3317static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3318{
3319 return float64_muladd(a, b, d,
3320 float_muladd_negate_c | float_muladd_negate_product, s);
3321}
3322
3323RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3324RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3325RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
5eacf7d8 3326GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3327GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3328GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
3329RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3330RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3331RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
5eacf7d8 3332GEN_VEXT_VF(vfnmacc_vf_h, 2)
3333GEN_VEXT_VF(vfnmacc_vf_w, 4)
3334GEN_VEXT_VF(vfnmacc_vf_d, 8)
3335
3336static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3337{
3338 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3339}
3340
3341static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3342{
3343 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3344}
3345
3346static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3347{
3348 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3349}
3350
3351RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3352RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3353RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
5eacf7d8 3354GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3355GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3356GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
3357RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3358RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3359RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
5eacf7d8 3360GEN_VEXT_VF(vfmsac_vf_h, 2)
3361GEN_VEXT_VF(vfmsac_vf_w, 4)
3362GEN_VEXT_VF(vfmsac_vf_d, 8)
3363
3364static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3365{
3366 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3367}
3368
3369static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3370{
3371 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3372}
3373
3374static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3375{
3376 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3377}
3378
3379RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3380RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3381RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
5eacf7d8 3382GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3383GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3384GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
3385RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3386RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3387RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
5eacf7d8 3388GEN_VEXT_VF(vfnmsac_vf_h, 2)
3389GEN_VEXT_VF(vfnmsac_vf_w, 4)
3390GEN_VEXT_VF(vfnmsac_vf_d, 8)
3391
3392static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3393{
3394 return float16_muladd(d, b, a, 0, s);
3395}
3396
3397static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3398{
3399 return float32_muladd(d, b, a, 0, s);
3400}
3401
3402static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3403{
3404 return float64_muladd(d, b, a, 0, s);
3405}
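/*
 * The *madd/*msub group multiplies the accumulator rather than adding to it:
 * vd[i] = +-(vd[i] * op1) +- vs2[i], which is why these wrappers pass
 * (d, b, a) to float*_muladd instead of (a, b, d).
 */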
3406
3407RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3408RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3409RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
5eacf7d8 3410GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3411GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3412GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
3413RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3414RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3415RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
5eacf7d8 3416GEN_VEXT_VF(vfmadd_vf_h, 2)
3417GEN_VEXT_VF(vfmadd_vf_w, 4)
3418GEN_VEXT_VF(vfmadd_vf_d, 8)
3419
3420static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3421{
3422 return float16_muladd(d, b, a,
3423 float_muladd_negate_c | float_muladd_negate_product, s);
3424}
3425
3426static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3427{
3428 return float32_muladd(d, b, a,
3429 float_muladd_negate_c | float_muladd_negate_product, s);
3430}
3431
3432static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3433{
3434 return float64_muladd(d, b, a,
3435 float_muladd_negate_c | float_muladd_negate_product, s);
3436}
3437
3438RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3439RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3440RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
5eacf7d8 3441GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3442GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3443GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
3444RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3445RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3446RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
5eacf7d8 3447GEN_VEXT_VF(vfnmadd_vf_h, 2)
3448GEN_VEXT_VF(vfnmadd_vf_w, 4)
3449GEN_VEXT_VF(vfnmadd_vf_d, 8)
3450
3451static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3452{
3453 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3454}
3455
3456static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3457{
3458 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3459}
3460
3461static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3462{
3463 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3464}
3465
3466RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3467RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3468RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
5eacf7d8 3469GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3470GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3471GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
3472RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3473RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3474RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
5eacf7d8 3475GEN_VEXT_VF(vfmsub_vf_h, 2)
3476GEN_VEXT_VF(vfmsub_vf_w, 4)
3477GEN_VEXT_VF(vfmsub_vf_d, 8)
3478
3479static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3480{
3481 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3482}
3483
3484static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3485{
3486 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3487}
3488
3489static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3490{
3491 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3492}
3493
3494RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3495RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3496RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
5eacf7d8 3497GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3498GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3499GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
3500RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3501RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3502RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
5eacf7d8 3503GEN_VEXT_VF(vfnmsub_vf_h, 2)
3504GEN_VEXT_VF(vfnmsub_vf_w, 4)
3505GEN_VEXT_VF(vfnmsub_vf_d, 8)
3506
3507/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3508static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3509{
3510 return float32_muladd(float16_to_float32(a, true, s),
3511 float16_to_float32(b, true, s), d, 0, s);
3512}
3513
3514static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3515{
3516 return float64_muladd(float32_to_float64(a, s),
3517 float32_to_float64(b, s), d, 0, s);
3518}
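/*
 * Widening FMA: both single-width sources are converted exactly to 2*SEW
 * and fused with the 2*SEW accumulator in a single float*_muladd, so each
 * element is rounded only once.
 */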
3519
3520RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3521RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
5eacf7d8 3522GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3523GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
3524RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3525RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
5eacf7d8 3526GEN_VEXT_VF(vfwmacc_vf_h, 4)
3527GEN_VEXT_VF(vfwmacc_vf_w, 8)
3528
3529static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3530{
3531 return float32_muladd(float16_to_float32(a, true, s),
3532 float16_to_float32(b, true, s), d,
3533 float_muladd_negate_c | float_muladd_negate_product, s);
3534}
3535
3536static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3537{
3538 return float64_muladd(float32_to_float64(a, s),
3539 float32_to_float64(b, s), d,
3540 float_muladd_negate_c | float_muladd_negate_product, s);
3541}
3542
3543RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3544RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
5eacf7d8 3545GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3546GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
3547RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3548RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
5eacf7d8 3549GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3550GEN_VEXT_VF(vfwnmacc_vf_w, 8)
3551
3552static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3553{
3554 return float32_muladd(float16_to_float32(a, true, s),
3555 float16_to_float32(b, true, s), d,
3556 float_muladd_negate_c, s);
3557}
3558
3559static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3560{
3561 return float64_muladd(float32_to_float64(a, s),
3562 float32_to_float64(b, s), d,
3563 float_muladd_negate_c, s);
3564}
3565
3566RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3567RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
5eacf7d8 3568GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3569GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
3570RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3571RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
5eacf7d8 3572GEN_VEXT_VF(vfwmsac_vf_h, 4)
3573GEN_VEXT_VF(vfwmsac_vf_w, 8)
3574
3575static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3576{
3577 return float32_muladd(float16_to_float32(a, true, s),
3578 float16_to_float32(b, true, s), d,
3579 float_muladd_negate_product, s);
3580}
3581
3582static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3583{
3584 return float64_muladd(float32_to_float64(a, s),
3585 float32_to_float64(b, s), d,
3586 float_muladd_negate_product, s);
3587}
3588
3589RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3590RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
5eacf7d8 3591GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3592GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
3593RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3594RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
5eacf7d8 3595GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3596GEN_VEXT_VF(vfwnmsac_vf_w, 8)
3597
3598/* Vector Floating-Point Square-Root Instruction */
3599/* (TD, T2, TX2) */
3600#define OP_UU_H uint16_t, uint16_t, uint16_t
3601#define OP_UU_W uint32_t, uint32_t, uint32_t
3602#define OP_UU_D uint64_t, uint64_t, uint64_t
3603
3604#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3605static void do_##NAME(void *vd, void *vs2, int i, \
3606 CPURISCVState *env) \
3607{ \
3608 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3609 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3610}
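/*
 * OPFVV1 is the single-source per-element template; GEN_VEXT_V_ENV wraps it
 * with masking, vstart handling and tail-agnostic fill for operations that
 * need the softfloat status (square root, estimates, type conversions).
 */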
3611
5eacf7d8 3612#define GEN_VEXT_V_ENV(NAME, ESZ) \
3613void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3614 CPURISCVState *env, uint32_t desc) \
3615{ \
3616 uint32_t vm = vext_vm(desc); \
3617 uint32_t vl = env->vl; \
5eacf7d8 3618 uint32_t total_elems = \
3619 vext_get_total_elems(env, desc, ESZ); \
3620 uint32_t vta = vext_vta(desc); \
3621 uint32_t i; \
3622 \
3623 if (vl == 0) { \
3624 return; \
3625 } \
f714361e 3626 for (i = env->vstart; i < vl; i++) { \
f9298de5 3627 if (!vm && !vext_elem_mask(v0, i)) { \
3628 continue; \
3629 } \
3630 do_##NAME(vd, vs2, i, env); \
3631 } \
f714361e 3632 env->vstart = 0; \
5eacf7d8 3633 vext_set_elems_1s(vd, vta, vl * ESZ, \
3634 total_elems * ESZ); \
3635}
3636
3637RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3638RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3639RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
5eacf7d8 3640GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3641GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3642GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
230b53dd 3643
3644/*
3645 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3646 *
3647 * Adapted from riscv-v-spec recip.c:
3648 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3649 */
3650static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3651{
3652 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3653 uint64_t exp = extract64(f, frac_size, exp_size);
3654 uint64_t frac = extract64(f, 0, frac_size);
3655
3656 const uint8_t lookup_table[] = {
3657 52, 51, 50, 48, 47, 46, 44, 43,
3658 42, 41, 40, 39, 38, 36, 35, 34,
3659 33, 32, 31, 30, 30, 29, 28, 27,
3660 26, 25, 24, 23, 23, 22, 21, 20,
3661 19, 19, 18, 17, 16, 16, 15, 14,
3662 14, 13, 12, 12, 11, 10, 10, 9,
3663 9, 8, 7, 7, 6, 6, 5, 4,
3664 4, 3, 3, 2, 2, 1, 1, 0,
3665 127, 125, 123, 121, 119, 118, 116, 114,
3666 113, 111, 109, 108, 106, 105, 103, 102,
3667 100, 99, 97, 96, 95, 93, 92, 91,
3668 90, 88, 87, 86, 85, 84, 83, 82,
3669 80, 79, 78, 77, 76, 75, 74, 73,
3670 72, 71, 70, 70, 69, 68, 67, 66,
3671 65, 64, 63, 63, 62, 61, 60, 59,
3672 59, 58, 57, 56, 56, 55, 54, 53
3673 };
3674 const int precision = 7;
3675
3676 if (exp == 0 && frac != 0) { /* subnormal */
3677 /* Normalize the subnormal. */
3678 while (extract64(frac, frac_size - 1, 1) == 0) {
3679 exp--;
3680 frac <<= 1;
3681 }
3682
3683 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3684 }
3685
3686 int idx = ((exp & 1) << (precision - 1)) |
3687 (frac >> (frac_size - precision + 1));
3688 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3689 (frac_size - precision);
3690 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3691
3692 uint64_t val = 0;
3693 val = deposit64(val, 0, frac_size, out_frac);
3694 val = deposit64(val, frac_size, exp_size, out_exp);
3695 val = deposit64(val, frac_size + exp_size, 1, sign);
3696 return val;
3697}
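/*
 * frsqrt7() implements the 7-bit estimate from the reference recip.c:
 * subnormal inputs are normalized first, the 128-entry table is indexed by
 * the exponent's low bit and the top six fraction bits, and the biased
 * result exponent is effectively (3 * bias - 1 - exp) / 2.
 */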
3698
3699static float16 frsqrt7_h(float16 f, float_status *s)
3700{
3701 int exp_size = 5, frac_size = 10;
3702 bool sign = float16_is_neg(f);
3703
3704 /*
3705 * frsqrt7(sNaN) = canonical NaN
3706 * frsqrt7(-inf) = canonical NaN
3707 * frsqrt7(-normal) = canonical NaN
3708 * frsqrt7(-subnormal) = canonical NaN
3709 */
3710 if (float16_is_signaling_nan(f, s) ||
3711 (float16_is_infinity(f) && sign) ||
3712 (float16_is_normal(f) && sign) ||
3713 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3714 s->float_exception_flags |= float_flag_invalid;
3715 return float16_default_nan(s);
3716 }
3717
3718 /* frsqrt7(qNaN) = canonical NaN */
3719 if (float16_is_quiet_nan(f, s)) {
3720 return float16_default_nan(s);
3721 }
3722
3723 /* frsqrt7(+-0) = +-inf */
3724 if (float16_is_zero(f)) {
3725 s->float_exception_flags |= float_flag_divbyzero;
3726 return float16_set_sign(float16_infinity, sign);
3727 }
3728
3729 /* frsqrt7(+inf) = +0 */
3730 if (float16_is_infinity(f) && !sign) {
3731 return float16_set_sign(float16_zero, sign);
3732 }
3733
3734 /* +normal, +subnormal */
3735 uint64_t val = frsqrt7(f, exp_size, frac_size);
3736 return make_float16(val);
3737}
3738
3739static float32 frsqrt7_s(float32 f, float_status *s)
3740{
3741 int exp_size = 8, frac_size = 23;
3742 bool sign = float32_is_neg(f);
3743
3744 /*
3745 * frsqrt7(sNaN) = canonical NaN
3746 * frsqrt7(-inf) = canonical NaN
3747 * frsqrt7(-normal) = canonical NaN
3748 * frsqrt7(-subnormal) = canonical NaN
3749 */
3750 if (float32_is_signaling_nan(f, s) ||
3751 (float32_is_infinity(f) && sign) ||
3752 (float32_is_normal(f) && sign) ||
3753 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3754 s->float_exception_flags |= float_flag_invalid;
3755 return float32_default_nan(s);
3756 }
3757
3758 /* frsqrt7(qNaN) = canonical NaN */
3759 if (float32_is_quiet_nan(f, s)) {
3760 return float32_default_nan(s);
3761 }
3762
3763 /* frsqrt7(+-0) = +-inf */
3764 if (float32_is_zero(f)) {
3765 s->float_exception_flags |= float_flag_divbyzero;
3766 return float32_set_sign(float32_infinity, sign);
3767 }
3768
3769 /* frsqrt7(+inf) = +0 */
3770 if (float32_is_infinity(f) && !sign) {
3771 return float32_set_sign(float32_zero, sign);
3772 }
3773
3774 /* +normal, +subnormal */
3775 uint64_t val = frsqrt7(f, exp_size, frac_size);
3776 return make_float32(val);
3777}
3778
3779static float64 frsqrt7_d(float64 f, float_status *s)
3780{
3781 int exp_size = 11, frac_size = 52;
3782 bool sign = float64_is_neg(f);
3783
3784 /*
3785 * frsqrt7(sNaN) = canonical NaN
3786 * frsqrt7(-inf) = canonical NaN
3787 * frsqrt7(-normal) = canonical NaN
3788 * frsqrt7(-subnormal) = canonical NaN
3789 */
3790 if (float64_is_signaling_nan(f, s) ||
3791 (float64_is_infinity(f) && sign) ||
3792 (float64_is_normal(f) && sign) ||
3793 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3794 s->float_exception_flags |= float_flag_invalid;
3795 return float64_default_nan(s);
3796 }
3797
3798 /* frsqrt7(qNaN) = canonical NaN */
3799 if (float64_is_quiet_nan(f, s)) {
3800 return float64_default_nan(s);
3801 }
3802
3803 /* frsqrt7(+-0) = +-inf */
3804 if (float64_is_zero(f)) {
3805 s->float_exception_flags |= float_flag_divbyzero;
3806 return float64_set_sign(float64_infinity, sign);
3807 }
3808
3809 /* frsqrt7(+inf) = +0 */
3810 if (float64_is_infinity(f) && !sign) {
3811 return float64_set_sign(float64_zero, sign);
3812 }
3813
3814 /* +normal, +subnormal */
3815 uint64_t val = frsqrt7(f, exp_size, frac_size);
3816 return make_float64(val);
3817}
3818
3819RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3820RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3821RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
5eacf7d8 3822GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3823GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3824GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
e848a1e5 3825
3826/*
3827 * Vector Floating-Point Reciprocal Estimate Instruction
3828 *
3829 * Adapted from riscv-v-spec recip.c:
3830 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3831 */
3832static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3833 float_status *s)
3834{
3835 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3836 uint64_t exp = extract64(f, frac_size, exp_size);
3837 uint64_t frac = extract64(f, 0, frac_size);
3838
3839 const uint8_t lookup_table[] = {
3840 127, 125, 123, 121, 119, 117, 116, 114,
3841 112, 110, 109, 107, 105, 104, 102, 100,
3842 99, 97, 96, 94, 93, 91, 90, 88,
3843 87, 85, 84, 83, 81, 80, 79, 77,
3844 76, 75, 74, 72, 71, 70, 69, 68,
3845 66, 65, 64, 63, 62, 61, 60, 59,
3846 58, 57, 56, 55, 54, 53, 52, 51,
3847 50, 49, 48, 47, 46, 45, 44, 43,
3848 42, 41, 40, 40, 39, 38, 37, 36,
3849 35, 35, 34, 33, 32, 31, 31, 30,
3850 29, 28, 28, 27, 26, 25, 25, 24,
3851 23, 23, 22, 21, 21, 20, 19, 19,
3852 18, 17, 17, 16, 15, 15, 14, 14,
3853 13, 12, 12, 11, 11, 10, 9, 9,
3854 8, 8, 7, 7, 6, 5, 5, 4,
3855 4, 3, 3, 2, 2, 1, 1, 0
3856 };
3857 const int precision = 7;
3858
3859 if (exp == 0 && frac != 0) { /* subnormal */
3860 /* Normalize the subnormal. */
3861 while (extract64(frac, frac_size - 1, 1) == 0) {
3862 exp--;
3863 frac <<= 1;
3864 }
3865
3866 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3867
3868 if (exp != 0 && exp != UINT64_MAX) {
3869 /*
3870 * Overflow to inf or max value of same sign,
3871 * depending on sign and rounding mode.
3872 */
3873 s->float_exception_flags |= (float_flag_inexact |
3874 float_flag_overflow);
3875
3876 if ((s->float_rounding_mode == float_round_to_zero) ||
3877 ((s->float_rounding_mode == float_round_down) && !sign) ||
3878 ((s->float_rounding_mode == float_round_up) && sign)) {
3879 /* Return greatest/negative finite value. */
3880 return (sign << (exp_size + frac_size)) |
3881 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3882 } else {
3883 /* Return +-inf. */
3884 return (sign << (exp_size + frac_size)) |
3885 MAKE_64BIT_MASK(frac_size, exp_size);
3886 }
3887 }
3888 }
3889
3890 int idx = frac >> (frac_size - precision);
3891 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3892 (frac_size - precision);
3893 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3894
3895 if (out_exp == 0 || out_exp == UINT64_MAX) {
3896 /*
3897 * The result is subnormal, but don't raise the underflow exception,
3898 * because there's no additional loss of precision.
3899 */
3900 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3901 if (out_exp == UINT64_MAX) {
3902 out_frac >>= 1;
3903 out_exp = 0;
3904 }
3905 }
3906
3907 uint64_t val = 0;
3908 val = deposit64(val, 0, frac_size, out_frac);
3909 val = deposit64(val, frac_size, exp_size, out_exp);
3910 val = deposit64(val, frac_size + exp_size, 1, sign);
3911 return val;
3912}
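/*
 * frec7() produces the 7-bit reciprocal estimate: the table is indexed by
 * the top seven fraction bits and the result exponent is effectively
 * 2 * bias - 1 - exp.  Subnormal inputs whose reciprocal would overflow are
 * saturated to +-inf or to the largest finite value depending on the
 * rounding mode, and results that would be subnormal are denormalized by
 * shifting the fraction right.
 */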
3913
3914static float16 frec7_h(float16 f, float_status *s)
3915{
3916 int exp_size = 5, frac_size = 10;
3917 bool sign = float16_is_neg(f);
3918
3919 /* frec7(+-inf) = +-0 */
3920 if (float16_is_infinity(f)) {
3921 return float16_set_sign(float16_zero, sign);
3922 }
3923
3924 /* frec7(+-0) = +-inf */
3925 if (float16_is_zero(f)) {
3926 s->float_exception_flags |= float_flag_divbyzero;
3927 return float16_set_sign(float16_infinity, sign);
3928 }
3929
3930 /* frec7(sNaN) = canonical NaN */
3931 if (float16_is_signaling_nan(f, s)) {
3932 s->float_exception_flags |= float_flag_invalid;
3933 return float16_default_nan(s);
3934 }
3935
3936 /* frec7(qNaN) = canonical NaN */
3937 if (float16_is_quiet_nan(f, s)) {
3938 return float16_default_nan(s);
3939 }
3940
3941 /* +-normal, +-subnormal */
3942 uint64_t val = frec7(f, exp_size, frac_size, s);
3943 return make_float16(val);
3944}
3945
3946static float32 frec7_s(float32 f, float_status *s)
3947{
3948 int exp_size = 8, frac_size = 23;
3949 bool sign = float32_is_neg(f);
3950
3951 /* frec7(+-inf) = +-0 */
3952 if (float32_is_infinity(f)) {
3953 return float32_set_sign(float32_zero, sign);
3954 }
3955
3956 /* frec7(+-0) = +-inf */
3957 if (float32_is_zero(f)) {
3958 s->float_exception_flags |= float_flag_divbyzero;
3959 return float32_set_sign(float32_infinity, sign);
3960 }
3961
3962 /* frec7(sNaN) = canonical NaN */
3963 if (float32_is_signaling_nan(f, s)) {
3964 s->float_exception_flags |= float_flag_invalid;
3965 return float32_default_nan(s);
3966 }
3967
3968 /* frec7(qNaN) = canonical NaN */
3969 if (float32_is_quiet_nan(f, s)) {
3970 return float32_default_nan(s);
3971 }
3972
3973 /* +-normal, +-subnormal */
3974 uint64_t val = frec7(f, exp_size, frac_size, s);
3975 return make_float32(val);
3976}
3977
3978static float64 frec7_d(float64 f, float_status *s)
3979{
3980 int exp_size = 11, frac_size = 52;
3981 bool sign = float64_is_neg(f);
3982
3983 /* frec7(+-inf) = +-0 */
3984 if (float64_is_infinity(f)) {
3985 return float64_set_sign(float64_zero, sign);
3986 }
3987
3988 /* frec7(+-0) = +-inf */
3989 if (float64_is_zero(f)) {
3990 s->float_exception_flags |= float_flag_divbyzero;
3991 return float64_set_sign(float64_infinity, sign);
3992 }
3993
3994 /* frec7(sNaN) = canonical NaN */
3995 if (float64_is_signaling_nan(f, s)) {
3996 s->float_exception_flags |= float_flag_invalid;
3997 return float64_default_nan(s);
3998 }
3999
4000 /* frec7(qNaN) = canonical NaN */
4001 if (float64_is_quiet_nan(f, s)) {
4002 return float64_default_nan(s);
4003 }
4004
4005 /* +-normal, +-subnormal */
4006 uint64_t val = frec7(f, exp_size, frac_size, s);
4007 return make_float64(val);
4008}
4009
4010RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
4011RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
4012RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
5eacf7d8 4013GEN_VEXT_V_ENV(vfrec7_v_h, 2)
4014GEN_VEXT_V_ENV(vfrec7_v_w, 4)
4015GEN_VEXT_V_ENV(vfrec7_v_d, 8)
55c35407 4016
230b53dd 4017/* Vector Floating-Point MIN/MAX Instructions */
4018RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
4019RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
4020RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
5eacf7d8 4021GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
4022GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
4023GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
4024RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
4025RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
4026RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
5eacf7d8 4027GEN_VEXT_VF(vfmin_vf_h, 2)
4028GEN_VEXT_VF(vfmin_vf_w, 4)
4029GEN_VEXT_VF(vfmin_vf_d, 8)
230b53dd 4030
4031RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
4032RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
4033RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
5eacf7d8 4034GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
4035GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
4036GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
4037RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
4038RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
4039RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
5eacf7d8 4040GEN_VEXT_VF(vfmax_vf_h, 2)
4041GEN_VEXT_VF(vfmax_vf_w, 4)
4042GEN_VEXT_VF(vfmax_vf_d, 8)
4043
4044/* Vector Floating-Point Sign-Injection Instructions */
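/*
 * The result keeps the magnitude of vs2[i] and takes its sign from vs1[i]
 * (or f[rs1]); fsgnjn* uses the negated sign and fsgnjx* the XOR of the
 * two sign bits.
 */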
4045static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4046{
4047 return deposit64(b, 0, 15, a);
4048}
4049
4050static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4051{
4052 return deposit64(b, 0, 31, a);
4053}
4054
4055static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4056{
4057 return deposit64(b, 0, 63, a);
4058}
4059
4060RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4061RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4062RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
5eacf7d8 4063GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
4064GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
4065GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
4066RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4067RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4068RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
5eacf7d8 4069GEN_VEXT_VF(vfsgnj_vf_h, 2)
4070GEN_VEXT_VF(vfsgnj_vf_w, 4)
4071GEN_VEXT_VF(vfsgnj_vf_d, 8)
4072
4073static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4074{
4075 return deposit64(~b, 0, 15, a);
4076}
4077
4078static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4079{
4080 return deposit64(~b, 0, 31, a);
4081}
4082
4083static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4084{
4085 return deposit64(~b, 0, 63, a);
4086}
4087
4088RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4089RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4090RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
5eacf7d8 4091GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
4092GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
4093GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
4094RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4095RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4096RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
5eacf7d8 4097GEN_VEXT_VF(vfsgnjn_vf_h, 2)
4098GEN_VEXT_VF(vfsgnjn_vf_w, 4)
4099GEN_VEXT_VF(vfsgnjn_vf_d, 8)
4100
4101static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4102{
4103 return deposit64(b ^ a, 0, 15, a);
4104}
4105
4106static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4107{
4108 return deposit64(b ^ a, 0, 31, a);
4109}
4110
4111static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4112{
4113 return deposit64(b ^ a, 0, 63, a);
4114}
4115
4116RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4117RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4118RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
5eacf7d8 4119GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4120GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4121GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
4122RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4123RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4124RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
5eacf7d8 4125GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4126GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4127GEN_VEXT_VF(vfsgnjx_vf_d, 8)
4128
4129/* Vector Floating-Point Compare Instructions */
4130#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4131void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4132 CPURISCVState *env, uint32_t desc) \
4133{ \
4134 uint32_t vm = vext_vm(desc); \
4135 uint32_t vl = env->vl; \
5eacf7d8 4136 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
4137 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4138 uint32_t i; \
4139 \
f714361e 4140 for (i = env->vstart; i < vl; i++) { \
4141 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4142 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4143 if (!vm && !vext_elem_mask(v0, i)) { \
4144 continue; \
4145 } \
f9298de5 4146 vext_set_elem_mask(vd, i, \
4147 DO_OP(s2, s1, &env->fp_status)); \
4148 } \
f714361e 4149 env->vstart = 0; \
5eacf7d8 4150 /* mask destination registers are always tail-agnostic */ \
4151 /* set tail elements to 1s */ \
4152 if (vta_all_1s) { \
4153 for (; i < total_elems; i++) { \
4154 vext_set_elem_mask(vd, i, 1); \
4155 } \
4156 } \
4157}
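/*
 * The compare helpers write a mask: one bit per element of vd.  Mask
 * destinations are always treated as tail-agnostic, so the bits past vl
 * are forced to 1 when the all-ones tail policy is enabled.
 */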
4158
4159GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4160GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4161GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4162
4163#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4164void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4165 CPURISCVState *env, uint32_t desc) \
4166{ \
4167 uint32_t vm = vext_vm(desc); \
4168 uint32_t vl = env->vl; \
5eacf7d8 4169 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
4170 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4171 uint32_t i; \
4172 \
f714361e 4173 for (i = env->vstart; i < vl; i++) { \
2a68e9e5 4174 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4175 if (!vm && !vext_elem_mask(v0, i)) { \
4176 continue; \
4177 } \
f9298de5 4178 vext_set_elem_mask(vd, i, \
4179 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4180 } \
f714361e 4181 env->vstart = 0; \
5eacf7d8 4182 /* mask destination registers are always tail-agnostic */ \
4183 /* set tail elements to 1s */ \
4184 if (vta_all_1s) { \
4185 for (; i < total_elems; i++) { \
4186 vext_set_elem_mask(vd, i, 1); \
4187 } \
4188 } \
4189}
4190
4191GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4192GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4193GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4194
4195static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4196{
4197 FloatRelation compare = float16_compare_quiet(a, b, s);
4198 return compare != float_relation_equal;
4199}
4200
4201static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4202{
4203 FloatRelation compare = float32_compare_quiet(a, b, s);
4204 return compare != float_relation_equal;
4205}
4206
4207static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4208{
4209 FloatRelation compare = float64_compare_quiet(a, b, s);
4210 return compare != float_relation_equal;
4211}
4212
4213GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4214GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4215GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4216GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4217GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4218GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4219
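/*
 * Unlike vmfeq/vmfne above, the ordered compares below use the signaling
 * compare functions, so comparing with a NaN (quiet or signaling) raises
 * the invalid flag.
 */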
4220GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4221GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4222GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4223GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4224GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4225GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4226
4227GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4228GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4229GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4230GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4231GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4232GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4233
4234static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4235{
4236 FloatRelation compare = float16_compare(a, b, s);
4237 return compare == float_relation_greater;
4238}
4239
4240static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4241{
4242 FloatRelation compare = float32_compare(a, b, s);
4243 return compare == float_relation_greater;
4244}
4245
4246static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4247{
4248 FloatRelation compare = float64_compare(a, b, s);
4249 return compare == float_relation_greater;
4250}
4251
4252GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4253GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4254GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4255
4256static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4257{
4258 FloatRelation compare = float16_compare(a, b, s);
4259 return compare == float_relation_greater ||
4260 compare == float_relation_equal;
4261}
4262
4263static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4264{
4265 FloatRelation compare = float32_compare(a, b, s);
4266 return compare == float_relation_greater ||
4267 compare == float_relation_equal;
4268}
4269
4270static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4271{
4272 FloatRelation compare = float64_compare(a, b, s);
4273 return compare == float_relation_greater ||
4274 compare == float_relation_equal;
4275}
4276
4277GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4278GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4279GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4280
4281/* Vector Floating-Point Classify Instruction */
4282#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
4283static void do_##NAME(void *vd, void *vs2, int i) \
4284{ \
4285 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
4286 *((TD *)vd + HD(i)) = OP(s2); \
4287}
4288
5eacf7d8 4289#define GEN_VEXT_V(NAME, ESZ) \
4290void HELPER(NAME)(void *vd, void *v0, void *vs2, \
4291 CPURISCVState *env, uint32_t desc) \
4292{ \
4293 uint32_t vm = vext_vm(desc); \
4294 uint32_t vl = env->vl; \
5eacf7d8 4295 uint32_t total_elems = \
4296 vext_get_total_elems(env, desc, ESZ); \
4297 uint32_t vta = vext_vta(desc); \
4298 uint32_t i; \
4299 \
f714361e 4300 for (i = env->vstart; i < vl; i++) { \
f9298de5 4301 if (!vm && !vext_elem_mask(v0, i)) { \
4302 continue; \
4303 } \
4304 do_##NAME(vd, vs2, i); \
4305 } \
f714361e 4306 env->vstart = 0; \
5eacf7d8 4307 /* set tail elements to 1s */ \
4308 vext_set_elems_1s(vd, vta, vl * ESZ, \
4309 total_elems * ESZ); \
4310}
4311
4312target_ulong fclass_h(uint64_t frs1)
4313{
4314 float16 f = frs1;
4315 bool sign = float16_is_neg(f);
4316
4317 if (float16_is_infinity(f)) {
4318 return sign ? 1 << 0 : 1 << 7;
4319 } else if (float16_is_zero(f)) {
4320 return sign ? 1 << 3 : 1 << 4;
4321 } else if (float16_is_zero_or_denormal(f)) {
4322 return sign ? 1 << 2 : 1 << 5;
4323 } else if (float16_is_any_nan(f)) {
4324 float_status s = { }; /* for snan_bit_is_one */
4325 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4326 } else {
4327 return sign ? 1 << 1 : 1 << 6;
4328 }
4329}
4330
4331target_ulong fclass_s(uint64_t frs1)
4332{
4333 float32 f = frs1;
4334 bool sign = float32_is_neg(f);
4335
4336 if (float32_is_infinity(f)) {
4337 return sign ? 1 << 0 : 1 << 7;
4338 } else if (float32_is_zero(f)) {
4339 return sign ? 1 << 3 : 1 << 4;
4340 } else if (float32_is_zero_or_denormal(f)) {
4341 return sign ? 1 << 2 : 1 << 5;
4342 } else if (float32_is_any_nan(f)) {
4343 float_status s = { }; /* for snan_bit_is_one */
4344 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4345 } else {
4346 return sign ? 1 << 1 : 1 << 6;
4347 }
4348}
4349
4350target_ulong fclass_d(uint64_t frs1)
4351{
4352 float64 f = frs1;
4353 bool sign = float64_is_neg(f);
4354
4355 if (float64_is_infinity(f)) {
4356 return sign ? 1 << 0 : 1 << 7;
4357 } else if (float64_is_zero(f)) {
4358 return sign ? 1 << 3 : 1 << 4;
4359 } else if (float64_is_zero_or_denormal(f)) {
4360 return sign ? 1 << 2 : 1 << 5;
4361 } else if (float64_is_any_nan(f)) {
4362 float_status s = { }; /* for snan_bit_is_one */
4363 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4364 } else {
4365 return sign ? 1 << 1 : 1 << 6;
4366 }
4367}
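/*
 * fclass_* return the standard RISC-V 10-bit classification mask:
 * bit 0 = -inf, 1 = -normal, 2 = -subnormal, 3 = -0, 4 = +0,
 * 5 = +subnormal, 6 = +normal, 7 = +inf, 8 = signaling NaN, 9 = quiet NaN.
 */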
4368
4369RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4370RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4371RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
5eacf7d8 4372GEN_VEXT_V(vfclass_v_h, 2)
4373GEN_VEXT_V(vfclass_v_w, 4)
4374GEN_VEXT_V(vfclass_v_d, 8)
4375
4376/* Vector Floating-Point Merge Instruction */
5eacf7d8 4377
3479a814 4378#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
4379void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4380 CPURISCVState *env, uint32_t desc) \
4381{ \
4382 uint32_t vm = vext_vm(desc); \
4383 uint32_t vl = env->vl; \
5eacf7d8 4384 uint32_t esz = sizeof(ETYPE); \
4385 uint32_t total_elems = \
4386 vext_get_total_elems(env, desc, esz); \
4387 uint32_t vta = vext_vta(desc); \
4388 uint32_t i; \
4389 \
f714361e 4390 for (i = env->vstart; i < vl; i++) { \
4391 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4392 *((ETYPE *)vd + H(i)) \
f9298de5 4393 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
64ab5846 4394 } \
f714361e 4395 env->vstart = 0; \
5eacf7d8 4396 /* set tail elements to 1s */ \
4397 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4398}
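/*
 * vfmerge.vfm selects rather than computes: elements whose mask bit is
 * clear keep vs2[i] and the rest receive the f[rs1] scalar, so no FP
 * exception flags can be raised.
 */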
4399
4400GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4401GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4402GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4403
4404/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4405/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4406RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4407RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4408RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
5eacf7d8 4409GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4410GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4411GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
4412
4413/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4414RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4415RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4416RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
5eacf7d8 4417GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4418GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4419GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
4420
4421/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4422RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4423RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4424RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
5eacf7d8 4425GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4426GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4427GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
4428
4429/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4430RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4431RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4432RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
5eacf7d8 4433GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4434GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4435GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4436
4437/* Widening Floating-Point/Integer Type-Convert Instructions */
4438/* (TD, T2, TX2) */
3ce4c09d 4439#define WOP_UU_B uint16_t, uint8_t, uint8_t
4440#define WOP_UU_H uint32_t, uint16_t, uint16_t
4441#define WOP_UU_W uint64_t, uint32_t, uint32_t
 4442/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
4443RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4444RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
5eacf7d8 4445GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4446GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4447
4448/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4449RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4450RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
5eacf7d8 4451GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4452GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4453
4454/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
3ce4c09d 4455RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4456RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4457RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
5eacf7d8 4458GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4459GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4460GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4461
4462/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3ce4c09d 4463RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4464RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4465RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
5eacf7d8 4466GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4467GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4468GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4469
4470/*
3ce4c09d 4471 * vfwcvt.f.f.v vd, vs2, vm
4472 * Convert single-width float to double-width float.
4473 */
4474static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4475{
4476 return float16_to_float32(a, true, s);
4477}
4478
4479RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4480RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
5eacf7d8 4481GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4482GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
4483
4484/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4485/* (TD, T2, TX2) */
ff679b58 4486#define NOP_UU_B uint8_t, uint16_t, uint32_t
4487#define NOP_UU_H uint16_t, uint32_t, uint32_t
4488#define NOP_UU_W uint32_t, uint64_t, uint64_t
 4489/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
4490RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4491RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4492RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
5eacf7d8 4493GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4494GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4495GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
4496
4497/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4498RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4499RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4500RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
5eacf7d8 4501GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4502GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4503GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
4504
4505/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
4506RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4507RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
5eacf7d8 4508GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4509GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
4510
4511/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4512RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4513RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
5eacf7d8 4514GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4515GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
4516
 4517/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4518static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4519{
4520 return float32_to_float16(a, true, s);
4521}
4522
4523RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4524RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
5eacf7d8 4525GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4526GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
4527
4528/*
4529 *** Vector Reduction Operations
4530 */
4531/* Vector Single-Width Integer Reduction Instructions */
3479a814 4532#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
4533void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4534 void *vs2, CPURISCVState *env, uint32_t desc) \
4535{ \
4536 uint32_t vm = vext_vm(desc); \
4537 uint32_t vl = env->vl; \
df4f52a7 4538 uint32_t esz = sizeof(TD); \
4539 uint32_t vlenb = simd_maxsz(desc); \
4540 uint32_t vta = vext_vta(desc); \
fe5c9ab1 4541 uint32_t i; \
4542 TD s1 = *((TD *)vs1 + HD(0)); \
4543 \
f714361e 4544 for (i = env->vstart; i < vl; i++) { \
fe5c9ab1 4545 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4546 if (!vm && !vext_elem_mask(v0, i)) { \
4547 continue; \
4548 } \
4549 s1 = OP(s1, (TD)s2); \
4550 } \
4551 *((TD *)vd + HD(0)) = s1; \
f714361e 4552 env->vstart = 0; \
df4f52a7 4553 /* set tail elements to 1s */ \
4554 vext_set_elems_1s(vd, vta, esz, vlenb); \
4555}
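/*
 * Reductions accumulate the scalar vs1[0] across all active elements of vs2
 * and write only element 0 of vd; the rest of the destination (up to vlenb
 * bytes) is tail and is filled with 1s when tail-agnostic.
 */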
4556
4557/* vd[0] = sum(vs1[0], vs2[*]) */
4558GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4559GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4560GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4561GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4562
4563/* vd[0] = maxu(vs1[0], vs2[*]) */
4564GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4565GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4566GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4567GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4568
4569/* vd[0] = max(vs1[0], vs2[*]) */
4570GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4571GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4572GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4573GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4574
4575/* vd[0] = minu(vs1[0], vs2[*]) */
4576GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4577GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4578GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4579GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4580
4581/* vd[0] = min(vs1[0], vs2[*]) */
4582GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4583GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4584GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4585GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4586
4587/* vd[0] = and(vs1[0], vs2[*]) */
4588GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4589GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4590GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4591GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
fe5c9ab1
LZ
4592
4593/* vd[0] = or(vs1[0], vs2[*]) */
3479a814
FC
4594GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4595GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4596GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4597GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
fe5c9ab1
LZ
4598
4599/* vd[0] = xor(vs1[0], vs2[*]) */
3479a814
FC
4600GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4601GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4602GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4603GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
bba71820
LZ
4604
4605/* Vector Widening Integer Reduction Instructions */
4606/* Signed sum reduction into double-width accumulator */
3479a814
FC
4607GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4608GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4609GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
bba71820
LZ
4610
4611/* Unsigned sum reduction into double-width accumulator */
3479a814
FC
4612GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4613GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4614GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
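/*
 * Note on the widening forms above: TD is twice the width of TS2, so the
 * (TD)s2 cast in GEN_VEXT_RED performs the widening before DO_ADD, e.g.
 * vwredsum_vs_b sign-extends each int8_t element of vs2 to int16_t and
 * accumulates into the int16_t scalar taken from vs1[0].
 */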
523547f1
LZ
4615
4616/* Vector Single-Width Floating-Point Reduction Instructions */
3479a814 4617#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
523547f1
LZ
4618void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4619 void *vs2, CPURISCVState *env, \
4620 uint32_t desc) \
4621{ \
523547f1
LZ
4622 uint32_t vm = vext_vm(desc); \
4623 uint32_t vl = env->vl; \
df4f52a7 4624 uint32_t esz = sizeof(TD); \
4625 uint32_t vlenb = simd_maxsz(desc); \
4626 uint32_t vta = vext_vta(desc); \
523547f1 4627 uint32_t i; \
523547f1
LZ
4628 TD s1 = *((TD *)vs1 + HD(0)); \
4629 \
f714361e 4630 for (i = env->vstart; i < vl; i++) { \
523547f1 4631 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4632 if (!vm && !vext_elem_mask(v0, i)) { \
523547f1
LZ
4633 continue; \
4634 } \
4635 s1 = OP(s1, (TD)s2, &env->fp_status); \
4636 } \
4637 *((TD *)vd + HD(0)) = s1; \
f714361e 4638 env->vstart = 0; \
df4f52a7 4639 /* set tail elements to 1s */ \
4640 vext_set_elems_1s(vd, vta, esz, vlenb); \
523547f1
LZ
4641}
4642
4643/* Unordered sum */
3479a814
FC
4644GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4645GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4646GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
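/*
 * The "unordered" sum is still computed here by walking elements in index
 * order; each addition goes through softfloat with &env->fp_status, so the
 * guest rounding mode applies and exception flags accumulate exactly as
 * for scalar float16/32/64 adds.
 */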
523547f1
LZ
4647
4648/* Maximum value */
08b60eeb
FC
4649GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4650GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4651GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
523547f1
LZ
4652
4653/* Minimum value */
08b60eeb
FC
4654GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4655GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4656GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
696b0c26
LZ
4657
4658/* Vector Widening Floating-Point Reduction Instructions */
4659/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4660void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4661 void *vs2, CPURISCVState *env, uint32_t desc)
4662{
696b0c26
LZ
4663 uint32_t vm = vext_vm(desc);
4664 uint32_t vl = env->vl;
df4f52a7 4665 uint32_t esz = sizeof(uint32_t);
4666 uint32_t vlenb = simd_maxsz(desc);
4667 uint32_t vta = vext_vta(desc);
696b0c26 4668 uint32_t i;
696b0c26
LZ
4669 uint32_t s1 = *((uint32_t *)vs1 + H4(0));
4670
f714361e 4671 for (i = env->vstart; i < vl; i++) {
696b0c26 4672 uint16_t s2 = *((uint16_t *)vs2 + H2(i));
f9298de5 4673 if (!vm && !vext_elem_mask(v0, i)) {
696b0c26
LZ
4674 continue;
4675 }
4676 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4677 &env->fp_status);
4678 }
4679 *((uint32_t *)vd + H4(0)) = s1;
f714361e 4680 env->vstart = 0;
df4f52a7 4681 /* set tail elements to 1s */
4682 vext_set_elems_1s(vd, vta, esz, vlenb);
696b0c26
LZ
4683}
4684
4685void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4686 void *vs2, CPURISCVState *env, uint32_t desc)
4687{
696b0c26
LZ
4688 uint32_t vm = vext_vm(desc);
4689 uint32_t vl = env->vl;
df4f52a7 4690 uint32_t esz = sizeof(uint64_t);
4691 uint32_t vlenb = simd_maxsz(desc);
4692 uint32_t vta = vext_vta(desc);
696b0c26 4693 uint32_t i;
696b0c26
LZ
4694 uint64_t s1 = *((uint64_t *)vs1);
4695
f714361e 4696 for (i = env->vstart; i < vl; i++) {
696b0c26 4697 uint32_t s2 = *((uint32_t *)vs2 + H4(i));
f9298de5 4698 if (!vm && !vext_elem_mask(v0, i)) {
696b0c26
LZ
4699 continue;
4700 }
4701 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4702 &env->fp_status);
4703 }
4704 *((uint64_t *)vd) = s1;
f714361e 4705 env->vstart = 0;
df4f52a7 4706 /* set tail elements to 1s */
4707 vext_set_elems_1s(vd, vta, esz, vlenb);
696b0c26 4708}
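/*
 * For example, with vl = 2 and all elements active, the half-precision
 * widening sum above computes roughly
 *   vd[0] = f32_add(f32_add(vs1[0], f16_to_f32(vs2[0])), f16_to_f32(vs2[1]))
 * i.e. each SEW element is promoted to 2*SEW before it is accumulated.
 */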
c21f34ae
LZ
4709
4710/*
4711 *** Vector Mask Operations
4712 */
4713/* Vector Mask-Register Logical Instructions */
4714#define GEN_VEXT_MASK_VV(NAME, OP) \
4715void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4716 void *vs2, CPURISCVState *env, \
4717 uint32_t desc) \
4718{ \
c21f34ae 4719 uint32_t vl = env->vl; \
acc6ffd4 4720 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
4721 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
c21f34ae
LZ
4722 uint32_t i; \
4723 int a, b; \
4724 \
f714361e 4725 for (i = env->vstart; i < vl; i++) { \
f9298de5
FC
4726 a = vext_elem_mask(vs1, i); \
4727 b = vext_elem_mask(vs2, i); \
4728 vext_set_elem_mask(vd, i, OP(b, a)); \
c21f34ae 4729 } \
f714361e 4730 env->vstart = 0; \
acc6ffd4 4731    /* mask destination register is always tail-     \
4732     * agnostic                                               \
4733     */                                                       \
4734 /* set tail elements to 1s */ \
4735 if (vta_all_1s) { \
4736 for (; i < total_elems; i++) { \
4737 vext_set_elem_mask(vd, i, 1); \
4738 } \
4739 } \
c21f34ae
LZ
4740}
4741
4742#define DO_NAND(N, M) (!(N & M))
4743#define DO_ANDNOT(N, M) (N & !M)
4744#define DO_NOR(N, M) (!(N | M))
4745#define DO_ORNOT(N, M) (N | !M)
4746#define DO_XNOR(N, M) (!(N ^ M))
4747
4748GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4749GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
9c0d2559 4750GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
c21f34ae
LZ
4751GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4752GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4753GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
9c0d2559 4754GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
c21f34ae 4755GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
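/*
 * Mask elements are read back as 0 or 1 via vext_elem_mask(), so the
 * logical-NOT forms above behave like their bitwise counterparts on single
 * bits, e.g. DO_ANDNOT(1, 0) = (1 & !0) = 1 and DO_XNOR(1, 1) = !(1 ^ 1) = 1.
 */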
2e88f551 4756
0014aa74
FC
4757/* Vector count population in mask vcpop */
4758target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4759 uint32_t desc)
2e88f551
LZ
4760{
4761 target_ulong cnt = 0;
2e88f551
LZ
4762 uint32_t vm = vext_vm(desc);
4763 uint32_t vl = env->vl;
4764 int i;
4765
f714361e 4766 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4767 if (vm || vext_elem_mask(v0, i)) {
4768 if (vext_elem_mask(vs2, i)) {
2e88f551
LZ
4769 cnt++;
4770 }
4771 }
4772 }
f714361e 4773 env->vstart = 0;
2e88f551
LZ
4774 return cnt;
4775}
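/*
 * E.g. with vl = 4, v0 mask = 0011b, vs2 mask = 1011b and vm = 0, only
 * elements 0 and 1 are active and both have their vs2 bit set, so the
 * helper returns 2.
 */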
0db67e1c 4776
d71a24fc
FC
4777/* vfirst: find-first-set mask bit */
4778target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4779 uint32_t desc)
0db67e1c 4780{
0db67e1c
LZ
4781 uint32_t vm = vext_vm(desc);
4782 uint32_t vl = env->vl;
4783 int i;
4784
f714361e 4785 for (i = env->vstart; i < vl; i++) {
f9298de5
FC
4786 if (vm || vext_elem_mask(v0, i)) {
4787 if (vext_elem_mask(vs2, i)) {
0db67e1c
LZ
4788 return i;
4789 }
4790 }
4791 }
f714361e 4792 env->vstart = 0;
0db67e1c
LZ
4793 return -1LL;
4794}
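/*
 * E.g. an unmasked vfirst.m over vs2 = 0100b with vl = 4 returns 2; if no
 * active element has its bit set, the helper returns -1, which shows up as
 * all-ones in the destination x register.
 */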
81fbf7da
LZ
4795
4796enum set_mask_type {
4797 ONLY_FIRST = 1,
4798 INCLUDE_FIRST,
4799 BEFORE_FIRST,
4800};
4801
4802static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4803 uint32_t desc, enum set_mask_type type)
4804{
81fbf7da
LZ
4805 uint32_t vm = vext_vm(desc);
4806 uint32_t vl = env->vl;
acc6ffd4 4807 uint32_t total_elems = env_archcpu(env)->cfg.vlen;
4808 uint32_t vta_all_1s = vext_vta_all_1s(desc);
81fbf7da
LZ
4809 int i;
4810 bool first_mask_bit = false;
4811
f714361e 4812 for (i = env->vstart; i < vl; i++) {
f9298de5 4813 if (!vm && !vext_elem_mask(v0, i)) {
81fbf7da
LZ
4814 continue;
4815 }
4816 /* write a zero to all following active elements */
4817 if (first_mask_bit) {
f9298de5 4818 vext_set_elem_mask(vd, i, 0);
81fbf7da
LZ
4819 continue;
4820 }
f9298de5 4821 if (vext_elem_mask(vs2, i)) {
81fbf7da
LZ
4822 first_mask_bit = true;
4823 if (type == BEFORE_FIRST) {
f9298de5 4824 vext_set_elem_mask(vd, i, 0);
81fbf7da 4825 } else {
f9298de5 4826 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4827 }
4828 } else {
4829 if (type == ONLY_FIRST) {
f9298de5 4830 vext_set_elem_mask(vd, i, 0);
81fbf7da 4831 } else {
f9298de5 4832 vext_set_elem_mask(vd, i, 1);
81fbf7da
LZ
4833 }
4834 }
4835 }
f714361e 4836 env->vstart = 0;
acc6ffd4 4837    /* mask destination register is always tail-agnostic */
4838 /* set tail elements to 1s */
4839 if (vta_all_1s) {
4840 for (; i < total_elems; i++) {
4841 vext_set_elem_mask(vd, i, 1);
4842 }
4843 }
81fbf7da
LZ
4844}
4845
4846void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4847 uint32_t desc)
4848{
4849 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4850}
4851
4852void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4853 uint32_t desc)
4854{
4855 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4856}
4857
4858void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4859 uint32_t desc)
4860{
4861 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4862}
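/*
 * For an unmasked example with vs2 = 00010100b (first set bit at i = 2)
 * and vl = 8:
 *   vmsbf.m writes 00000011b (set before first),
 *   vmsif.m writes 00000111b (set including first),
 *   vmsof.m writes 00000100b (set only first).
 */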
78d90cfe
LZ
4863
4864/* Vector Iota Instruction */
3479a814 4865#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
78d90cfe
LZ
4866void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4867 uint32_t desc) \
4868{ \
78d90cfe
LZ
4869 uint32_t vm = vext_vm(desc); \
4870 uint32_t vl = env->vl; \
acc6ffd4 4871 uint32_t esz = sizeof(ETYPE); \
4872 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4873 uint32_t vta = vext_vta(desc); \
78d90cfe
LZ
4874 uint32_t sum = 0; \
4875 int i; \
4876 \
f714361e 4877 for (i = env->vstart; i < vl; i++) { \
f9298de5 4878 if (!vm && !vext_elem_mask(v0, i)) { \
78d90cfe
LZ
4879 continue; \
4880 } \
4881 *((ETYPE *)vd + H(i)) = sum; \
f9298de5 4882 if (vext_elem_mask(vs2, i)) { \
78d90cfe
LZ
4883 sum++; \
4884 } \
4885 } \
f714361e 4886 env->vstart = 0; \
acc6ffd4 4887 /* set tail elements to 1s */ \
4888 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
78d90cfe
LZ
4889}
4890
3479a814
FC
4891GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4892GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4893GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4894GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
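/*
 * E.g. an unmasked viota.m with vl = 4 and vs2 mask = 1101b (bits 0, 2, 3
 * set) produces vd = {0, 1, 1, 2}: each element receives the count of set
 * mask bits strictly below its index.
 */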
126bec3f
LZ
4895
4896/* Vector Element Index Instruction */
3479a814 4897#define GEN_VEXT_VID_V(NAME, ETYPE, H) \
126bec3f
LZ
4898void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4899{ \
126bec3f
LZ
4900 uint32_t vm = vext_vm(desc); \
4901 uint32_t vl = env->vl; \
acc6ffd4 4902 uint32_t esz = sizeof(ETYPE); \
4903 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4904 uint32_t vta = vext_vta(desc); \
126bec3f
LZ
4905 int i; \
4906 \
f714361e 4907 for (i = env->vstart; i < vl; i++) { \
f9298de5 4908 if (!vm && !vext_elem_mask(v0, i)) { \
126bec3f
LZ
4909 continue; \
4910 } \
4911 *((ETYPE *)vd + H(i)) = i; \
4912 } \
f714361e 4913 env->vstart = 0; \
acc6ffd4 4914 /* set tail elements to 1s */ \
4915 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
126bec3f
LZ
4916}
4917
3479a814
FC
4918GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4919GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4920GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4921GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
ec17e036
LZ
4922
4923/*
4924 *** Vector Permutation Instructions
4925 */
4926
4927/* Vector Slide Instructions */
3479a814 4928#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
ec17e036
LZ
4929void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4930 CPURISCVState *env, uint32_t desc) \
4931{ \
ec17e036
LZ
4932 uint32_t vm = vext_vm(desc); \
4933 uint32_t vl = env->vl; \
803963f7 4934 uint32_t esz = sizeof(ETYPE); \
4935 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4936 uint32_t vta = vext_vta(desc); \
f714361e 4937 target_ulong offset = s1, i_min, i; \
ec17e036 4938 \
f714361e
FC
4939 i_min = MAX(env->vstart, offset); \
4940 for (i = i_min; i < vl; i++) { \
f9298de5 4941 if (!vm && !vext_elem_mask(v0, i)) { \
ec17e036
LZ
4942 continue; \
4943 } \
4944 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
4945 } \
803963f7 4946 /* set tail elements to 1s */ \
4947 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
ec17e036
LZ
4948}
4949
4950/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
3479a814
FC
4951GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
4952GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4953GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4954GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
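/*
 * Because the loop starts at MAX(vstart, offset), elements vd[0..offset-1]
 * keep their previous contents, matching the vslideup requirement that the
 * low OFFSET destination elements are unchanged; e.g. with offset = 2 and
 * vl = 4, only vd[2] = vs2[0] and vd[3] = vs2[1] are written.
 */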
ec17e036 4955
3479a814 4956#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
ec17e036
LZ
4957void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4958 CPURISCVState *env, uint32_t desc) \
4959{ \
6438ed61 4960 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
ec17e036
LZ
4961 uint32_t vm = vext_vm(desc); \
4962 uint32_t vl = env->vl; \
803963f7 4963 uint32_t esz = sizeof(ETYPE); \
4964 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4965 uint32_t vta = vext_vta(desc); \
6438ed61 4966 target_ulong i_max, i; \
ec17e036 4967 \
f714361e
FC
4968 i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
4969 for (i = env->vstart; i < i_max; ++i) { \
6438ed61
FC
4970 if (vm || vext_elem_mask(v0, i)) { \
4971 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
4972 } \
4973 } \
4974 \
4975 for (i = i_max; i < vl; ++i) { \
4976 if (vm || vext_elem_mask(v0, i)) { \
4977 *((ETYPE *)vd + H(i)) = 0; \
ec17e036 4978 } \
ec17e036 4979 } \
f714361e
FC
4980 \
4981 env->vstart = 0; \
803963f7 4982 /* set tail elements to 1s */ \
4983 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
ec17e036
LZ
4984}
4985
4986/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
3479a814
FC
4987GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
4988GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4989GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4990GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
ec17e036 4991
c7b8a421 4992#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
4993static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
4994 void *vs2, CPURISCVState *env, uint32_t desc) \
8500d4ab 4995{ \
c7b8a421 4996 typedef uint##BITWIDTH##_t ETYPE; \
8500d4ab
FC
4997 uint32_t vm = vext_vm(desc); \
4998 uint32_t vl = env->vl; \
803963f7 4999 uint32_t esz = sizeof(ETYPE); \
5000 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5001 uint32_t vta = vext_vta(desc); \
8500d4ab
FC
5002 uint32_t i; \
5003 \
f714361e 5004 for (i = env->vstart; i < vl; i++) { \
8500d4ab
FC
5005 if (!vm && !vext_elem_mask(v0, i)) { \
5006 continue; \
5007 } \
5008 if (i == 0) { \
5009 *((ETYPE *)vd + H(i)) = s1; \
5010 } else { \
5011 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
5012 } \
5013 } \
f714361e 5014 env->vstart = 0; \
803963f7 5015 /* set tail elements to 1s */ \
5016 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
8500d4ab
FC
5017}
5018
5019GEN_VEXT_VSLIE1UP(8, H1)
5020GEN_VEXT_VSLIE1UP(16, H2)
5021GEN_VEXT_VSLIE1UP(32, H4)
5022GEN_VEXT_VSLIE1UP(64, H8)
5023
c7b8a421 5024#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
8500d4ab
FC
5025void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5026 CPURISCVState *env, uint32_t desc) \
5027{ \
c7b8a421 5028 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
ec17e036
LZ
5029}
5030
5031/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
8500d4ab
FC
5032GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
5033GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
5034GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
5035GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
5036
c7b8a421 5037#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
5038static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
5039 void *vs2, CPURISCVState *env, uint32_t desc) \
8500d4ab 5040{ \
c7b8a421 5041 typedef uint##BITWIDTH##_t ETYPE; \
8500d4ab
FC
5042 uint32_t vm = vext_vm(desc); \
5043 uint32_t vl = env->vl; \
803963f7 5044 uint32_t esz = sizeof(ETYPE); \
5045 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5046 uint32_t vta = vext_vta(desc); \
8500d4ab
FC
5047 uint32_t i; \
5048 \
f714361e 5049 for (i = env->vstart; i < vl; i++) { \
8500d4ab
FC
5050 if (!vm && !vext_elem_mask(v0, i)) { \
5051 continue; \
5052 } \
5053 if (i == vl - 1) { \
5054 *((ETYPE *)vd + H(i)) = s1; \
5055 } else { \
5056 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
5057 } \
5058 } \
f714361e 5059 env->vstart = 0; \
803963f7 5060 /* set tail elements to 1s */ \
5061 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
8500d4ab
FC
5062}
5063
5064GEN_VEXT_VSLIDE1DOWN(8, H1)
5065GEN_VEXT_VSLIDE1DOWN(16, H2)
5066GEN_VEXT_VSLIDE1DOWN(32, H4)
5067GEN_VEXT_VSLIDE1DOWN(64, H8)
5068
c7b8a421 5069#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
8500d4ab
FC
5070void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5071 CPURISCVState *env, uint32_t desc) \
5072{ \
c7b8a421 5073 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
ec17e036
LZ
5074}
5075
5076/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
8500d4ab
FC
5077GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
5078GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
5079GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
5080GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
5081
5082/* Vector Floating-Point Slide Instructions */
c7b8a421 5083#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
8500d4ab
FC
5084void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5085 CPURISCVState *env, uint32_t desc) \
5086{ \
c7b8a421 5087 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
8500d4ab
FC
5088}
5089
5090/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
5091GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
5092GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
5093GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5094
c7b8a421 5095#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
8500d4ab
FC
5096void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5097 CPURISCVState *env, uint32_t desc) \
5098{ \
c7b8a421 5099 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
8500d4ab
FC
5100}
5101
5102/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
5103GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
5104GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
5105GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
e4b83d5c
LZ
5106
5107/* Vector Register Gather Instruction */
50bfb45b 5108#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
e4b83d5c
LZ
5109void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5110 CPURISCVState *env, uint32_t desc) \
5111{ \
f714361e 5112 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
e4b83d5c
LZ
5113 uint32_t vm = vext_vm(desc); \
5114 uint32_t vl = env->vl; \
803963f7 5115 uint32_t esz = sizeof(TS2); \
5116 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5117 uint32_t vta = vext_vta(desc); \
b11e84b8
FC
5118 uint64_t index; \
5119 uint32_t i; \
e4b83d5c 5120 \
f714361e 5121 for (i = env->vstart; i < vl; i++) { \
f9298de5 5122 if (!vm && !vext_elem_mask(v0, i)) { \
e4b83d5c
LZ
5123 continue; \
5124 } \
50bfb45b 5125 index = *((TS1 *)vs1 + HS1(i)); \
e4b83d5c 5126 if (index >= vlmax) { \
50bfb45b 5127 *((TS2 *)vd + HS2(i)) = 0; \
e4b83d5c 5128 } else { \
50bfb45b 5129 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
e4b83d5c
LZ
5130 } \
5131 } \
f714361e 5132 env->vstart = 0; \
803963f7 5133 /* set tail elements to 1s */ \
5134 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
e4b83d5c
LZ
5135}
5136
5137/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
50bfb45b
FC
5138GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5139GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5140GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5141GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5142
5143GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5144GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5145GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5146GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
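/*
 * The split TS1/TS2 parameters exist for vrgatherei16, where the index
 * element is always 16 bits while the data element follows SEW; e.g.
 * vrgatherei16_vv_d reads a uint16_t index (H2) and moves uint64_t data
 * (H8). Any index >= vlmax yields a zero destination element.
 */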
e4b83d5c 5147
3479a814 5148#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
e4b83d5c
LZ
5149void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5150 CPURISCVState *env, uint32_t desc) \
5151{ \
5a9f8e15 5152 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
e4b83d5c
LZ
5153 uint32_t vm = vext_vm(desc); \
5154 uint32_t vl = env->vl; \
803963f7 5155 uint32_t esz = sizeof(ETYPE); \
5156 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5157 uint32_t vta = vext_vta(desc); \
b11e84b8
FC
5158 uint64_t index = s1; \
5159 uint32_t i; \
e4b83d5c 5160 \
f714361e 5161 for (i = env->vstart; i < vl; i++) { \
f9298de5 5162 if (!vm && !vext_elem_mask(v0, i)) { \
e4b83d5c
LZ
5163 continue; \
5164 } \
5165 if (index >= vlmax) { \
5166 *((ETYPE *)vd + H(i)) = 0; \
5167 } else { \
5168 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5169 } \
5170 } \
f714361e 5171 env->vstart = 0; \
803963f7 5172 /* set tail elements to 1s */ \
5173 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
e4b83d5c
LZ
5174}
5175
5176/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
3479a814
FC
5177GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5178GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5179GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5180GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
31bf42a2
LZ
5181
5182/* Vector Compress Instruction */
3479a814 5183#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
31bf42a2
LZ
5184void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5185 CPURISCVState *env, uint32_t desc) \
5186{ \
31bf42a2 5187 uint32_t vl = env->vl; \
803963f7 5188 uint32_t esz = sizeof(ETYPE); \
5189 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5190 uint32_t vta = vext_vta(desc); \
31bf42a2
LZ
5191 uint32_t num = 0, i; \
5192 \
f714361e 5193 for (i = env->vstart; i < vl; i++) { \
f9298de5 5194 if (!vext_elem_mask(vs1, i)) { \
31bf42a2
LZ
5195 continue; \
5196 } \
5197 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5198 num++; \
5199 } \
f714361e 5200 env->vstart = 0; \
803963f7 5201 /* set tail elements to 1s */ \
5202 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
31bf42a2
LZ
5203}
5204
5205/* Compress into vd elements of vs2 where vs1 is enabled */
3479a814
FC
5206GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5207GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5208GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5209GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
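/*
 * E.g. with vs1 mask = 0101b, vs2 = {a, b, c, d} and vl = 4, the loop
 * packs vd[0] = a and vd[1] = c; body elements of vd past the packed ones
 * are not written by the loop.
 */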
cd01340e 5210
f714361e 5211/* Vector Whole Register Move */
f32d82f6
WL
5212void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5213{
f06193c4 5214 /* EEW = SEW */
f32d82f6 5215 uint32_t maxsz = simd_maxsz(desc);
f06193c4
WL
5216 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5217 uint32_t startb = env->vstart * sewb;
5218 uint32_t i = startb;
f32d82f6
WL
5219
5220 memcpy((uint8_t *)vd + H1(i),
5221 (uint8_t *)vs2 + H1(i),
f06193c4 5222 maxsz - startb);
f714361e 5223
f32d82f6
WL
5224 env->vstart = 0;
5225}
f714361e 5226
cd01340e
FC
5227/* Vector Integer Extension */
5228#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5229void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5230 CPURISCVState *env, uint32_t desc) \
5231{ \
5232 uint32_t vl = env->vl; \
5233 uint32_t vm = vext_vm(desc); \
803963f7 5234 uint32_t esz = sizeof(ETYPE); \
5235 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5236 uint32_t vta = vext_vta(desc); \
cd01340e
FC
5237 uint32_t i; \
5238 \
f714361e 5239 for (i = env->vstart; i < vl; i++) { \
cd01340e
FC
5240 if (!vm && !vext_elem_mask(v0, i)) { \
5241 continue; \
5242 } \
5243 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5244 } \
f714361e 5245 env->vstart = 0; \
803963f7 5246 /* set tail elements to 1s */ \
5247 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
cd01340e
FC
5248}
5249
5250GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5251GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5252GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5253GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5254GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5255GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5256
5257GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5258GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5259GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5260GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5261GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5262GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
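/*
 * The vf2/vf4/vf8 variants above read a DTYPE element of SEW/2, SEW/4 or
 * SEW/8 bits and rely on the implicit C conversion to ETYPE for the
 * extension, e.g. vsext_vf2_w sign-extends an int16_t source element to
 * the int32_t destination, while vzext_vf2_w zero-extends uint16_t to
 * uint32_t.
 */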