/* target/riscv/vector_helper.c */
1/*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include "qemu/osdep.h"
5a9f8e15 20#include "qemu/host-utils.h"
e848a1e5 21#include "qemu/bitops.h"
2b7168fc 22#include "cpu.h"
751538d5 23#include "exec/memop.h"
24#include "exec/exec-all.h"
25#include "exec/helper-proto.h"
ce2a0343 26#include "fpu/softfloat.h"
27#include "tcg/tcg-gvec-desc.h"
28#include "internals.h"
29#include <math.h>
30
31target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32 target_ulong s2)
33{
34 int vlmax, vl;
35 RISCVCPU *cpu = env_archcpu(env);
d9b7609a 36 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39 int xlen = riscv_cpu_xlen(env);
40 bool vill = (s2 >> (xlen - 1)) & 0x1;
41 target_ulong reserved = s2 &
42 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
43 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
2b7168fc 44
45 if (lmul & 4) {
46 /* Fractional LMUL. */
47 if (lmul == 4 ||
48 cpu->cfg.elen >> (8 - lmul) < sew) {
49 vill = true;
50 }
51 }
52
53 if ((sew > cpu->cfg.elen)
54 || vill
55 || (ediv != 0)
56 || (reserved != 0)) {
2b7168fc 57 /* only set vill bit. */
58 env->vill = 1;
59 env->vtype = 0;
60 env->vl = 0;
61 env->vstart = 0;
62 return 0;
63 }
64
65 vlmax = vext_get_vlmax(cpu, s2);
66 if (s1 <= vlmax) {
67 vl = s1;
68 } else {
69 vl = vlmax;
70 }
71 env->vl = vl;
72 env->vtype = s2;
73 env->vstart = 0;
ac6bcf4d 74 env->vill = 0;
75 return vl;
76}
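/*
 * Illustrative sketch, not part of the helper: assume a hypothetical CPU with
 * VLEN = 128 and a vtype encoding SEW = 16, LMUL = 2, so VLMAX =
 * LMUL * VLEN / SEW = 16. A requested AVL of s1 = 10 returns vl = 10, while
 * s1 = 100 is clamped to VLMAX:
 *
 *   vl = HELPER(vsetvl)(env, 10, vtype);    // vl == 10
 *   vl = HELPER(vsetvl)(env, 100, vtype);   // vl == 16 (VLMAX)
 */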
77
78/*
79 * Note that vector data is stored in host-endian 64-bit chunks,
 80 * so addressing units smaller than that need a host-endian fixup.
81 */
e03b5686 82#if HOST_BIG_ENDIAN
83#define H1(x) ((x) ^ 7)
84#define H1_2(x) ((x) ^ 6)
85#define H1_4(x) ((x) ^ 4)
86#define H2(x) ((x) ^ 3)
87#define H4(x) ((x) ^ 1)
88#define H8(x) ((x))
89#else
90#define H1(x) (x)
91#define H1_2(x) (x)
92#define H1_4(x) (x)
93#define H2(x) (x)
94#define H4(x) (x)
95#define H8(x) (x)
96#endif
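/*
 * Worked example for the fixup above (assuming a big-endian host): element 0
 * is the least significant byte of the first 64-bit chunk, which a big-endian
 * host stores at byte offset 7, so H1(0) == 7, H1(1) == 6, ..., H1(7) == 0,
 * and the pattern repeats per chunk (H1(8) == 15). Likewise H2(0) == 3 for
 * 16-bit elements. On a little-endian host every H macro is the identity.
 */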
97
98static inline uint32_t vext_nf(uint32_t desc)
99{
100 return FIELD_EX32(simd_data(desc), VDATA, NF);
101}
102
103static inline uint32_t vext_vm(uint32_t desc)
104{
105 return FIELD_EX32(simd_data(desc), VDATA, VM);
106}
107
108/*
 109 * Encode LMUL to lmul as follows:
 110 *   LMUL    vlmul    lmul
 111 *    1       000       0
 112 *    2       001       1
 113 *    4       010       2
 114 *    8       011       3
 115 *    -       100       -
 116 *   1/8      101      -3
 117 *   1/4      110      -2
 118 *   1/2      111      -1
119 */
120static inline int32_t vext_lmul(uint32_t desc)
751538d5 121{
33f1beaf 122 return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
123}
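/*
 * A small sketch of the mapping above: the 3-bit vlmul field is simply
 * sign-extended, e.g.
 *
 *   sextract32(0b111, 0, 3) == -1;   // LMUL = 1/2
 *   sextract32(0b011, 0, 3) ==  3;   // LMUL = 8
 *
 * so the effective LMUL is always 2^lmul.
 */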
124
f1eed927 125static inline uint32_t vext_vta(uint32_t desc)
126{
127 return FIELD_EX32(simd_data(desc), VDATA, VTA);
128}
129
5c19fc15 130static inline uint32_t vext_vta_all_1s(uint32_t desc)
131{
132 return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
133}
134
751538d5 135/*
5a9f8e15 136 * Get the maximum number of elements that can be operated on.
751538d5 137 *
c7b8a421 138 * log2_esz: log2 of element size in bytes.
751538d5 139 */
c7b8a421 140static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
751538d5 141{
5a9f8e15 142 /*
8a4b5257 143 * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
144 * so vlen in bytes (vlenb) is encoded as maxsz.
145 */
146 uint32_t vlenb = simd_maxsz(desc);
147
148 /* Return VLMAX */
c7b8a421 149 int scale = vext_lmul(desc) - log2_esz;
5a9f8e15 150 return scale < 0 ? vlenb >> -scale : vlenb << scale;
151}
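/*
 * Worked example (hypothetical values): with vlenb = 16 (VLEN = 128),
 * LMUL = 2 (lmul = 1) and SEW = 32 (log2_esz = 2), scale = 1 - 2 = -1 and
 * VLMAX = vlenb >> 1 = 8 elements, which matches LMUL * VLEN / SEW.
 */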
152
f1eed927 153/*
154 * Get number of total elements, including prestart, body and tail elements.
155 * Note that when LMUL < 1, the tail includes the elements past VLMAX that
156 * are held in the same vector register.
157 */
158static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
159 uint32_t esz)
160{
161 uint32_t vlenb = simd_maxsz(desc);
162 uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
163 int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
164 ctzl(esz) - ctzl(sew) + vext_lmul(desc);
165 return (vlenb << emul) / esz;
166}
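/*
 * Worked example (hypothetical values): vlenb = 16, SEW = 8 (sew = 1),
 * esz = 1 and LMUL = 1/2 (lmul = -1) give emul = 0 - 0 + (-1), clamped to 0,
 * so the total is (16 << 0) / 1 = 16 elements: the whole register is counted,
 * the 8 elements past VLMAX being tail.
 */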
167
168static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
169{
170 return (addr & env->cur_pmmask) | env->cur_pmbase;
171}
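/*
 * Rough sketch of the effect, assuming pointer masking with a hypothetical
 * tag in the top byte on RV64 and cur_pmbase == 0:
 *
 *   cur_pmmask = 0x00ffffffffffffffULL;
 *   adjust_addr(env, 0xab00000080001000ULL);   // == 0x0000000080001000ULL
 *
 * i.e. the tag bits are stripped before the address reaches the TLB helpers.
 */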
172
173/*
174 * This function checks watchpoint before real load operation.
175 *
 176 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
 177 * In user mode, there is no watchpoint support now.
 178 *
 179 * It will trigger an exception if there is no mapping in the TLB
 180 * and the page table walk can't fill the TLB entry. Then the guest
 181 * software can return here after processing the exception, or never return.
182 */
183static void probe_pages(CPURISCVState *env, target_ulong addr,
184 target_ulong len, uintptr_t ra,
185 MMUAccessType access_type)
186{
187 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
188 target_ulong curlen = MIN(pagelen, len);
189
d6b9d930 190 probe_access(env, adjust_addr(env, addr), curlen, access_type,
191 cpu_mmu_index(env, false), ra);
192 if (len > curlen) {
193 addr += curlen;
194 curlen = len - curlen;
d6b9d930 195 probe_access(env, adjust_addr(env, addr), curlen, access_type,
196 cpu_mmu_index(env, false), ra);
197 }
198}
199
f1eed927 200/* set agnostic elements to 1s */
201static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
202 uint32_t tot)
203{
204 if (is_agnostic == 0) {
205 /* policy undisturbed */
206 return;
207 }
208 if (tot - cnt == 0) {
 209 return;
210 }
211 memset(base + cnt, -1, tot - cnt);
212}
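/*
 * Example (illustrative only): for an 8-byte destination region whose body
 * occupies the first 3 bytes, a tail-agnostic policy overwrites the rest
 * with all ones:
 *
 *   vext_set_elems_1s(vd, 1, 3, 8);   // bytes 3..7 become 0xff
 *   vext_set_elems_1s(vd, 0, 3, 8);   // undisturbed policy: no change
 */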
213
214static inline void vext_set_elem_mask(void *v0, int index,
215 uint8_t value)
3a6f8f68 216{
217 int idx = index / 64;
218 int pos = index % 64;
3a6f8f68 219 uint64_t old = ((uint64_t *)v0)[idx];
f9298de5 220 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
3a6f8f68 221}
751538d5 222
223/*
224 * Earlier designs (pre-0.9) had a varying number of bits
225 * per mask value (MLEN). In the 0.9 design, MLEN=1.
226 * (Section 4.5)
227 */
228static inline int vext_elem_mask(void *v0, int index)
751538d5 229{
230 int idx = index / 64;
231 int pos = index % 64;
232 return (((uint64_t *)v0)[idx] >> pos) & 1;
233}
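/*
 * Example: mask element 70 is bit 6 of the second 64-bit word of v0, so
 * vext_elem_mask(v0, 70) evaluates (((uint64_t *)v0)[1] >> 6) & 1, and
 * vext_set_elem_mask(v0, 70, 1) deposits a 1 into that same bit.
 */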
234
235/* elements operations for load and store */
236typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
237 uint32_t idx, void *vd, uintptr_t retaddr);
751538d5 238
79556fb6 239#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
240static void NAME(CPURISCVState *env, abi_ptr addr, \
241 uint32_t idx, void *vd, uintptr_t retaddr)\
242{ \
751538d5 243 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
79556fb6 244 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
245} \
246
247GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
248GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
249GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
250GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
251
252#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
253static void NAME(CPURISCVState *env, abi_ptr addr, \
254 uint32_t idx, void *vd, uintptr_t retaddr)\
255{ \
256 ETYPE data = *((ETYPE *)vd + H(idx)); \
257 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
258}
259
260GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
261GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
262GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
263GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
264
265/*
266 *** stride: access vector element from strided memory
267 */
268static void
269vext_ldst_stride(void *vd, void *v0, target_ulong base,
270 target_ulong stride, CPURISCVState *env,
271 uint32_t desc, uint32_t vm,
3479a814 272 vext_ldst_elem_fn *ldst_elem,
c7b8a421 273 uint32_t log2_esz, uintptr_t ra)
274{
275 uint32_t i, k;
276 uint32_t nf = vext_nf(desc);
c7b8a421 277 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 278 uint32_t esz = 1 << log2_esz;
279 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
280 uint32_t vta = vext_vta(desc);
751538d5 281
f714361e 282 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
f9298de5 283 if (!vm && !vext_elem_mask(v0, i)) {
284 continue;
285 }
f714361e 286
751538d5 287 k = 0;
751538d5 288 while (k < nf) {
c7b8a421 289 target_ulong addr = base + stride * i + (k << log2_esz);
d6b9d930 290 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
291 k++;
292 }
293 }
f714361e 294 env->vstart = 0;
752614ca 295 /* set tail elements to 1s */
296 for (k = 0; k < nf; ++k) {
297 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
298 (k * max_elems + max_elems) * esz);
299 }
300 if (nf * max_elems % total_elems != 0) {
301 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
302 uint32_t registers_used =
303 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
304 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
305 registers_used * vlenb);
306 }
307}
308
79556fb6 309#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
310void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
311 target_ulong stride, CPURISCVState *env, \
312 uint32_t desc) \
313{ \
314 uint32_t vm = vext_vm(desc); \
315 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
25eae048 316 ctzl(sizeof(ETYPE)), GETPC()); \
317}
318
319GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
320GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
321GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
322GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
323
324#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
325void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
326 target_ulong stride, CPURISCVState *env, \
327 uint32_t desc) \
328{ \
329 uint32_t vm = vext_vm(desc); \
330 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
25eae048 331 ctzl(sizeof(ETYPE)), GETPC()); \
332}
333
334GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
335GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
336GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
337GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
338
339/*
340 *** unit-stride: access elements stored contiguously in memory
341 */
342
 343/* unmasked unit-stride load and store operation */
344static void
345vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
c7b8a421 346 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
25eae048 347 uintptr_t ra)
348{
349 uint32_t i, k;
350 uint32_t nf = vext_nf(desc);
c7b8a421 351 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 352 uint32_t esz = 1 << log2_esz;
353 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
354 uint32_t vta = vext_vta(desc);
751538d5 355
751538d5 356 /* load bytes from guest memory */
5c89e9c0 357 for (i = env->vstart; i < evl; i++, env->vstart++) {
358 k = 0;
359 while (k < nf) {
c7b8a421 360 target_ulong addr = base + ((i * nf + k) << log2_esz);
d6b9d930 361 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
362 k++;
363 }
364 }
f714361e 365 env->vstart = 0;
752614ca 366 /* set tail elements to 1s */
367 for (k = 0; k < nf; ++k) {
368 vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz,
369 (k * max_elems + max_elems) * esz);
370 }
371 if (nf * max_elems % total_elems != 0) {
372 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
373 uint32_t registers_used =
374 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
375 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
376 registers_used * vlenb);
377 }
378}
379
380/*
 381 * masked unit-stride load and store operations are a special case of stride,
 382 * with stride = NF * sizeof(MTYPE)
383 */
384
79556fb6 385#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
386void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
387 CPURISCVState *env, uint32_t desc) \
388{ \
5a9f8e15 389 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
751538d5 390 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
25eae048 391 ctzl(sizeof(ETYPE)), GETPC()); \
392} \
393 \
394void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
395 CPURISCVState *env, uint32_t desc) \
396{ \
3479a814 397 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
25eae048 398 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
399}
400
401GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
402GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
403GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
404GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
405
406#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
407void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
408 CPURISCVState *env, uint32_t desc) \
409{ \
410 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
411 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
25eae048 412 ctzl(sizeof(ETYPE)), GETPC()); \
413} \
414 \
415void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
416 CPURISCVState *env, uint32_t desc) \
417{ \
418 vext_ldst_us(vd, base, env, desc, STORE_FN, \
25eae048 419 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
420}
421
422GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
423GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
424GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
425GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
f732560e 426
427/*
428 *** unit stride mask load and store, EEW = 1
429 */
430void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
431 CPURISCVState *env, uint32_t desc)
432{
433 /* evl = ceil(vl/8) */
434 uint8_t evl = (env->vl + 7) >> 3;
435 vext_ldst_us(vd, base, env, desc, lde_b,
25eae048 436 0, evl, GETPC());
437}
438
439void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
440 CPURISCVState *env, uint32_t desc)
441{
442 /* evl = ceil(vl/8) */
443 uint8_t evl = (env->vl + 7) >> 3;
444 vext_ldst_us(vd, base, env, desc, ste_b,
25eae048 445 0, evl, GETPC());
446}
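/*
 * Worked example for evl above: the mask uses one bit per element, so with
 * vl = 17 the two helpers transfer evl = (17 + 7) >> 3 = 3 bytes, i.e.
 * ceil(17 / 8).
 */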
447
448/*
449 *** index: access vector element from indexed memory
450 */
451typedef target_ulong vext_get_index_addr(target_ulong base,
452 uint32_t idx, void *vs2);
453
454#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
455static target_ulong NAME(target_ulong base, \
456 uint32_t idx, void *vs2) \
457{ \
458 return (base + *((ETYPE *)vs2 + H(idx))); \
459}
460
461GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
462GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
463GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
464GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
465
466static inline void
467vext_ldst_index(void *vd, void *v0, target_ulong base,
468 void *vs2, CPURISCVState *env, uint32_t desc,
469 vext_get_index_addr get_index_addr,
470 vext_ldst_elem_fn *ldst_elem,
c7b8a421 471 uint32_t log2_esz, uintptr_t ra)
472{
473 uint32_t i, k;
474 uint32_t nf = vext_nf(desc);
475 uint32_t vm = vext_vm(desc);
c7b8a421 476 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 477 uint32_t esz = 1 << log2_esz;
478 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
479 uint32_t vta = vext_vta(desc);
f732560e 480
f732560e 481 /* load bytes from guest memory */
f714361e 482 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
f9298de5 483 if (!vm && !vext_elem_mask(v0, i)) {
484 continue;
485 }
486
487 k = 0;
f732560e 488 while (k < nf) {
c7b8a421 489 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
d6b9d930 490 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
491 k++;
492 }
493 }
f714361e 494 env->vstart = 0;
752614ca 495 /* set tail elements to 1s */
496 for (k = 0; k < nf; ++k) {
497 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
498 (k * max_elems + max_elems) * esz);
499 }
500 if (nf * max_elems % total_elems != 0) {
501 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
502 uint32_t registers_used =
503 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
504 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
505 registers_used * vlenb);
506 }
507}
508
08b9d0ed 509#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
510void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
511 void *vs2, CPURISCVState *env, uint32_t desc) \
512{ \
513 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
25eae048 514 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
515}
516
517GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
518GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
519GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
520GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
521GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
522GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
523GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
524GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
525GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
526GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
527GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
528GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
529GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
530GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
531GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
532GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
533
534#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
535void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
536 void *vs2, CPURISCVState *env, uint32_t desc) \
537{ \
538 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
5a9f8e15 539 STORE_FN, ctzl(sizeof(ETYPE)), \
25eae048 540 GETPC()); \
541}
542
543GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
544GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
545GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
546GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
547GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
548GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
549GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
550GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
551GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
552GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
553GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
554GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
555GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
556GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
557GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
558GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
559
560/*
 561 *** unit-stride fault-only-first load instructions
562 */
563static inline void
564vext_ldff(void *vd, void *v0, target_ulong base,
565 CPURISCVState *env, uint32_t desc,
566 vext_ldst_elem_fn *ldst_elem,
c7b8a421 567 uint32_t log2_esz, uintptr_t ra)
568{
569 void *host;
570 uint32_t i, k, vl = 0;
571 uint32_t nf = vext_nf(desc);
572 uint32_t vm = vext_vm(desc);
c7b8a421 573 uint32_t max_elems = vext_max_elems(desc, log2_esz);
752614ca 574 uint32_t esz = 1 << log2_esz;
575 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
576 uint32_t vta = vext_vta(desc);
577 target_ulong addr, offset, remain;
578
 579 /* probe every access */
f714361e 580 for (i = env->vstart; i < env->vl; i++) {
f9298de5 581 if (!vm && !vext_elem_mask(v0, i)) {
582 continue;
583 }
c7b8a421 584 addr = adjust_addr(env, base + i * (nf << log2_esz));
022b4ecf 585 if (i == 0) {
c7b8a421 586 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
587 } else {
588 /* if it triggers an exception, no need to check watchpoint */
c7b8a421 589 remain = nf << log2_esz;
590 while (remain > 0) {
591 offset = -(addr | TARGET_PAGE_MASK);
592 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
593 cpu_mmu_index(env, false));
594 if (host) {
595#ifdef CONFIG_USER_ONLY
01d09525 596 if (page_check_range(addr, offset, PAGE_READ) < 0) {
597 vl = i;
598 goto ProbeSuccess;
599 }
600#else
01d09525 601 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
602#endif
603 } else {
604 vl = i;
605 goto ProbeSuccess;
606 }
607 if (remain <= offset) {
608 break;
609 }
610 remain -= offset;
d6b9d930 611 addr = adjust_addr(env, addr + offset);
612 }
613 }
614 }
615ProbeSuccess:
616 /* load bytes from guest memory */
617 if (vl != 0) {
618 env->vl = vl;
619 }
f714361e 620 for (i = env->vstart; i < env->vl; i++) {
022b4ecf 621 k = 0;
f9298de5 622 if (!vm && !vext_elem_mask(v0, i)) {
623 continue;
624 }
625 while (k < nf) {
c7b8a421 626 target_ulong addr = base + ((i * nf + k) << log2_esz);
d6b9d930 627 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
628 k++;
629 }
630 }
f714361e 631 env->vstart = 0;
752614ca 632 /* set tail elements to 1s */
633 for (k = 0; k < nf; ++k) {
634 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
635 (k * max_elems + max_elems) * esz);
636 }
637 if (nf * max_elems % total_elems != 0) {
638 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
639 uint32_t registers_used =
640 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
641 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
642 registers_used * vlenb);
643 }
644}
645
646#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
647void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
648 CPURISCVState *env, uint32_t desc) \
649{ \
650 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
5a9f8e15 651 ctzl(sizeof(ETYPE)), GETPC()); \
652}
653
654GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
655GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
656GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
657GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
268fcca6 658
659#define DO_SWAP(N, M) (M)
660#define DO_AND(N, M) (N & M)
661#define DO_XOR(N, M) (N ^ M)
662#define DO_OR(N, M) (N | M)
663#define DO_ADD(N, M) (N + M)
664
665/* Signed min/max */
666#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
667#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
668
669/* Unsigned min/max */
670#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
671#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
672
673/*
674 *** load and store whole register instructions
675 */
676static void
677vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
c7b8a421 678 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
30206bd8 679{
f714361e 680 uint32_t i, k, off, pos;
681 uint32_t nf = vext_nf(desc);
682 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
c7b8a421 683 uint32_t max_elems = vlenb >> log2_esz;
30206bd8 684
685 k = env->vstart / max_elems;
686 off = env->vstart % max_elems;
30206bd8 687
688 if (off) {
689 /* load/store rest of elements of current segment pointed by vstart */
690 for (pos = off; pos < max_elems; pos++, env->vstart++) {
c7b8a421 691 target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
d6b9d930 692 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
693 }
694 k++;
695 }
696
697 /* load/store elements for rest of segments */
698 for (; k < nf; k++) {
699 for (i = 0; i < max_elems; i++, env->vstart++) {
c7b8a421 700 target_ulong addr = base + ((i + k * max_elems) << log2_esz);
d6b9d930 701 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
702 }
703 }
704
705 env->vstart = 0;
706}
707
708#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
709void HELPER(NAME)(void *vd, target_ulong base, \
710 CPURISCVState *env, uint32_t desc) \
711{ \
712 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
25eae048 713 ctzl(sizeof(ETYPE)), GETPC()); \
714}
715
716GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
717GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
718GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
719GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
720GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
721GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
722GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
723GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
724GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
725GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
726GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
727GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
728GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
729GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
730GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
731GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
732
733#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
734void HELPER(NAME)(void *vd, target_ulong base, \
735 CPURISCVState *env, uint32_t desc) \
736{ \
737 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
25eae048 738 ctzl(sizeof(ETYPE)), GETPC()); \
739}
740
741GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
742GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
743GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
744GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
745
746/*
747 *** Vector Integer Arithmetic Instructions
748 */
749
750/* expand macro args before macro */
751#define RVVCALL(macro, ...) macro(__VA_ARGS__)
752
753/* (TD, T1, T2, TX1, TX2) */
754#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
755#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
756#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
757#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
758#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
759#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
760#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
761#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
762#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
763#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
764#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
765#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
766#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
767#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
768#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
769#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
770#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
771#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
772#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
773#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
774#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
775#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
776#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
777#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
778#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
779#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
780#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
781#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
782#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
783#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
784
785/* operation of two vector elements */
786typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
787
788#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
789static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
790{ \
791 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
792 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
793 *((TD *)vd + HD(i)) = OP(s2, s1); \
794}
795#define DO_SUB(N, M) (N - M)
796#define DO_RSUB(N, M) (M - N)
797
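/*
 * For reference, a hand-written sketch of what one of the RVVCALL expansions
 * below produces (not generated code):
 *
 *   static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(i));
 *       int8_t s2 = *((int8_t *)vs2 + H1(i));
 *       *((int8_t *)vd + H1(i)) = s2 + s1;
 *   }
 */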
798RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
799RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
800RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
801RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
802RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
803RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
804RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
805RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
806
807static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
808 CPURISCVState *env, uint32_t desc,
f1eed927 809 opivv2_fn *fn, uint32_t esz)
43740e3a 810{
811 uint32_t vm = vext_vm(desc);
812 uint32_t vl = env->vl;
f1eed927 813 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
814 uint32_t vta = vext_vta(desc);
815 uint32_t i;
816
f714361e 817 for (i = env->vstart; i < vl; i++) {
f9298de5 818 if (!vm && !vext_elem_mask(v0, i)) {
819 continue;
820 }
821 fn(vd, vs1, vs2, i);
822 }
f714361e 823 env->vstart = 0;
f1eed927 824 /* set tail elements to 1s */
825 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
826}
827
828/* generate the helpers for OPIVV */
f1eed927 829#define GEN_VEXT_VV(NAME, ESZ) \
830void HELPER(NAME)(void *vd, void *v0, void *vs1, \
831 void *vs2, CPURISCVState *env, \
832 uint32_t desc) \
833{ \
8a085fb2 834 do_vext_vv(vd, v0, vs1, vs2, env, desc, \
f1eed927 835 do_##NAME, ESZ); \
836}
837
f1eed927 838GEN_VEXT_VV(vadd_vv_b, 1)
839GEN_VEXT_VV(vadd_vv_h, 2)
840GEN_VEXT_VV(vadd_vv_w, 4)
841GEN_VEXT_VV(vadd_vv_d, 8)
842GEN_VEXT_VV(vsub_vv_b, 1)
843GEN_VEXT_VV(vsub_vv_h, 2)
844GEN_VEXT_VV(vsub_vv_w, 4)
845GEN_VEXT_VV(vsub_vv_d, 8)
846
847typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
848
849/*
 850 * (T1)s1 gives the real operand type.
 851 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
852 */
853#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
854static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
855{ \
856 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
857 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
858}
859
860RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
861RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
862RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
863RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
864RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
865RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
866RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
867RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
868RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
869RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
870RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
871RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
872
873static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
874 CPURISCVState *env, uint32_t desc,
5c19fc15 875 opivx2_fn fn, uint32_t esz)
43740e3a 876{
877 uint32_t vm = vext_vm(desc);
878 uint32_t vl = env->vl;
5c19fc15 879 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
880 uint32_t vta = vext_vta(desc);
881 uint32_t i;
882
f714361e 883 for (i = env->vstart; i < vl; i++) {
f9298de5 884 if (!vm && !vext_elem_mask(v0, i)) {
885 continue;
886 }
887 fn(vd, s1, vs2, i);
888 }
f714361e 889 env->vstart = 0;
5c19fc15 890 /* set tail elements to 1s */
891 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
892}
893
894/* generate the helpers for OPIVX */
5c19fc15 895#define GEN_VEXT_VX(NAME, ESZ) \
896void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
897 void *vs2, CPURISCVState *env, \
898 uint32_t desc) \
899{ \
8a085fb2 900 do_vext_vx(vd, v0, s1, vs2, env, desc, \
5c19fc15 901 do_##NAME, ESZ); \
902}
903
5c19fc15 904GEN_VEXT_VX(vadd_vx_b, 1)
905GEN_VEXT_VX(vadd_vx_h, 2)
906GEN_VEXT_VX(vadd_vx_w, 4)
907GEN_VEXT_VX(vadd_vx_d, 8)
908GEN_VEXT_VX(vsub_vx_b, 1)
909GEN_VEXT_VX(vsub_vx_h, 2)
910GEN_VEXT_VX(vsub_vx_w, 4)
911GEN_VEXT_VX(vsub_vx_d, 8)
912GEN_VEXT_VX(vrsub_vx_b, 1)
913GEN_VEXT_VX(vrsub_vx_h, 2)
914GEN_VEXT_VX(vrsub_vx_w, 4)
915GEN_VEXT_VX(vrsub_vx_d, 8)
916
917void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
918{
919 intptr_t oprsz = simd_oprsz(desc);
920 intptr_t i;
921
922 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
923 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
924 }
925}
926
927void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
928{
929 intptr_t oprsz = simd_oprsz(desc);
930 intptr_t i;
931
932 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
933 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
934 }
935}
936
937void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
938{
939 intptr_t oprsz = simd_oprsz(desc);
940 intptr_t i;
941
942 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
943 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
944 }
945}
946
947void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
948{
949 intptr_t oprsz = simd_oprsz(desc);
950 intptr_t i;
951
952 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
953 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
954 }
955}
956
957/* Vector Widening Integer Add/Subtract */
958#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
959#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
960#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
961#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
962#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
963#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
964#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
965#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
966#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
967#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
968#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
969#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
970RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
971RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
972RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
973RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
974RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
975RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
976RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
977RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
978RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
979RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
980RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
981RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
982RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
983RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
984RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
985RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
986RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
987RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
988RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
989RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
990RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
991RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
992RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
993RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
f1eed927 994GEN_VEXT_VV(vwaddu_vv_b, 2)
995GEN_VEXT_VV(vwaddu_vv_h, 4)
996GEN_VEXT_VV(vwaddu_vv_w, 8)
997GEN_VEXT_VV(vwsubu_vv_b, 2)
998GEN_VEXT_VV(vwsubu_vv_h, 4)
999GEN_VEXT_VV(vwsubu_vv_w, 8)
1000GEN_VEXT_VV(vwadd_vv_b, 2)
1001GEN_VEXT_VV(vwadd_vv_h, 4)
1002GEN_VEXT_VV(vwadd_vv_w, 8)
1003GEN_VEXT_VV(vwsub_vv_b, 2)
1004GEN_VEXT_VV(vwsub_vv_h, 4)
1005GEN_VEXT_VV(vwsub_vv_w, 8)
1006GEN_VEXT_VV(vwaddu_wv_b, 2)
1007GEN_VEXT_VV(vwaddu_wv_h, 4)
1008GEN_VEXT_VV(vwaddu_wv_w, 8)
1009GEN_VEXT_VV(vwsubu_wv_b, 2)
1010GEN_VEXT_VV(vwsubu_wv_h, 4)
1011GEN_VEXT_VV(vwsubu_wv_w, 8)
1012GEN_VEXT_VV(vwadd_wv_b, 2)
1013GEN_VEXT_VV(vwadd_wv_h, 4)
1014GEN_VEXT_VV(vwadd_wv_w, 8)
1015GEN_VEXT_VV(vwsub_wv_b, 2)
1016GEN_VEXT_VV(vwsub_wv_h, 4)
1017GEN_VEXT_VV(vwsub_wv_w, 8)
1018
1019RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
1020RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
1021RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
1022RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
1023RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
1024RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
1025RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
1026RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
1027RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
1028RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
1029RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
1030RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
1031RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
1032RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
1033RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
1034RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
1035RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
1036RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
1037RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
1038RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
1039RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
1040RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
1041RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
1042RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
5c19fc15 1043GEN_VEXT_VX(vwaddu_vx_b, 2)
1044GEN_VEXT_VX(vwaddu_vx_h, 4)
1045GEN_VEXT_VX(vwaddu_vx_w, 8)
1046GEN_VEXT_VX(vwsubu_vx_b, 2)
1047GEN_VEXT_VX(vwsubu_vx_h, 4)
1048GEN_VEXT_VX(vwsubu_vx_w, 8)
1049GEN_VEXT_VX(vwadd_vx_b, 2)
1050GEN_VEXT_VX(vwadd_vx_h, 4)
1051GEN_VEXT_VX(vwadd_vx_w, 8)
1052GEN_VEXT_VX(vwsub_vx_b, 2)
1053GEN_VEXT_VX(vwsub_vx_h, 4)
1054GEN_VEXT_VX(vwsub_vx_w, 8)
1055GEN_VEXT_VX(vwaddu_wx_b, 2)
1056GEN_VEXT_VX(vwaddu_wx_h, 4)
1057GEN_VEXT_VX(vwaddu_wx_w, 8)
1058GEN_VEXT_VX(vwsubu_wx_b, 2)
1059GEN_VEXT_VX(vwsubu_wx_h, 4)
1060GEN_VEXT_VX(vwsubu_wx_w, 8)
1061GEN_VEXT_VX(vwadd_wx_b, 2)
1062GEN_VEXT_VX(vwadd_wx_h, 4)
1063GEN_VEXT_VX(vwadd_wx_w, 8)
1064GEN_VEXT_VX(vwsub_wx_b, 2)
1065GEN_VEXT_VX(vwsub_wx_h, 4)
1066GEN_VEXT_VX(vwsub_wx_w, 8)
1067
1068/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1069#define DO_VADC(N, M, C) (N + M + C)
1070#define DO_VSBC(N, M, C) (N - M - C)
1071
3479a814 1072#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
1073void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1074 CPURISCVState *env, uint32_t desc) \
1075{ \
3a6f8f68 1076 uint32_t vl = env->vl; \
5c19fc15 1077 uint32_t esz = sizeof(ETYPE); \
1078 uint32_t total_elems = \
1079 vext_get_total_elems(env, desc, esz); \
1080 uint32_t vta = vext_vta(desc); \
1081 uint32_t i; \
1082 \
f714361e 1083 for (i = env->vstart; i < vl; i++) { \
1084 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1085 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1086 ETYPE carry = vext_elem_mask(v0, i); \
1087 \
1088 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
1089 } \
f714361e 1090 env->vstart = 0; \
5c19fc15 1091 /* set tail elements to 1s */ \
1092 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
3a6f8f68
LZ
1093}
1094
1095GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
1096GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
1097GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
1098GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
3a6f8f68 1099
1100GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
1101GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
1102GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
1103GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
3a6f8f68 1104
3479a814 1105#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
3a6f8f68
LZ
1106void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1107 CPURISCVState *env, uint32_t desc) \
1108{ \
3a6f8f68 1109 uint32_t vl = env->vl; \
5c19fc15 1110 uint32_t esz = sizeof(ETYPE); \
1111 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1112 uint32_t vta = vext_vta(desc); \
1113 uint32_t i; \
1114 \
f714361e 1115 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 1116 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1117 ETYPE carry = vext_elem_mask(v0, i); \
1118 \
1119 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1120 } \
f714361e 1121 env->vstart = 0; \
5c19fc15 1122 /* set tail elements to 1s */ \
1123 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1124}
1125
1126GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
1127GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1128GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1129GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
3a6f8f68 1130
1131GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
1132GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1133GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1134GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1135
1136#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
1137 (__typeof(N))(N + M) < N)
1138#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
1139
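/*
 * The carry/borrow-out tests above rely on unsigned wrap-around; a small
 * worked example with uint8_t operands: 200 + 100 wraps to 44 and 44 < 200,
 * so DO_MADC(200, 100, 0) reports a carry of 1; with an incoming carry,
 * 200 + 55 + 1 wraps to 0 <= 200, so DO_MADC(200, 55, 1) is 1 as well.
 */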
1140#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
1141void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1142 CPURISCVState *env, uint32_t desc) \
1143{ \
3a6f8f68 1144 uint32_t vl = env->vl; \
bb45485a 1145 uint32_t vm = vext_vm(desc); \
5c19fc15 1146 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1147 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1148 uint32_t i; \
1149 \
f714361e 1150 for (i = env->vstart; i < vl; i++) { \
1151 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1152 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1153 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 1154 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
3a6f8f68 1155 } \
f714361e 1156 env->vstart = 0; \
5c19fc15 1157 /* mask destination register is always tail-agnostic */ \
1158 /* set tail elements to 1s */ \
1159 if (vta_all_1s) { \
1160 for (; i < total_elems; i++) { \
1161 vext_set_elem_mask(vd, i, 1); \
1162 } \
1163 } \
1164}
1165
1166GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1167GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1168GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1169GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1170
1171GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1172GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1173GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1174GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1175
1176#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1177void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1178 void *vs2, CPURISCVState *env, uint32_t desc) \
1179{ \
3a6f8f68 1180 uint32_t vl = env->vl; \
bb45485a 1181 uint32_t vm = vext_vm(desc); \
5c19fc15 1182 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1183 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1184 uint32_t i; \
1185 \
f714361e 1186 for (i = env->vstart; i < vl; i++) { \
3a6f8f68 1187 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
bb45485a 1188 ETYPE carry = !vm && vext_elem_mask(v0, i); \
f9298de5 1189 vext_set_elem_mask(vd, i, \
1190 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1191 } \
f714361e 1192 env->vstart = 0; \
5c19fc15 1193 /* mask destination register are always tail-agnostic */ \
1194 /* set tail elements to 1s */ \
1195 if (vta_all_1s) { \
1196 for (; i < total_elems; i++) { \
1197 vext_set_elem_mask(vd, i, 1); \
1198 } \
1199 } \
1200}
1201
1202GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1203GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1204GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1205GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1206
1207GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1208GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1209GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1210GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1211
1212/* Vector Bitwise Logical Instructions */
1213RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1214RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1215RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1216RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1217RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1218RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1219RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1220RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1221RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1222RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1223RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1224RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
f1eed927 1225GEN_VEXT_VV(vand_vv_b, 1)
1226GEN_VEXT_VV(vand_vv_h, 2)
1227GEN_VEXT_VV(vand_vv_w, 4)
1228GEN_VEXT_VV(vand_vv_d, 8)
1229GEN_VEXT_VV(vor_vv_b, 1)
1230GEN_VEXT_VV(vor_vv_h, 2)
1231GEN_VEXT_VV(vor_vv_w, 4)
1232GEN_VEXT_VV(vor_vv_d, 8)
1233GEN_VEXT_VV(vxor_vv_b, 1)
1234GEN_VEXT_VV(vxor_vv_h, 2)
1235GEN_VEXT_VV(vxor_vv_w, 4)
1236GEN_VEXT_VV(vxor_vv_d, 8)
1237
1238RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1239RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1240RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1241RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1242RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1243RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1244RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1245RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1246RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1247RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1248RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1249RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
5c19fc15 1250GEN_VEXT_VX(vand_vx_b, 1)
1251GEN_VEXT_VX(vand_vx_h, 2)
1252GEN_VEXT_VX(vand_vx_w, 4)
1253GEN_VEXT_VX(vand_vx_d, 8)
1254GEN_VEXT_VX(vor_vx_b, 1)
1255GEN_VEXT_VX(vor_vx_h, 2)
1256GEN_VEXT_VX(vor_vx_w, 4)
1257GEN_VEXT_VX(vor_vx_d, 8)
1258GEN_VEXT_VX(vxor_vx_b, 1)
1259GEN_VEXT_VX(vxor_vx_h, 2)
1260GEN_VEXT_VX(vxor_vx_w, 4)
1261GEN_VEXT_VX(vxor_vx_d, 8)
1262
1263/* Vector Single-Width Bit Shift Instructions */
1264#define DO_SLL(N, M) (N << (M))
1265#define DO_SRL(N, M) (N >> (M))
1266
 1267/* generate the helpers for shift instructions with two vector operands */
3479a814 1268#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
1269void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1270 void *vs2, CPURISCVState *env, uint32_t desc) \
1271{ \
1272 uint32_t vm = vext_vm(desc); \
1273 uint32_t vl = env->vl; \
7b1bff41 1274 uint32_t esz = sizeof(TS1); \
1275 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1276 uint32_t vta = vext_vta(desc); \
1277 uint32_t i; \
1278 \
f714361e 1279 for (i = env->vstart; i < vl; i++) { \
f9298de5 1280 if (!vm && !vext_elem_mask(v0, i)) { \
1281 continue; \
1282 } \
1283 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1284 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1285 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1286 } \
f714361e 1287 env->vstart = 0; \
7b1bff41 1288 /* set tail elements to 1s */ \
1289 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1290}
1291
1292GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1293GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1294GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1295GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
3277d955 1296
1297GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1298GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1299GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1300GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
3277d955 1301
1302GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1303GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1304GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1305GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1306
1307/* generate the helpers for shift instructions with one vector and one scalar */
1308#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1309void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1310 void *vs2, CPURISCVState *env, uint32_t desc) \
1311{ \
1312 uint32_t vm = vext_vm(desc); \
1313 uint32_t vl = env->vl; \
7b1bff41 1314 uint32_t esz = sizeof(TD); \
1315 uint32_t total_elems = \
1316 vext_get_total_elems(env, desc, esz); \
1317 uint32_t vta = vext_vta(desc); \
1318 uint32_t i; \
1319 \
f714361e 1320 for (i = env->vstart; i < vl; i++) { \
1321 if (!vm && !vext_elem_mask(v0, i)) { \
1322 continue; \
1323 } \
1324 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1325 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1326 } \
f714361e 1327 env->vstart = 0; \
7b1bff41 1328 /* set tail elements to 1s */ \
1329 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
1330}
1331
1332GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1333GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1334GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1335GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1336
1337GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1338GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1339GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1340GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1341
1342GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1343GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1344GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1345GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1346
1347/* Vector Narrowing Integer Right Shift Instructions */
1348GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1349GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1350GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1351GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1352GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1353GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1354GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1355GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1356GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1357GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1358GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1359GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1360
1361/* Vector Integer Comparison Instructions */
1362#define DO_MSEQ(N, M) (N == M)
1363#define DO_MSNE(N, M) (N != M)
1364#define DO_MSLT(N, M) (N < M)
1365#define DO_MSLE(N, M) (N <= M)
1366#define DO_MSGT(N, M) (N > M)
1367
1368#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1369void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1370 CPURISCVState *env, uint32_t desc) \
1371{ \
1372 uint32_t vm = vext_vm(desc); \
1373 uint32_t vl = env->vl; \
38581e5c 1374 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1375 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1376 uint32_t i; \
1377 \
f714361e 1378 for (i = env->vstart; i < vl; i++) { \
1379 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1380 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1381 if (!vm && !vext_elem_mask(v0, i)) { \
1382 continue; \
1383 } \
f9298de5 1384 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1366fc79 1385 } \
f714361e 1386 env->vstart = 0; \
38581e5c 1387 /* mask destination register is always tail-agnostic */ \
1388 /* set tail elements to 1s */ \
1389 if (vta_all_1s) { \
1390 for (; i < total_elems; i++) { \
1391 vext_set_elem_mask(vd, i, 1); \
1392 } \
1393 } \
1394}
1395
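/*
 * Sketch of the tail handling above (hypothetical numbers): with vl = 20 and
 * cfg.vlen = 128, a vmseq.vv writes 20 comparison results and then, if
 * vta_all_1s is set, fills mask bits 20..127 with 1s, since a mask-producing
 * instruction always treats its destination as tail-agnostic.
 */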
1396GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1397GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1398GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1399GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1400
1401GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1402GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1403GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1404GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1405
1406GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1407GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1408GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1409GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1410
1411GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1412GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1413GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1414GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1415
1416GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1417GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1418GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1419GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1420
1421GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1422GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1423GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1424GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1425
1426#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1427void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1428 CPURISCVState *env, uint32_t desc) \
1429{ \
1430 uint32_t vm = vext_vm(desc); \
1431 uint32_t vl = env->vl; \
38581e5c 1432 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1433 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1434 uint32_t i; \
1435 \
f714361e 1436 for (i = env->vstart; i < vl; i++) { \
1366fc79 1437 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 1438 if (!vm && !vext_elem_mask(v0, i)) { \
1439 continue; \
1440 } \
f9298de5 1441 vext_set_elem_mask(vd, i, \
1442 DO_OP(s2, (ETYPE)(target_long)s1)); \
1443 } \
f714361e 1444 env->vstart = 0; \
38581e5c 1445 /* mask destination register are always tail-agnostic */ \
1446 /* set tail elements to 1s */ \
1447 if (vta_all_1s) { \
1448 for (; i < total_elems; i++) { \
1449 vext_set_elem_mask(vd, i, 1); \
1450 } \
1451 } \
1452}
1453
1454GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1455GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1456GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1457GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1458
1459GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1460GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1461GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1462GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1463
1464GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1465GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1466GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1467GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1468
1469GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1470GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1471GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1472GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1473
1474GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1475GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1476GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1477GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1478
1479GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1480GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1481GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1482GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1483
1484GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1485GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1486GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1487GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1488
1489GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1490GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1491GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1492GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
558fa779
LZ
1493
1494/* Vector Integer Min/Max Instructions */
1495RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1496RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1497RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1498RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1499RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1500RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1501RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1502RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1503RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1504RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1505RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1506RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1507RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1508RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1509RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1510RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
f1eed927 1511GEN_VEXT_VV(vminu_vv_b, 1)
1512GEN_VEXT_VV(vminu_vv_h, 2)
1513GEN_VEXT_VV(vminu_vv_w, 4)
1514GEN_VEXT_VV(vminu_vv_d, 8)
1515GEN_VEXT_VV(vmin_vv_b, 1)
1516GEN_VEXT_VV(vmin_vv_h, 2)
1517GEN_VEXT_VV(vmin_vv_w, 4)
1518GEN_VEXT_VV(vmin_vv_d, 8)
1519GEN_VEXT_VV(vmaxu_vv_b, 1)
1520GEN_VEXT_VV(vmaxu_vv_h, 2)
1521GEN_VEXT_VV(vmaxu_vv_w, 4)
1522GEN_VEXT_VV(vmaxu_vv_d, 8)
1523GEN_VEXT_VV(vmax_vv_b, 1)
1524GEN_VEXT_VV(vmax_vv_h, 2)
1525GEN_VEXT_VV(vmax_vv_w, 4)
1526GEN_VEXT_VV(vmax_vv_d, 8)
558fa779
LZ
1527
1528RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1529RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1530RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1531RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1532RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1533RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1534RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1535RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1536RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1537RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1538RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1539RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1540RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1541RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1542RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1543RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
5c19fc15 1544GEN_VEXT_VX(vminu_vx_b, 1)
1545GEN_VEXT_VX(vminu_vx_h, 2)
1546GEN_VEXT_VX(vminu_vx_w, 4)
1547GEN_VEXT_VX(vminu_vx_d, 8)
1548GEN_VEXT_VX(vmin_vx_b, 1)
1549GEN_VEXT_VX(vmin_vx_h, 2)
1550GEN_VEXT_VX(vmin_vx_w, 4)
1551GEN_VEXT_VX(vmin_vx_d, 8)
1552GEN_VEXT_VX(vmaxu_vx_b, 1)
1553GEN_VEXT_VX(vmaxu_vx_h, 2)
1554GEN_VEXT_VX(vmaxu_vx_w, 4)
1555GEN_VEXT_VX(vmaxu_vx_d, 8)
1556GEN_VEXT_VX(vmax_vx_b, 1)
1557GEN_VEXT_VX(vmax_vx_h, 2)
1558GEN_VEXT_VX(vmax_vx_w, 4)
1559GEN_VEXT_VX(vmax_vx_d, 8)
958b85f3
LZ
1560
1561/* Vector Single-Width Integer Multiply Instructions */
1562#define DO_MUL(N, M) (N * M)
1563RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1564RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1565RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1566RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
f1eed927 1567GEN_VEXT_VV(vmul_vv_b, 1)
1568GEN_VEXT_VV(vmul_vv_h, 2)
1569GEN_VEXT_VV(vmul_vv_w, 4)
1570GEN_VEXT_VV(vmul_vv_d, 8)
958b85f3
LZ
1571
1572static int8_t do_mulh_b(int8_t s2, int8_t s1)
1573{
1574 return (int16_t)s2 * (int16_t)s1 >> 8;
1575}
1576
1577static int16_t do_mulh_h(int16_t s2, int16_t s1)
1578{
1579 return (int32_t)s2 * (int32_t)s1 >> 16;
1580}
1581
1582static int32_t do_mulh_w(int32_t s2, int32_t s1)
1583{
1584 return (int64_t)s2 * (int64_t)s1 >> 32;
1585}
1586
1587static int64_t do_mulh_d(int64_t s2, int64_t s1)
1588{
1589 uint64_t hi_64, lo_64;
1590
1591 muls64(&lo_64, &hi_64, s1, s2);
1592 return hi_64;
1593}
1594
1595static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1596{
1597 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1598}
1599
1600static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1601{
1602 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1603}
1604
1605static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1606{
1607 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1608}
1609
1610static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1611{
1612 uint64_t hi_64, lo_64;
1613
1614 mulu64(&lo_64, &hi_64, s2, s1);
1615 return hi_64;
1616}
1617
1618static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1619{
1620 return (int16_t)s2 * (uint16_t)s1 >> 8;
1621}
1622
1623static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1624{
1625 return (int32_t)s2 * (uint32_t)s1 >> 16;
1626}
1627
1628static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1629{
1630 return (int64_t)s2 * (uint64_t)s1 >> 32;
1631}
1632
1633/*
1634 * Let A = signed operand,
1635 * B = unsigned operand
1636 * P = mulu64(A, B), unsigned product
1637 *
1638 * LET X = 2 ** 64 - A, 2's complement of A
1639 * SP = signed product
1640 * THEN
1641 * IF A < 0
1642 * SP = -X * B
1643 * = -(2 ** 64 - A) * B
1644 * = A * B - 2 ** 64 * B
1645 * = P - 2 ** 64 * B
1646 * ELSE
1647 * SP = P
1648 * THEN
1649 * HI_P -= (A < 0 ? B : 0)
1650 */
1651
1652static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1653{
1654 uint64_t hi_64, lo_64;
1655
1656 mulu64(&lo_64, &hi_64, s2, s1);
1657
1658 hi_64 -= s2 < 0 ? s1 : 0;
1659 return hi_64;
1660}
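/*
 * Worked example (illustrative): s2 = -1, s1 = 2.  mulu64() multiplies the
 * raw bits 0xFFFFFFFFFFFFFFFF by 2 and returns hi_64 = 1; subtracting s1
 * leaves hi_64 = 0xFFFFFFFFFFFFFFFF, the correct high half of the signed
 * product -2.
 */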
1661
1662RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1663RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1664RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1665RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1666RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1667RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1668RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1669RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1670RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1671RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1672RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1673RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
f1eed927 1674GEN_VEXT_VV(vmulh_vv_b, 1)
1675GEN_VEXT_VV(vmulh_vv_h, 2)
1676GEN_VEXT_VV(vmulh_vv_w, 4)
1677GEN_VEXT_VV(vmulh_vv_d, 8)
1678GEN_VEXT_VV(vmulhu_vv_b, 1)
1679GEN_VEXT_VV(vmulhu_vv_h, 2)
1680GEN_VEXT_VV(vmulhu_vv_w, 4)
1681GEN_VEXT_VV(vmulhu_vv_d, 8)
1682GEN_VEXT_VV(vmulhsu_vv_b, 1)
1683GEN_VEXT_VV(vmulhsu_vv_h, 2)
1684GEN_VEXT_VV(vmulhsu_vv_w, 4)
1685GEN_VEXT_VV(vmulhsu_vv_d, 8)
958b85f3
LZ
1686
1687RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1688RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1689RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1690RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1691RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1692RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1693RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1694RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1695RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1696RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1697RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1698RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1699RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1700RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1701RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1702RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
5c19fc15 1703GEN_VEXT_VX(vmul_vx_b, 1)
1704GEN_VEXT_VX(vmul_vx_h, 2)
1705GEN_VEXT_VX(vmul_vx_w, 4)
1706GEN_VEXT_VX(vmul_vx_d, 8)
1707GEN_VEXT_VX(vmulh_vx_b, 1)
1708GEN_VEXT_VX(vmulh_vx_h, 2)
1709GEN_VEXT_VX(vmulh_vx_w, 4)
1710GEN_VEXT_VX(vmulh_vx_d, 8)
1711GEN_VEXT_VX(vmulhu_vx_b, 1)
1712GEN_VEXT_VX(vmulhu_vx_h, 2)
1713GEN_VEXT_VX(vmulhu_vx_w, 4)
1714GEN_VEXT_VX(vmulhu_vx_d, 8)
1715GEN_VEXT_VX(vmulhsu_vx_b, 1)
1716GEN_VEXT_VX(vmulhsu_vx_h, 2)
1717GEN_VEXT_VX(vmulhsu_vx_w, 4)
1718GEN_VEXT_VX(vmulhsu_vx_d, 8)
85e6658c
LZ
1719
1720/* Vector Integer Divide Instructions */
1721#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1722#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1723#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\
1724 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1725#define DO_REM(N, M) (unlikely(M == 0) ? N :\
1726 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
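/*
 * Corner cases follow the RVV spec: division by zero yields an all-ones
 * quotient (-1) and leaves the dividend as the remainder; signed overflow
 * (INT_MIN / -1) yields INT_MIN with remainder 0.  The (N == -N) test is
 * true for N == 0 as well as N == INT_MIN, and returning N (or 0 for the
 * remainder) is correct in both cases.
 */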
1727
1728RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1729RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1730RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1731RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1732RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1733RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1734RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1735RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1736RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1737RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1738RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1739RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1740RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1741RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1742RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1743RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
f1eed927 1744GEN_VEXT_VV(vdivu_vv_b, 1)
1745GEN_VEXT_VV(vdivu_vv_h, 2)
1746GEN_VEXT_VV(vdivu_vv_w, 4)
1747GEN_VEXT_VV(vdivu_vv_d, 8)
1748GEN_VEXT_VV(vdiv_vv_b, 1)
1749GEN_VEXT_VV(vdiv_vv_h, 2)
1750GEN_VEXT_VV(vdiv_vv_w, 4)
1751GEN_VEXT_VV(vdiv_vv_d, 8)
1752GEN_VEXT_VV(vremu_vv_b, 1)
1753GEN_VEXT_VV(vremu_vv_h, 2)
1754GEN_VEXT_VV(vremu_vv_w, 4)
1755GEN_VEXT_VV(vremu_vv_d, 8)
1756GEN_VEXT_VV(vrem_vv_b, 1)
1757GEN_VEXT_VV(vrem_vv_h, 2)
1758GEN_VEXT_VV(vrem_vv_w, 4)
1759GEN_VEXT_VV(vrem_vv_d, 8)
85e6658c
LZ
1760
1761RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1762RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1763RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1764RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1765RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1766RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1767RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1768RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1769RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1770RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1771RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1772RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1773RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1774RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1775RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1776RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
5c19fc15 1777GEN_VEXT_VX(vdivu_vx_b, 1)
1778GEN_VEXT_VX(vdivu_vx_h, 2)
1779GEN_VEXT_VX(vdivu_vx_w, 4)
1780GEN_VEXT_VX(vdivu_vx_d, 8)
1781GEN_VEXT_VX(vdiv_vx_b, 1)
1782GEN_VEXT_VX(vdiv_vx_h, 2)
1783GEN_VEXT_VX(vdiv_vx_w, 4)
1784GEN_VEXT_VX(vdiv_vx_d, 8)
1785GEN_VEXT_VX(vremu_vx_b, 1)
1786GEN_VEXT_VX(vremu_vx_h, 2)
1787GEN_VEXT_VX(vremu_vx_w, 4)
1788GEN_VEXT_VX(vremu_vx_d, 8)
1789GEN_VEXT_VX(vrem_vx_b, 1)
1790GEN_VEXT_VX(vrem_vx_h, 2)
1791GEN_VEXT_VX(vrem_vx_w, 4)
1792GEN_VEXT_VX(vrem_vx_d, 8)
97b1cba3
LZ
1793
1794/* Vector Widening Integer Multiply Instructions */
1795RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1796RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1797RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1798RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1799RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1800RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1801RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1802RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1803RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
f1eed927 1804GEN_VEXT_VV(vwmul_vv_b, 2)
1805GEN_VEXT_VV(vwmul_vv_h, 4)
1806GEN_VEXT_VV(vwmul_vv_w, 8)
1807GEN_VEXT_VV(vwmulu_vv_b, 2)
1808GEN_VEXT_VV(vwmulu_vv_h, 4)
1809GEN_VEXT_VV(vwmulu_vv_w, 8)
1810GEN_VEXT_VV(vwmulsu_vv_b, 2)
1811GEN_VEXT_VV(vwmulsu_vv_h, 4)
1812GEN_VEXT_VV(vwmulsu_vv_w, 8)
97b1cba3
LZ
1813
1814RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1815RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1816RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1817RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1818RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1819RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1820RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1821RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1822RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
5c19fc15 1823GEN_VEXT_VX(vwmul_vx_b, 2)
1824GEN_VEXT_VX(vwmul_vx_h, 4)
1825GEN_VEXT_VX(vwmul_vx_w, 8)
1826GEN_VEXT_VX(vwmulu_vx_b, 2)
1827GEN_VEXT_VX(vwmulu_vx_h, 4)
1828GEN_VEXT_VX(vwmulu_vx_w, 8)
1829GEN_VEXT_VX(vwmulsu_vx_b, 2)
1830GEN_VEXT_VX(vwmulsu_vx_h, 4)
1831GEN_VEXT_VX(vwmulsu_vx_w, 8)
54df813a
LZ
1832
1833/* Vector Single-Width Integer Multiply-Add Instructions */
1834#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1835static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1836{ \
1837 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1838 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1839 TD d = *((TD *)vd + HD(i)); \
1840 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1841}
1842
1843#define DO_MACC(N, M, D) (M * N + D)
1844#define DO_NMSAC(N, M, D) (-(M * N) + D)
1845#define DO_MADD(N, M, D) (M * D + N)
1846#define DO_NMSUB(N, M, D) (-(M * D) + N)
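/*
 * With OP(s2, s1, d) mapped onto (N, M, D), the per-element semantics are:
 *   vmacc:  vd[i] =  (vs1[i] * vs2[i]) + vd[i]
 *   vnmsac: vd[i] = -(vs1[i] * vs2[i]) + vd[i]
 *   vmadd:  vd[i] =  (vs1[i] * vd[i])  + vs2[i]
 *   vnmsub: vd[i] = -(vs1[i] * vd[i])  + vs2[i]
 */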
1847RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1848RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1849RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1850RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1851RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1852RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1853RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1854RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1855RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1856RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1857RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1858RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1859RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1860RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1861RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1862RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
f1eed927 1863GEN_VEXT_VV(vmacc_vv_b, 1)
1864GEN_VEXT_VV(vmacc_vv_h, 2)
1865GEN_VEXT_VV(vmacc_vv_w, 4)
1866GEN_VEXT_VV(vmacc_vv_d, 8)
1867GEN_VEXT_VV(vnmsac_vv_b, 1)
1868GEN_VEXT_VV(vnmsac_vv_h, 2)
1869GEN_VEXT_VV(vnmsac_vv_w, 4)
1870GEN_VEXT_VV(vnmsac_vv_d, 8)
1871GEN_VEXT_VV(vmadd_vv_b, 1)
1872GEN_VEXT_VV(vmadd_vv_h, 2)
1873GEN_VEXT_VV(vmadd_vv_w, 4)
1874GEN_VEXT_VV(vmadd_vv_d, 8)
1875GEN_VEXT_VV(vnmsub_vv_b, 1)
1876GEN_VEXT_VV(vnmsub_vv_h, 2)
1877GEN_VEXT_VV(vnmsub_vv_w, 4)
1878GEN_VEXT_VV(vnmsub_vv_d, 8)
54df813a
LZ
1879
1880#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1881static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1882{ \
1883 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1884 TD d = *((TD *)vd + HD(i)); \
1885 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1886}
1887
1888RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1889RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1890RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1891RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1892RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1893RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1894RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1895RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1896RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1897RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1898RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1899RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1900RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1901RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1902RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1903RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
5c19fc15 1904GEN_VEXT_VX(vmacc_vx_b, 1)
1905GEN_VEXT_VX(vmacc_vx_h, 2)
1906GEN_VEXT_VX(vmacc_vx_w, 4)
1907GEN_VEXT_VX(vmacc_vx_d, 8)
1908GEN_VEXT_VX(vnmsac_vx_b, 1)
1909GEN_VEXT_VX(vnmsac_vx_h, 2)
1910GEN_VEXT_VX(vnmsac_vx_w, 4)
1911GEN_VEXT_VX(vnmsac_vx_d, 8)
1912GEN_VEXT_VX(vmadd_vx_b, 1)
1913GEN_VEXT_VX(vmadd_vx_h, 2)
1914GEN_VEXT_VX(vmadd_vx_w, 4)
1915GEN_VEXT_VX(vmadd_vx_d, 8)
1916GEN_VEXT_VX(vnmsub_vx_b, 1)
1917GEN_VEXT_VX(vnmsub_vx_h, 2)
1918GEN_VEXT_VX(vnmsub_vx_w, 4)
1919GEN_VEXT_VX(vnmsub_vx_d, 8)
2b587b33
LZ
1920
1921/* Vector Widening Integer Multiply-Add Instructions */
1922RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1923RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1924RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1925RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1926RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1927RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1928RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1929RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1930RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
f1eed927 1931GEN_VEXT_VV(vwmaccu_vv_b, 2)
1932GEN_VEXT_VV(vwmaccu_vv_h, 4)
1933GEN_VEXT_VV(vwmaccu_vv_w, 8)
1934GEN_VEXT_VV(vwmacc_vv_b, 2)
1935GEN_VEXT_VV(vwmacc_vv_h, 4)
1936GEN_VEXT_VV(vwmacc_vv_w, 8)
1937GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1938GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1939GEN_VEXT_VV(vwmaccsu_vv_w, 8)
2b587b33
LZ
1940
1941RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1942RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1943RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1944RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1945RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1946RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1947RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1948RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1949RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1950RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1951RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1952RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
5c19fc15 1953GEN_VEXT_VX(vwmaccu_vx_b, 2)
1954GEN_VEXT_VX(vwmaccu_vx_h, 4)
1955GEN_VEXT_VX(vwmaccu_vx_w, 8)
1956GEN_VEXT_VX(vwmacc_vx_b, 2)
1957GEN_VEXT_VX(vwmacc_vx_h, 4)
1958GEN_VEXT_VX(vwmacc_vx_w, 8)
1959GEN_VEXT_VX(vwmaccsu_vx_b, 2)
1960GEN_VEXT_VX(vwmaccsu_vx_h, 4)
1961GEN_VEXT_VX(vwmaccsu_vx_w, 8)
1962GEN_VEXT_VX(vwmaccus_vx_b, 2)
1963GEN_VEXT_VX(vwmaccus_vx_h, 4)
1964GEN_VEXT_VX(vwmaccus_vx_w, 8)
f020a7a1
LZ
1965
1966/* Vector Integer Merge and Move Instructions */
3479a814 1967#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
f020a7a1
LZ
1968void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1969 uint32_t desc) \
1970{ \
1971 uint32_t vl = env->vl; \
f020a7a1
LZ
1972 uint32_t i; \
1973 \
f714361e 1974 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
1975 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1976 *((ETYPE *)vd + H(i)) = s1; \
1977 } \
f714361e 1978 env->vstart = 0; \
f020a7a1
LZ
1979}
1980
3479a814
FC
1981GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1982GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1983GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1984GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
f020a7a1 1985
3479a814 1986#define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
f020a7a1
LZ
1987void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1988 uint32_t desc) \
1989{ \
1990 uint32_t vl = env->vl; \
f020a7a1
LZ
1991 uint32_t i; \
1992 \
f714361e 1993 for (i = env->vstart; i < vl; i++) { \
f020a7a1
LZ
1994 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
1995 } \
f714361e 1996 env->vstart = 0; \
f020a7a1
LZ
1997}
1998
3479a814
FC
1999GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
2000GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
2001GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
2002GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
f020a7a1 2003
3479a814 2004#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
f020a7a1
LZ
2005void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2006 CPURISCVState *env, uint32_t desc) \
2007{ \
f020a7a1 2008 uint32_t vl = env->vl; \
f020a7a1
LZ
2009 uint32_t i; \
2010 \
f714361e 2011 for (i = env->vstart; i < vl; i++) { \
f9298de5 2012 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
f020a7a1
LZ
2013 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
2014 } \
f714361e 2015 env->vstart = 0; \
f020a7a1
LZ
2016}
2017
3479a814
FC
2018GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
2019GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
2020GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
2021GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
f020a7a1 2022
3479a814 2023#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
f020a7a1
LZ
2024void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2025 void *vs2, CPURISCVState *env, uint32_t desc) \
2026{ \
f020a7a1 2027 uint32_t vl = env->vl; \
f020a7a1
LZ
2028 uint32_t i; \
2029 \
f714361e 2030 for (i = env->vstart; i < vl; i++) { \
f020a7a1 2031 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 2032 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
f020a7a1
LZ
2033 (ETYPE)(target_long)s1); \
2034 *((ETYPE *)vd + H(i)) = d; \
2035 } \
f714361e 2036 env->vstart = 0; \
f020a7a1
LZ
2037}
2038
3479a814
FC
2039GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
2040GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
2041GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
2042GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
eb2650e3
LZ
2043
2044/*
2045 *** Vector Fixed-Point Arithmetic Instructions
2046 */
2047
2048/* Vector Single-Width Saturating Add and Subtract */
2049
2050/*
2051 * Fixed-point instructions take a rounding mode and may saturate, so
2052 * define the common macros for fixed-point arithmetic here.
2053 */
2054typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2055 CPURISCVState *env, int vxrm);
2056
2057#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2058static inline void \
2059do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2060 CPURISCVState *env, int vxrm) \
2061{ \
2062 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2063 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2064 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2065}
2066
2067static inline void
2068vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2069 CPURISCVState *env,
f9298de5 2070 uint32_t vl, uint32_t vm, int vxrm,
eb2650e3
LZ
2071 opivv2_rm_fn *fn)
2072{
f714361e 2073 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2074 if (!vm && !vext_elem_mask(v0, i)) {
eb2650e3
LZ
2075 continue;
2076 }
2077 fn(vd, vs1, vs2, i, env, vxrm);
2078 }
f714361e 2079 env->vstart = 0;
eb2650e3
LZ
2080}
2081
2082static inline void
2083vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2084 CPURISCVState *env,
8a085fb2 2085 uint32_t desc,
3479a814 2086 opivv2_rm_fn *fn)
eb2650e3 2087{
eb2650e3
LZ
2088 uint32_t vm = vext_vm(desc);
2089 uint32_t vl = env->vl;
2090
2091 switch (env->vxrm) {
2092 case 0: /* rnu */
2093 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 2094 env, vl, vm, 0, fn);
eb2650e3
LZ
2095 break;
2096 case 1: /* rne */
2097 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 2098 env, vl, vm, 1, fn);
eb2650e3
LZ
2099 break;
2100 case 2: /* rdn */
2101 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 2102 env, vl, vm, 2, fn);
eb2650e3
LZ
2103 break;
2104 default: /* rod */
2105 vext_vv_rm_1(vd, v0, vs1, vs2,
f9298de5 2106 env, vl, vm, 3, fn);
eb2650e3
LZ
2107 break;
2108 }
eb2650e3
LZ
2109}
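/*
 * Note: dispatching on env->vxrm once here, rather than inside the element
 * loop, presumably lets each vext_vv_rm_1() specialization see a constant
 * rounding mode and avoids re-reading vxrm per element.
 */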
2110
2111/* generate helpers for fixed point instructions with OPIVV format */
8a085fb2 2112#define GEN_VEXT_VV_RM(NAME) \
eb2650e3
LZ
2113void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2114 CPURISCVState *env, uint32_t desc) \
2115{ \
8a085fb2 2116 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
3479a814 2117 do_##NAME); \
eb2650e3
LZ
2118}
2119
2120static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2121{
2122 uint8_t res = a + b;
2123 if (res < a) {
2124 res = UINT8_MAX;
2125 env->vxsat = 0x1;
2126 }
2127 return res;
2128}
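/*
 * Unsigned saturating add (this and the wider variants below): wrap-around
 * is detected by res < a, e.g. a = 200, b = 100 wraps to res = 44 for
 * uint8_t, in which case the result is clamped to the type maximum and the
 * sticky vxsat flag is set.
 */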
2129
2130static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2131 uint16_t b)
2132{
2133 uint16_t res = a + b;
2134 if (res < a) {
2135 res = UINT16_MAX;
2136 env->vxsat = 0x1;
2137 }
2138 return res;
2139}
2140
2141static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2142 uint32_t b)
2143{
2144 uint32_t res = a + b;
2145 if (res < a) {
2146 res = UINT32_MAX;
2147 env->vxsat = 0x1;
2148 }
2149 return res;
2150}
2151
2152static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2153 uint64_t b)
2154{
2155 uint64_t res = a + b;
2156 if (res < a) {
2157 res = UINT64_MAX;
2158 env->vxsat = 0x1;
2159 }
2160 return res;
2161}
2162
2163RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2164RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2165RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2166RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
8a085fb2 2167GEN_VEXT_VV_RM(vsaddu_vv_b)
2168GEN_VEXT_VV_RM(vsaddu_vv_h)
2169GEN_VEXT_VV_RM(vsaddu_vv_w)
2170GEN_VEXT_VV_RM(vsaddu_vv_d)
eb2650e3
LZ
2171
2172typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2173 CPURISCVState *env, int vxrm);
2174
2175#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2176static inline void \
2177do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2178 CPURISCVState *env, int vxrm) \
2179{ \
2180 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2181 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2182}
2183
2184static inline void
2185vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2186 CPURISCVState *env,
f9298de5 2187 uint32_t vl, uint32_t vm, int vxrm,
eb2650e3
LZ
2188 opivx2_rm_fn *fn)
2189{
f714361e 2190 for (uint32_t i = env->vstart; i < vl; i++) {
f9298de5 2191 if (!vm && !vext_elem_mask(v0, i)) {
eb2650e3
LZ
2192 continue;
2193 }
2194 fn(vd, s1, vs2, i, env, vxrm);
2195 }
f714361e 2196 env->vstart = 0;
eb2650e3
LZ
2197}
2198
2199static inline void
2200vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2201 CPURISCVState *env,
8a085fb2 2202 uint32_t desc,
3479a814 2203 opivx2_rm_fn *fn)
eb2650e3 2204{
eb2650e3
LZ
2205 uint32_t vm = vext_vm(desc);
2206 uint32_t vl = env->vl;
2207
2208 switch (env->vxrm) {
2209 case 0: /* rnu */
2210 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2211 env, vl, vm, 0, fn);
eb2650e3
LZ
2212 break;
2213 case 1: /* rne */
2214 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2215 env, vl, vm, 1, fn);
eb2650e3
LZ
2216 break;
2217 case 2: /* rdn */
2218 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2219 env, vl, vm, 2, fn);
eb2650e3
LZ
2220 break;
2221 default: /* rod */
2222 vext_vx_rm_1(vd, v0, s1, vs2,
f9298de5 2223 env, vl, vm, 3, fn);
eb2650e3
LZ
2224 break;
2225 }
eb2650e3
LZ
2226}
2227
2228/* generate helpers for fixed point instructions with OPIVX format */
8a085fb2 2229#define GEN_VEXT_VX_RM(NAME) \
eb2650e3
LZ
2230void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2231 void *vs2, CPURISCVState *env, uint32_t desc) \
2232{ \
8a085fb2 2233 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
3479a814 2234 do_##NAME); \
eb2650e3
LZ
2235}
2236
2237RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2238RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2239RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2240RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
8a085fb2 2241GEN_VEXT_VX_RM(vsaddu_vx_b)
2242GEN_VEXT_VX_RM(vsaddu_vx_h)
2243GEN_VEXT_VX_RM(vsaddu_vx_w)
2244GEN_VEXT_VX_RM(vsaddu_vx_d)
eb2650e3
LZ
2245
2246static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2247{
2248 int8_t res = a + b;
2249 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2250 res = a > 0 ? INT8_MAX : INT8_MIN;
2251 env->vxsat = 0x1;
2252 }
2253 return res;
2254}
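/*
 * Signed saturating add (all widths): overflow occurred iff both operands
 * differ in sign from the result, which (res ^ a) & (res ^ b) & INT_MIN
 * detects.  E.g. a = 100, b = 50 wraps to res = -106 for int8_t, so the
 * result is clamped to INT8_MAX and vxsat is set.
 */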
2255
2256static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2257{
2258 int16_t res = a + b;
2259 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2260 res = a > 0 ? INT16_MAX : INT16_MIN;
2261 env->vxsat = 0x1;
2262 }
2263 return res;
2264}
2265
2266static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2267{
2268 int32_t res = a + b;
2269 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2270 res = a > 0 ? INT32_MAX : INT32_MIN;
2271 env->vxsat = 0x1;
2272 }
2273 return res;
2274}
2275
2276static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2277{
2278 int64_t res = a + b;
2279 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2280 res = a > 0 ? INT64_MAX : INT64_MIN;
2281 env->vxsat = 0x1;
2282 }
2283 return res;
2284}
2285
2286RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2287RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2288RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2289RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
8a085fb2 2290GEN_VEXT_VV_RM(vsadd_vv_b)
2291GEN_VEXT_VV_RM(vsadd_vv_h)
2292GEN_VEXT_VV_RM(vsadd_vv_w)
2293GEN_VEXT_VV_RM(vsadd_vv_d)
eb2650e3
LZ
2294
2295RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2296RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2297RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2298RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
8a085fb2 2299GEN_VEXT_VX_RM(vsadd_vx_b)
2300GEN_VEXT_VX_RM(vsadd_vx_h)
2301GEN_VEXT_VX_RM(vsadd_vx_w)
2302GEN_VEXT_VX_RM(vsadd_vx_d)
eb2650e3
LZ
2303
2304static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2305{
2306 uint8_t res = a - b;
2307 if (res > a) {
2308 res = 0;
2309 env->vxsat = 0x1;
2310 }
2311 return res;
2312}
2313
2314static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2315 uint16_t b)
2316{
2317 uint16_t res = a - b;
2318 if (res > a) {
2319 res = 0;
2320 env->vxsat = 0x1;
2321 }
2322 return res;
2323}
2324
2325static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2326 uint32_t b)
2327{
2328 uint32_t res = a - b;
2329 if (res > a) {
2330 res = 0;
2331 env->vxsat = 0x1;
2332 }
2333 return res;
2334}
2335
2336static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2337 uint64_t b)
2338{
2339 uint64_t res = a - b;
2340 if (res > a) {
2341 res = 0;
2342 env->vxsat = 0x1;
2343 }
2344 return res;
2345}
2346
2347RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2348RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2349RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2350RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
8a085fb2 2351GEN_VEXT_VV_RM(vssubu_vv_b)
2352GEN_VEXT_VV_RM(vssubu_vv_h)
2353GEN_VEXT_VV_RM(vssubu_vv_w)
2354GEN_VEXT_VV_RM(vssubu_vv_d)
eb2650e3
LZ
2355
2356RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2357RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2358RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2359RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
8a085fb2 2360GEN_VEXT_VX_RM(vssubu_vx_b)
2361GEN_VEXT_VX_RM(vssubu_vx_h)
2362GEN_VEXT_VX_RM(vssubu_vx_w)
2363GEN_VEXT_VX_RM(vssubu_vx_d)
eb2650e3
LZ
2364
2365static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2366{
2367 int8_t res = a - b;
2368 if ((res ^ a) & (a ^ b) & INT8_MIN) {
65606f21 2369 res = a >= 0 ? INT8_MAX : INT8_MIN;
eb2650e3
LZ
2370 env->vxsat = 0x1;
2371 }
2372 return res;
2373}
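/*
 * For subtraction, overflow requires operands of opposite sign, hence the
 * (a ^ b) term; the result saturates towards the sign of a
 * (a >= 0 ? MAX : MIN).
 */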
2374
2375static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2376{
2377 int16_t res = a - b;
2378 if ((res ^ a) & (a ^ b) & INT16_MIN) {
65606f21 2379 res = a >= 0 ? INT16_MAX : INT16_MIN;
eb2650e3
LZ
2380 env->vxsat = 0x1;
2381 }
2382 return res;
2383}
2384
2385static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2386{
2387 int32_t res = a - b;
2388 if ((res ^ a) & (a ^ b) & INT32_MIN) {
65606f21 2389 res = a >= 0 ? INT32_MAX : INT32_MIN;
eb2650e3
LZ
2390 env->vxsat = 0x1;
2391 }
2392 return res;
2393}
2394
2395static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2396{
2397 int64_t res = a - b;
2398 if ((res ^ a) & (a ^ b) & INT64_MIN) {
65606f21 2399 res = a >= 0 ? INT64_MAX : INT64_MIN;
eb2650e3
LZ
2400 env->vxsat = 0x1;
2401 }
2402 return res;
2403}
2404
2405RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2406RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2407RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2408RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
8a085fb2 2409GEN_VEXT_VV_RM(vssub_vv_b)
2410GEN_VEXT_VV_RM(vssub_vv_h)
2411GEN_VEXT_VV_RM(vssub_vv_w)
2412GEN_VEXT_VV_RM(vssub_vv_d)
eb2650e3
LZ
2413
2414RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2415RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2416RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2417RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
8a085fb2 2418GEN_VEXT_VX_RM(vssub_vx_b)
2419GEN_VEXT_VX_RM(vssub_vx_h)
2420GEN_VEXT_VX_RM(vssub_vx_w)
2421GEN_VEXT_VX_RM(vssub_vx_d)
b7aee481
LZ
2422
2423/* Vector Single-Width Averaging Add and Subtract */
2424static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2425{
2426 uint8_t d = extract64(v, shift, 1);
2427 uint8_t d1;
2428 uint64_t D1, D2;
2429
2430 if (shift == 0 || shift > 64) {
2431 return 0;
2432 }
2433
2434 d1 = extract64(v, shift - 1, 1);
2435 D1 = extract64(v, 0, shift);
2436 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2437 return d1;
2438 } else if (vxrm == 1) { /* round-to-nearest-even */
2439 if (shift > 1) {
2440 D2 = extract64(v, 0, shift - 1);
2441 return d1 & ((D2 != 0) | d);
2442 } else {
2443 return d1 & d;
2444 }
2445 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2446 return !d & (D1 != 0);
2447 }
2448 return 0; /* round-down (truncate) */
2449}
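/*
 * Illustrative example: v = 0b1011, shift = 2, so d = bit 2 = 0,
 * d1 = bit 1 = 1, D1 = 0b11, D2 = 0b1.  The rounding increments are:
 *   rnu: 1   rne: 1   rdn: 0   rod: 1
 * and (v >> 2) + round evaluates to 3, 3, 2 and 3 respectively.
 */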
2450
2451static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2452{
2453 int64_t res = (int64_t)a + b;
2454 uint8_t round = get_round(vxrm, res, 1);
2455
2456 return (res >> 1) + round;
2457}
2458
2459static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2460{
2461 int64_t res = a + b;
2462 uint8_t round = get_round(vxrm, res, 1);
2463 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2464
2465 /* With signed overflow, bit 64 is inverse of bit 63. */
2466 return ((res >> 1) ^ over) + round;
2467}
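/*
 * Example: a = b = INT64_MAX wraps to res = -2 with "over" set;
 * ((res >> 1) ^ over) + round recovers the true average INT64_MAX
 * (round is 0 in every rounding mode here, since bit 0 of res is 0).
 */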
2468
2469RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2470RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2471RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2472RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
8a085fb2 2473GEN_VEXT_VV_RM(vaadd_vv_b)
2474GEN_VEXT_VV_RM(vaadd_vv_h)
2475GEN_VEXT_VV_RM(vaadd_vv_w)
2476GEN_VEXT_VV_RM(vaadd_vv_d)
b7aee481
LZ
2477
2478RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2479RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2480RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2481RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
8a085fb2 2482GEN_VEXT_VX_RM(vaadd_vx_b)
2483GEN_VEXT_VX_RM(vaadd_vx_h)
2484GEN_VEXT_VX_RM(vaadd_vx_w)
2485GEN_VEXT_VX_RM(vaadd_vx_d)
b7aee481 2486
8b99a110
FC
2487static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2488 uint32_t a, uint32_t b)
2489{
2490 uint64_t res = (uint64_t)a + b;
2491 uint8_t round = get_round(vxrm, res, 1);
2492
2493 return (res >> 1) + round;
2494}
2495
2496static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2497 uint64_t a, uint64_t b)
2498{
2499 uint64_t res = a + b;
2500 uint8_t round = get_round(vxrm, res, 1);
2501 uint64_t over = (uint64_t)(res < a) << 63;
2502
2503 return ((res >> 1) | over) + round;
2504}
2505
2506RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2507RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2508RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2509RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
8a085fb2 2510GEN_VEXT_VV_RM(vaaddu_vv_b)
2511GEN_VEXT_VV_RM(vaaddu_vv_h)
2512GEN_VEXT_VV_RM(vaaddu_vv_w)
2513GEN_VEXT_VV_RM(vaaddu_vv_d)
8b99a110
FC
2514
2515RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2516RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2517RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2518RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
8a085fb2 2519GEN_VEXT_VX_RM(vaaddu_vx_b)
2520GEN_VEXT_VX_RM(vaaddu_vx_h)
2521GEN_VEXT_VX_RM(vaaddu_vx_w)
2522GEN_VEXT_VX_RM(vaaddu_vx_d)
8b99a110 2523
b7aee481
LZ
2524static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2525{
2526 int64_t res = (int64_t)a - b;
2527 uint8_t round = get_round(vxrm, res, 1);
2528
2529 return (res >> 1) + round;
2530}
2531
2532static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2533{
2534 int64_t res = (int64_t)a - b;
2535 uint8_t round = get_round(vxrm, res, 1);
2536 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2537
2538 /* With signed overflow, bit 64 is inverse of bit 63. */
2539 return ((res >> 1) ^ over) + round;
2540}
2541
2542RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2543RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2544RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2545RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
8a085fb2 2546GEN_VEXT_VV_RM(vasub_vv_b)
2547GEN_VEXT_VV_RM(vasub_vv_h)
2548GEN_VEXT_VV_RM(vasub_vv_w)
2549GEN_VEXT_VV_RM(vasub_vv_d)
b7aee481
LZ
2550
2551RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2552RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2553RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2554RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
8a085fb2 2555GEN_VEXT_VX_RM(vasub_vx_b)
2556GEN_VEXT_VX_RM(vasub_vx_h)
2557GEN_VEXT_VX_RM(vasub_vx_w)
2558GEN_VEXT_VX_RM(vasub_vx_d)
9f0ff9e5 2559
8b99a110
FC
2560static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2561 uint32_t a, uint32_t b)
2562{
2563 int64_t res = (int64_t)a - b;
2564 uint8_t round = get_round(vxrm, res, 1);
2565
2566 return (res >> 1) + round;
2567}
2568
2569static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2570 uint64_t a, uint64_t b)
2571{
2572 uint64_t res = (uint64_t)a - b;
2573 uint8_t round = get_round(vxrm, res, 1);
2574 uint64_t over = (uint64_t)(res > a) << 63;
2575
2576 return ((res >> 1) | over) + round;
2577}
2578
2579RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2580RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2581RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2582RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
8a085fb2 2583GEN_VEXT_VV_RM(vasubu_vv_b)
2584GEN_VEXT_VV_RM(vasubu_vv_h)
2585GEN_VEXT_VV_RM(vasubu_vv_w)
2586GEN_VEXT_VV_RM(vasubu_vv_d)
8b99a110
FC
2587
2588RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2589RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2590RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2591RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
8a085fb2 2592GEN_VEXT_VX_RM(vasubu_vx_b)
2593GEN_VEXT_VX_RM(vasubu_vx_h)
2594GEN_VEXT_VX_RM(vasubu_vx_w)
2595GEN_VEXT_VX_RM(vasubu_vx_d)
8b99a110 2596
9f0ff9e5
LZ
2597/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2598static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2599{
2600 uint8_t round;
2601 int16_t res;
2602
2603 res = (int16_t)a * (int16_t)b;
2604 round = get_round(vxrm, res, 7);
2605 res = (res >> 7) + round;
2606
2607 if (res > INT8_MAX) {
2608 env->vxsat = 0x1;
2609 return INT8_MAX;
2610 } else if (res < INT8_MIN) {
2611 env->vxsat = 0x1;
2612 return INT8_MIN;
2613 } else {
2614 return res;
2615 }
2616}
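/*
 * The operands are treated as signed fixed-point fractions (Q7 for the
 * 8-bit case above; the wider variants are analogous): the product of two
 * Q7 values is Q14, and (res >> 7) + round rescales it back to Q7.
 * E.g. 0x40 * 0x40 (0.5 * 0.5) gives 0x1000, which rescales to 0x20
 * (0.25).  Only (-1.0) * (-1.0) can exceed the destination range, and it
 * saturates to the type maximum with vxsat set.
 */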
2617
2618static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2619{
2620 uint8_t round;
2621 int32_t res;
2622
2623 res = (int32_t)a * (int32_t)b;
2624 round = get_round(vxrm, res, 15);
2625 res = (res >> 15) + round;
2626
2627 if (res > INT16_MAX) {
2628 env->vxsat = 0x1;
2629 return INT16_MAX;
2630 } else if (res < INT16_MIN) {
2631 env->vxsat = 0x1;
2632 return INT16_MIN;
2633 } else {
2634 return res;
2635 }
2636}
2637
2638static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2639{
2640 uint8_t round;
2641 int64_t res;
2642
2643 res = (int64_t)a * (int64_t)b;
2644 round = get_round(vxrm, res, 31);
2645 res = (res >> 31) + round;
2646
2647 if (res > INT32_MAX) {
2648 env->vxsat = 0x1;
2649 return INT32_MAX;
2650 } else if (res < INT32_MIN) {
2651 env->vxsat = 0x1;
2652 return INT32_MIN;
2653 } else {
2654 return res;
2655 }
2656}
2657
2658static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2659{
2660 uint8_t round;
2661 uint64_t hi_64, lo_64;
2662 int64_t res;
2663
2664 if (a == INT64_MIN && b == INT64_MIN) {
2665 env->vxsat = 1;
2666 return INT64_MAX;
2667 }
2668
2669 muls64(&lo_64, &hi_64, a, b);
2670 round = get_round(vxrm, lo_64, 63);
2671 /*
2672 * Cannot overflow, as there are always
2673 * 2 sign bits after multiply.
2674 */
2675 res = (hi_64 << 1) | (lo_64 >> 63);
2676 if (round) {
2677 if (res == INT64_MAX) {
2678 env->vxsat = 1;
2679 } else {
2680 res += 1;
2681 }
2682 }
2683 return res;
2684}
2685
2686RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2687RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2688RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2689RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
8a085fb2 2690GEN_VEXT_VV_RM(vsmul_vv_b)
2691GEN_VEXT_VV_RM(vsmul_vv_h)
2692GEN_VEXT_VV_RM(vsmul_vv_w)
2693GEN_VEXT_VV_RM(vsmul_vv_d)
9f0ff9e5
LZ
2694
2695RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2696RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2697RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2698RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
8a085fb2 2699GEN_VEXT_VX_RM(vsmul_vx_b)
2700GEN_VEXT_VX_RM(vsmul_vx_h)
2701GEN_VEXT_VX_RM(vsmul_vx_w)
2702GEN_VEXT_VX_RM(vsmul_vx_d)
0a1eaf00 2703
04a61406
LZ
2704/* Vector Single-Width Scaling Shift Instructions */
2705static inline uint8_t
2706vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2707{
2708 uint8_t round, shift = b & 0x7;
2709 uint8_t res;
2710
2711 round = get_round(vxrm, a, shift);
2712 res = (a >> shift) + round;
2713 return res;
2714}
2715static inline uint16_t
2716vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2717{
2718 uint8_t round, shift = b & 0xf;
2719 uint16_t res;
2720
2721 round = get_round(vxrm, a, shift);
2722 res = (a >> shift) + round;
2723 return res;
2724}
2725static inline uint32_t
2726vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2727{
2728 uint8_t round, shift = b & 0x1f;
2729 uint32_t res;
2730
2731 round = get_round(vxrm, a, shift);
2732 res = (a >> shift) + round;
2733 return res;
2734}
2735static inline uint64_t
2736vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2737{
2738 uint8_t round, shift = b & 0x3f;
2739 uint64_t res;
2740
2741 round = get_round(vxrm, a, shift);
2742 res = (a >> shift) + round;
2743 return res;
2744}
2745RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2746RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2747RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2748RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
8a085fb2 2749GEN_VEXT_VV_RM(vssrl_vv_b)
2750GEN_VEXT_VV_RM(vssrl_vv_h)
2751GEN_VEXT_VV_RM(vssrl_vv_w)
2752GEN_VEXT_VV_RM(vssrl_vv_d)
04a61406
LZ
2753
2754RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2755RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2756RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2757RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
8a085fb2 2758GEN_VEXT_VX_RM(vssrl_vx_b)
2759GEN_VEXT_VX_RM(vssrl_vx_h)
2760GEN_VEXT_VX_RM(vssrl_vx_w)
2761GEN_VEXT_VX_RM(vssrl_vx_d)
04a61406
LZ
2762
2763static inline int8_t
2764vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2765{
2766 uint8_t round, shift = b & 0x7;
2767 int8_t res;
2768
2769 round = get_round(vxrm, a, shift);
2770 res = (a >> shift) + round;
2771 return res;
2772}
2773static inline int16_t
2774vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2775{
2776 uint8_t round, shift = b & 0xf;
2777 int16_t res;
2778
2779 round = get_round(vxrm, a, shift);
2780 res = (a >> shift) + round;
2781 return res;
2782}
2783static inline int32_t
2784vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2785{
2786 uint8_t round, shift = b & 0x1f;
2787 int32_t res;
2788
2789 round = get_round(vxrm, a, shift);
2790 res = (a >> shift) + round;
2791 return res;
2792}
2793static inline int64_t
2794vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2795{
2796 uint8_t round, shift = b & 0x3f;
2797 int64_t res;
2798
2799 round = get_round(vxrm, a, shift);
2800 res = (a >> shift) + round;
2801 return res;
2802}
9ff3d287 2803
04a61406
LZ
2804RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2805RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2806RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2807RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
8a085fb2 2808GEN_VEXT_VV_RM(vssra_vv_b)
2809GEN_VEXT_VV_RM(vssra_vv_h)
2810GEN_VEXT_VV_RM(vssra_vv_w)
2811GEN_VEXT_VV_RM(vssra_vv_d)
04a61406
LZ
2812
2813RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2814RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2815RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2816RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
8a085fb2 2817GEN_VEXT_VX_RM(vssra_vx_b)
2818GEN_VEXT_VX_RM(vssra_vx_h)
2819GEN_VEXT_VX_RM(vssra_vx_w)
2820GEN_VEXT_VX_RM(vssra_vx_d)
9ff3d287
LZ
2821
2822/* Vector Narrowing Fixed-Point Clip Instructions */
2823static inline int8_t
2824vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2825{
2826 uint8_t round, shift = b & 0xf;
2827 int16_t res;
2828
2829 round = get_round(vxrm, a, shift);
2830 res = (a >> shift) + round;
2831 if (res > INT8_MAX) {
2832 env->vxsat = 0x1;
2833 return INT8_MAX;
2834 } else if (res < INT8_MIN) {
2835 env->vxsat = 0x1;
2836 return INT8_MIN;
2837 } else {
2838 return res;
2839 }
2840}
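/*
 * Example: a = 1000 (int16_t), shift = 2 gives (a >> 2) + round = 250,
 * which exceeds INT8_MAX, so the result is clamped to 127 and vxsat is
 * set.  (round is 0 here: bit 1 of 1000 is 0.)
 */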
2841
2842static inline int16_t
2843vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2844{
2845 uint8_t round, shift = b & 0x1f;
2846 int32_t res;
2847
2848 round = get_round(vxrm, a, shift);
2849 res = (a >> shift) + round;
2850 if (res > INT16_MAX) {
2851 env->vxsat = 0x1;
2852 return INT16_MAX;
2853 } else if (res < INT16_MIN) {
2854 env->vxsat = 0x1;
2855 return INT16_MIN;
2856 } else {
2857 return res;
2858 }
2859}
2860
2861static inline int32_t
2862vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2863{
2864 uint8_t round, shift = b & 0x3f;
2865 int64_t res;
2866
2867 round = get_round(vxrm, a, shift);
2868 res = (a >> shift) + round;
2869 if (res > INT32_MAX) {
2870 env->vxsat = 0x1;
2871 return INT32_MAX;
2872 } else if (res < INT32_MIN) {
2873 env->vxsat = 0x1;
2874 return INT32_MIN;
2875 } else {
2876 return res;
2877 }
2878}
2879
a70b3a73
FC
2880RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2881RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2882RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
8a085fb2 2883GEN_VEXT_VV_RM(vnclip_wv_b)
2884GEN_VEXT_VV_RM(vnclip_wv_h)
2885GEN_VEXT_VV_RM(vnclip_wv_w)
a70b3a73
FC
2886
2887RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2888RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2889RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
8a085fb2 2890GEN_VEXT_VX_RM(vnclip_wx_b)
2891GEN_VEXT_VX_RM(vnclip_wx_h)
2892GEN_VEXT_VX_RM(vnclip_wx_w)
9ff3d287
LZ
2893
2894static inline uint8_t
2895vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2896{
2897 uint8_t round, shift = b & 0xf;
2898 uint16_t res;
2899
2900 round = get_round(vxrm, a, shift);
2901 res = (a >> shift) + round;
2902 if (res > UINT8_MAX) {
2903 env->vxsat = 0x1;
2904 return UINT8_MAX;
2905 } else {
2906 return res;
2907 }
2908}
2909
2910static inline uint16_t
2911vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2912{
2913 uint8_t round, shift = b & 0x1f;
2914 uint32_t res;
2915
2916 round = get_round(vxrm, a, shift);
2917 res = (a >> shift) + round;
2918 if (res > UINT16_MAX) {
2919 env->vxsat = 0x1;
2920 return UINT16_MAX;
2921 } else {
2922 return res;
2923 }
2924}
2925
2926static inline uint32_t
2927vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2928{
2929 uint8_t round, shift = b & 0x3f;
a70b3a73 2930 uint64_t res;
9ff3d287
LZ
2931
2932 round = get_round(vxrm, a, shift);
2933 res = (a >> shift) + round;
2934 if (res > UINT32_MAX) {
2935 env->vxsat = 0x1;
2936 return UINT32_MAX;
2937 } else {
2938 return res;
2939 }
2940}
2941
a70b3a73
FC
2942RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2943RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2944RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
8a085fb2 2945GEN_VEXT_VV_RM(vnclipu_wv_b)
2946GEN_VEXT_VV_RM(vnclipu_wv_h)
2947GEN_VEXT_VV_RM(vnclipu_wv_w)
9ff3d287 2948
a70b3a73
FC
2949RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2950RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2951RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
8a085fb2 2952GEN_VEXT_VX_RM(vnclipu_wx_b)
2953GEN_VEXT_VX_RM(vnclipu_wx_h)
2954GEN_VEXT_VX_RM(vnclipu_wx_w)
ce2a0343
LZ
2955
2956/*
2957 *** Vector Floating-Point Arithmetic Instructions
2958 */
2959/* Vector Single-Width Floating-Point Add/Subtract Instructions */
2960#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2961static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2962 CPURISCVState *env) \
2963{ \
2964 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2965 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2966 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
2967}
2968
8a085fb2 2969#define GEN_VEXT_VV_ENV(NAME) \
ce2a0343
LZ
2970void HELPER(NAME)(void *vd, void *v0, void *vs1, \
2971 void *vs2, CPURISCVState *env, \
2972 uint32_t desc) \
2973{ \
ce2a0343
LZ
2974 uint32_t vm = vext_vm(desc); \
2975 uint32_t vl = env->vl; \
2976 uint32_t i; \
2977 \
f714361e 2978 for (i = env->vstart; i < vl; i++) { \
f9298de5 2979 if (!vm && !vext_elem_mask(v0, i)) { \
ce2a0343
LZ
2980 continue; \
2981 } \
2982 do_##NAME(vd, vs1, vs2, i, env); \
2983 } \
f714361e 2984 env->vstart = 0; \
ce2a0343
LZ
2985}
2986
2987RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2988RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2989RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
8a085fb2 2990GEN_VEXT_VV_ENV(vfadd_vv_h)
2991GEN_VEXT_VV_ENV(vfadd_vv_w)
2992GEN_VEXT_VV_ENV(vfadd_vv_d)
ce2a0343
LZ
2993
2994#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2995static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2996 CPURISCVState *env) \
2997{ \
2998 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2999 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3000}
3001
8a085fb2 3002#define GEN_VEXT_VF(NAME) \
ce2a0343
LZ
3003void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
3004 void *vs2, CPURISCVState *env, \
3005 uint32_t desc) \
3006{ \
ce2a0343
LZ
3007 uint32_t vm = vext_vm(desc); \
3008 uint32_t vl = env->vl; \
3009 uint32_t i; \
3010 \
f714361e 3011 for (i = env->vstart; i < vl; i++) { \
f9298de5 3012 if (!vm && !vext_elem_mask(v0, i)) { \
ce2a0343
LZ
3013 continue; \
3014 } \
3015 do_##NAME(vd, s1, vs2, i, env); \
3016 } \
f714361e 3017 env->vstart = 0; \
ce2a0343
LZ
3018}
3019
3020RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3021RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3022RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
8a085fb2 3023GEN_VEXT_VF(vfadd_vf_h)
3024GEN_VEXT_VF(vfadd_vf_w)
3025GEN_VEXT_VF(vfadd_vf_d)
ce2a0343
LZ
3026
3027RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3028RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3029RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
8a085fb2 3030GEN_VEXT_VV_ENV(vfsub_vv_h)
3031GEN_VEXT_VV_ENV(vfsub_vv_w)
3032GEN_VEXT_VV_ENV(vfsub_vv_d)
ce2a0343
LZ
3033RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3034RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3035RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
8a085fb2 3036GEN_VEXT_VF(vfsub_vf_h)
3037GEN_VEXT_VF(vfsub_vf_w)
3038GEN_VEXT_VF(vfsub_vf_d)
ce2a0343
LZ
3039
3040static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3041{
3042 return float16_sub(b, a, s);
3043}
3044
3045static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3046{
3047 return float32_sub(b, a, s);
3048}
3049
3050static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3051{
3052 return float64_sub(b, a, s);
3053}
3054
3055RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3056RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3057RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
8a085fb2 3058GEN_VEXT_VF(vfrsub_vf_h)
3059GEN_VEXT_VF(vfrsub_vf_w)
3060GEN_VEXT_VF(vfrsub_vf_d)
eeffab2e
LZ
3061
3062/* Vector Widening Floating-Point Add/Subtract Instructions */
3063static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3064{
3065 return float32_add(float16_to_float32(a, true, s),
3066 float16_to_float32(b, true, s), s);
3067}
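/*
 * The boolean argument to float16_to_float32() selects IEEE half-precision
 * (as opposed to the alternative half-precision format).
 */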
3068
3069static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3070{
3071 return float64_add(float32_to_float64(a, s),
3072 float32_to_float64(b, s), s);
3073
3074}
3075
3076RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3077RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
8a085fb2 3078GEN_VEXT_VV_ENV(vfwadd_vv_h)
3079GEN_VEXT_VV_ENV(vfwadd_vv_w)
eeffab2e
LZ
3080RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3081RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
8a085fb2 3082GEN_VEXT_VF(vfwadd_vf_h)
3083GEN_VEXT_VF(vfwadd_vf_w)
eeffab2e
LZ
3084
3085static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3086{
3087 return float32_sub(float16_to_float32(a, true, s),
3088 float16_to_float32(b, true, s), s);
3089}
3090
3091static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3092{
3093 return float64_sub(float32_to_float64(a, s),
3094 float32_to_float64(b, s), s);
3095
3096}
3097
3098RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3099RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
8a085fb2 3100GEN_VEXT_VV_ENV(vfwsub_vv_h)
3101GEN_VEXT_VV_ENV(vfwsub_vv_w)
eeffab2e
LZ
3102RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3103RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
8a085fb2 3104GEN_VEXT_VF(vfwsub_vf_h)
3105GEN_VEXT_VF(vfwsub_vf_w)
eeffab2e
LZ
3106
3107static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3108{
3109 return float32_add(a, float16_to_float32(b, true, s), s);
3110}
3111
3112static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3113{
3114 return float64_add(a, float32_to_float64(b, s), s);
3115}
3116
3117RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3118RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
8a085fb2 3119GEN_VEXT_VV_ENV(vfwadd_wv_h)
3120GEN_VEXT_VV_ENV(vfwadd_wv_w)
eeffab2e
LZ
3121RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3122RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
8a085fb2 3123GEN_VEXT_VF(vfwadd_wf_h)
3124GEN_VEXT_VF(vfwadd_wf_w)
eeffab2e
LZ
3125
3126static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3127{
3128 return float32_sub(a, float16_to_float32(b, true, s), s);
3129}
3130
3131static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3132{
3133 return float64_sub(a, float32_to_float64(b, s), s);
3134}
3135
3136RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3137RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
8a085fb2 3138GEN_VEXT_VV_ENV(vfwsub_wv_h)
3139GEN_VEXT_VV_ENV(vfwsub_wv_w)
eeffab2e
LZ
3140RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3141RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
8a085fb2 3142GEN_VEXT_VF(vfwsub_wf_h)
3143GEN_VEXT_VF(vfwsub_wf_w)
0e0057cb
LZ
3144
3145/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3146RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3147RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3148RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
8a085fb2 3149GEN_VEXT_VV_ENV(vfmul_vv_h)
3150GEN_VEXT_VV_ENV(vfmul_vv_w)
3151GEN_VEXT_VV_ENV(vfmul_vv_d)
0e0057cb
LZ
3152RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3153RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3154RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
8a085fb2 3155GEN_VEXT_VF(vfmul_vf_h)
3156GEN_VEXT_VF(vfmul_vf_w)
3157GEN_VEXT_VF(vfmul_vf_d)
0e0057cb
LZ
3158
3159RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3160RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3161RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
8a085fb2 3162GEN_VEXT_VV_ENV(vfdiv_vv_h)
3163GEN_VEXT_VV_ENV(vfdiv_vv_w)
3164GEN_VEXT_VV_ENV(vfdiv_vv_d)
0e0057cb
LZ
3165RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3166RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3167RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
8a085fb2 3168GEN_VEXT_VF(vfdiv_vf_h)
3169GEN_VEXT_VF(vfdiv_vf_w)
3170GEN_VEXT_VF(vfdiv_vf_d)
0e0057cb
LZ
3171
3172static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3173{
3174 return float16_div(b, a, s);
3175}
3176
3177static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3178{
3179 return float32_div(b, a, s);
3180}
3181
3182static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3183{
3184 return float64_div(b, a, s);
3185}
3186
3187RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3188RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3189RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
8a085fb2 3190GEN_VEXT_VF(vfrdiv_vf_h)
3191GEN_VEXT_VF(vfrdiv_vf_w)
3192GEN_VEXT_VF(vfrdiv_vf_d)
f7c7b7cd
LZ
3193
3194/* Vector Widening Floating-Point Multiply */
3195static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3196{
3197 return float32_mul(float16_to_float32(a, true, s),
3198 float16_to_float32(b, true, s), s);
3199}
3200
3201static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3202{
3203 return float64_mul(float32_to_float64(a, s),
3204 float32_to_float64(b, s), s);
3205
3206}
3207RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3208RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
8a085fb2 3209GEN_VEXT_VV_ENV(vfwmul_vv_h)
3210GEN_VEXT_VV_ENV(vfwmul_vv_w)
f7c7b7cd
LZ
3211RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3212RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
8a085fb2 3213GEN_VEXT_VF(vfwmul_vf_h)
3214GEN_VEXT_VF(vfwmul_vf_w)
4aa5a8fe
LZ
3215
3216/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3217#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3218static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3219 CPURISCVState *env) \
3220{ \
3221 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3222 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3223 TD d = *((TD *)vd + HD(i)); \
3224 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3225}
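/*
 * For illustration only: assuming OP_UUU_H expands to
 * "uint16_t, uint16_t, uint16_t, uint16_t, uint16_t" (TD, T1, T2, TX1, TX2),
 * RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) below
 * generates roughly:
 *
 *   static void do_vfmacc_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                              CPURISCVState *env)
 *   {
 *       uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *       uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *       uint16_t d = *((uint16_t *)vd + H2(i));
 *       *((uint16_t *)vd + H2(i)) = fmacc16(s2, s1, d, &env->fp_status);
 *   }
 *
 * i.e. vd[i] = vs1[i] * vs2[i] + vd[i] for vfmacc.vv.
 */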
3226
3227static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3228{
3229 return float16_muladd(a, b, d, 0, s);
3230}
3231
3232static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3233{
3234 return float32_muladd(a, b, d, 0, s);
3235}
3236
3237static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3238{
3239 return float64_muladd(a, b, d, 0, s);
3240}
3241
3242RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3243RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3244RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
8a085fb2 3245GEN_VEXT_VV_ENV(vfmacc_vv_h)
3246GEN_VEXT_VV_ENV(vfmacc_vv_w)
3247GEN_VEXT_VV_ENV(vfmacc_vv_d)
4aa5a8fe
LZ
3248
3249#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3250static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3251 CPURISCVState *env) \
3252{ \
3253 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3254 TD d = *((TD *)vd + HD(i)); \
3255 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3256}
3257
3258RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3259RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3260RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
8a085fb2 3261GEN_VEXT_VF(vfmacc_vf_h)
3262GEN_VEXT_VF(vfmacc_vf_w)
3263GEN_VEXT_VF(vfmacc_vf_d)
4aa5a8fe
LZ
3264
3265static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3266{
3267 return float16_muladd(a, b, d,
3268 float_muladd_negate_c | float_muladd_negate_product, s);
3269}
3270
3271static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3272{
3273 return float32_muladd(a, b, d,
3274 float_muladd_negate_c | float_muladd_negate_product, s);
3275}
3276
3277static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3278{
3279 return float64_muladd(a, b, d,
3280 float_muladd_negate_c | float_muladd_negate_product, s);
3281}
3282
3283RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3284RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3285RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
8a085fb2 3286GEN_VEXT_VV_ENV(vfnmacc_vv_h)
3287GEN_VEXT_VV_ENV(vfnmacc_vv_w)
3288GEN_VEXT_VV_ENV(vfnmacc_vv_d)
4aa5a8fe
LZ
3289RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3290RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3291RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
8a085fb2 3292GEN_VEXT_VF(vfnmacc_vf_h)
3293GEN_VEXT_VF(vfnmacc_vf_w)
3294GEN_VEXT_VF(vfnmacc_vf_d)
4aa5a8fe
LZ
3295
3296static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3297{
3298 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3299}
3300
3301static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3302{
3303 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3304}
3305
3306static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3307{
3308 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3309}
3310
3311RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3312RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3313RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
8a085fb2 3314GEN_VEXT_VV_ENV(vfmsac_vv_h)
3315GEN_VEXT_VV_ENV(vfmsac_vv_w)
3316GEN_VEXT_VV_ENV(vfmsac_vv_d)
4aa5a8fe
LZ
3317RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3318RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3319RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
8a085fb2 3320GEN_VEXT_VF(vfmsac_vf_h)
3321GEN_VEXT_VF(vfmsac_vf_w)
3322GEN_VEXT_VF(vfmsac_vf_d)
4aa5a8fe
LZ
3323
3324static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3325{
3326 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3327}
3328
3329static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3330{
3331 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3332}
3333
3334static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3335{
3336 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3337}
3338
3339RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3340RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3341RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
8a085fb2 3342GEN_VEXT_VV_ENV(vfnmsac_vv_h)
3343GEN_VEXT_VV_ENV(vfnmsac_vv_w)
3344GEN_VEXT_VV_ENV(vfnmsac_vv_d)
4aa5a8fe
LZ
3345RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3346RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3347RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
8a085fb2 3348GEN_VEXT_VF(vfnmsac_vf_h)
3349GEN_VEXT_VF(vfnmsac_vf_w)
3350GEN_VEXT_VF(vfnmsac_vf_d)
4aa5a8fe
LZ
3351
3352static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3353{
3354 return float16_muladd(d, b, a, 0, s);
3355}
3356
3357static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3358{
3359 return float32_muladd(d, b, a, 0, s);
3360}
3361
3362static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3363{
3364 return float64_muladd(d, b, a, 0, s);
3365}
3366
3367RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3368RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3369RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
8a085fb2 3370GEN_VEXT_VV_ENV(vfmadd_vv_h)
3371GEN_VEXT_VV_ENV(vfmadd_vv_w)
3372GEN_VEXT_VV_ENV(vfmadd_vv_d)
4aa5a8fe
LZ
3373RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3374RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3375RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
8a085fb2 3376GEN_VEXT_VF(vfmadd_vf_h)
3377GEN_VEXT_VF(vfmadd_vf_w)
3378GEN_VEXT_VF(vfmadd_vf_d)
4aa5a8fe
LZ
3379
3380static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3381{
3382 return float16_muladd(d, b, a,
3383 float_muladd_negate_c | float_muladd_negate_product, s);
3384}
3385
3386static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3387{
3388 return float32_muladd(d, b, a,
3389 float_muladd_negate_c | float_muladd_negate_product, s);
3390}
3391
3392static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3393{
3394 return float64_muladd(d, b, a,
3395 float_muladd_negate_c | float_muladd_negate_product, s);
3396}
3397
3398RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3399RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3400RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
8a085fb2 3401GEN_VEXT_VV_ENV(vfnmadd_vv_h)
3402GEN_VEXT_VV_ENV(vfnmadd_vv_w)
3403GEN_VEXT_VV_ENV(vfnmadd_vv_d)
4aa5a8fe
LZ
3404RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3405RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3406RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
8a085fb2 3407GEN_VEXT_VF(vfnmadd_vf_h)
3408GEN_VEXT_VF(vfnmadd_vf_w)
3409GEN_VEXT_VF(vfnmadd_vf_d)
4aa5a8fe
LZ
3410
3411static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3412{
3413 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3414}
3415
3416static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3417{
3418 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3419}
3420
3421static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3422{
3423 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3424}
3425
3426RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3427RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3428RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
8a085fb2 3429GEN_VEXT_VV_ENV(vfmsub_vv_h)
3430GEN_VEXT_VV_ENV(vfmsub_vv_w)
3431GEN_VEXT_VV_ENV(vfmsub_vv_d)
4aa5a8fe
LZ
3432RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3433RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3434RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
8a085fb2 3435GEN_VEXT_VF(vfmsub_vf_h)
3436GEN_VEXT_VF(vfmsub_vf_w)
3437GEN_VEXT_VF(vfmsub_vf_d)
4aa5a8fe
LZ
3438
3439static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3440{
3441 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3442}
3443
3444static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3445{
3446 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3447}
3448
3449static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3450{
3451 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3452}
3453
3454RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3455RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3456RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
8a085fb2 3457GEN_VEXT_VV_ENV(vfnmsub_vv_h)
3458GEN_VEXT_VV_ENV(vfnmsub_vv_w)
3459GEN_VEXT_VV_ENV(vfnmsub_vv_d)
4aa5a8fe
LZ
3460RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3461RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3462RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
8a085fb2 3463GEN_VEXT_VF(vfnmsub_vf_h)
3464GEN_VEXT_VF(vfnmsub_vf_w)
3465GEN_VEXT_VF(vfnmsub_vf_d)
0dd50959
LZ
3466
3467/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3468static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3469{
3470 return float32_muladd(float16_to_float32(a, true, s),
3471 float16_to_float32(b, true, s), d, 0, s);
3472}
3473
3474static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3475{
3476 return float64_muladd(float32_to_float64(a, s),
3477 float32_to_float64(b, s), d, 0, s);
3478}
3479
3480RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3481RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
8a085fb2 3482GEN_VEXT_VV_ENV(vfwmacc_vv_h)
3483GEN_VEXT_VV_ENV(vfwmacc_vv_w)
0dd50959
LZ
3484RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3485RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
8a085fb2 3486GEN_VEXT_VF(vfwmacc_vf_h)
3487GEN_VEXT_VF(vfwmacc_vf_w)
0dd50959
LZ
3488
3489static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3490{
3491 return float32_muladd(float16_to_float32(a, true, s),
3492 float16_to_float32(b, true, s), d,
3493 float_muladd_negate_c | float_muladd_negate_product, s);
3494}
3495
3496static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3497{
3498 return float64_muladd(float32_to_float64(a, s),
3499 float32_to_float64(b, s), d,
3500 float_muladd_negate_c | float_muladd_negate_product, s);
3501}
3502
3503RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3504RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
8a085fb2 3505GEN_VEXT_VV_ENV(vfwnmacc_vv_h)
3506GEN_VEXT_VV_ENV(vfwnmacc_vv_w)
0dd50959
LZ
3507RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3508RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
8a085fb2 3509GEN_VEXT_VF(vfwnmacc_vf_h)
3510GEN_VEXT_VF(vfwnmacc_vf_w)
0dd50959
LZ
3511
3512static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3513{
3514 return float32_muladd(float16_to_float32(a, true, s),
3515 float16_to_float32(b, true, s), d,
3516 float_muladd_negate_c, s);
3517}
3518
3519static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3520{
3521 return float64_muladd(float32_to_float64(a, s),
3522 float32_to_float64(b, s), d,
3523 float_muladd_negate_c, s);
3524}
3525
3526RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3527RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
8a085fb2 3528GEN_VEXT_VV_ENV(vfwmsac_vv_h)
3529GEN_VEXT_VV_ENV(vfwmsac_vv_w)
0dd50959
LZ
3530RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3531RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
8a085fb2 3532GEN_VEXT_VF(vfwmsac_vf_h)
3533GEN_VEXT_VF(vfwmsac_vf_w)
0dd50959
LZ
3534
3535static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3536{
3537 return float32_muladd(float16_to_float32(a, true, s),
3538 float16_to_float32(b, true, s), d,
3539 float_muladd_negate_product, s);
3540}
3541
3542static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3543{
3544 return float64_muladd(float32_to_float64(a, s),
3545 float32_to_float64(b, s), d,
3546 float_muladd_negate_product, s);
3547}
3548
3549RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3550RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
8a085fb2 3551GEN_VEXT_VV_ENV(vfwnmsac_vv_h)
3552GEN_VEXT_VV_ENV(vfwnmsac_vv_w)
0dd50959
LZ
3553RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3554RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
8a085fb2 3555GEN_VEXT_VF(vfwnmsac_vf_h)
3556GEN_VEXT_VF(vfwnmsac_vf_w)
d9e4ce72
LZ
3557
3558/* Vector Floating-Point Square-Root Instruction */
3559/* (TD, T2, TX2) */
3560#define OP_UU_H uint16_t, uint16_t, uint16_t
3561#define OP_UU_W uint32_t, uint32_t, uint32_t
3562#define OP_UU_D uint64_t, uint64_t, uint64_t
3563
3564#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3565static void do_##NAME(void *vd, void *vs2, int i, \
3566 CPURISCVState *env) \
3567{ \
3568 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3569 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3570}
3571
8a085fb2 3572#define GEN_VEXT_V_ENV(NAME) \
d9e4ce72
LZ
3573void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3574 CPURISCVState *env, uint32_t desc) \
3575{ \
d9e4ce72
LZ
3576 uint32_t vm = vext_vm(desc); \
3577 uint32_t vl = env->vl; \
3578 uint32_t i; \
3579 \
3580 if (vl == 0) { \
3581 return; \
3582 } \
f714361e 3583 for (i = env->vstart; i < vl; i++) { \
f9298de5 3584 if (!vm && !vext_elem_mask(v0, i)) { \
d9e4ce72
LZ
3585 continue; \
3586 } \
3587 do_##NAME(vd, vs2, i, env); \
3588 } \
f714361e 3589 env->vstart = 0; \
d9e4ce72
LZ
3590}
3591
3592RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3593RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3594RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
8a085fb2 3595GEN_VEXT_V_ENV(vfsqrt_v_h)
3596GEN_VEXT_V_ENV(vfsqrt_v_w)
3597GEN_VEXT_V_ENV(vfsqrt_v_d)
230b53dd 3598
e848a1e5
FC
3599/*
3600 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3601 *
3602 * Adapted from riscv-v-spec recip.c:
3603 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3604 */
3605static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3606{
3607 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3608 uint64_t exp = extract64(f, frac_size, exp_size);
3609 uint64_t frac = extract64(f, 0, frac_size);
3610
3611 const uint8_t lookup_table[] = {
3612 52, 51, 50, 48, 47, 46, 44, 43,
3613 42, 41, 40, 39, 38, 36, 35, 34,
3614 33, 32, 31, 30, 30, 29, 28, 27,
3615 26, 25, 24, 23, 23, 22, 21, 20,
3616 19, 19, 18, 17, 16, 16, 15, 14,
3617 14, 13, 12, 12, 11, 10, 10, 9,
3618 9, 8, 7, 7, 6, 6, 5, 4,
3619 4, 3, 3, 2, 2, 1, 1, 0,
3620 127, 125, 123, 121, 119, 118, 116, 114,
3621 113, 111, 109, 108, 106, 105, 103, 102,
3622 100, 99, 97, 96, 95, 93, 92, 91,
3623 90, 88, 87, 86, 85, 84, 83, 82,
3624 80, 79, 78, 77, 76, 75, 74, 73,
3625 72, 71, 70, 70, 69, 68, 67, 66,
3626 65, 64, 63, 63, 62, 61, 60, 59,
3627 59, 58, 57, 56, 56, 55, 54, 53
3628 };
3629 const int precision = 7;
3630
3631 if (exp == 0 && frac != 0) { /* subnormal */
3632 /* Normalize the subnormal. */
3633 while (extract64(frac, frac_size - 1, 1) == 0) {
3634 exp--;
3635 frac <<= 1;
3636 }
3637
3638 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3639 }
3640
3641 int idx = ((exp & 1) << (precision - 1)) |
3642 (frac >> (frac_size - precision + 1));
3643 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3644 (frac_size - precision);
3645 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3646
3647 uint64_t val = 0;
3648 val = deposit64(val, 0, frac_size, out_frac);
3649 val = deposit64(val, frac_size, exp_size, out_exp);
3650 val = deposit64(val, frac_size + exp_size, 1, sign);
3651 return val;
3652}
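/*
 * Worked example (illustrative, values computed by hand from the code
 * above, single precision: exp_size = 8, frac_size = 23, precision = 7):
 * for f = 1.0f we have sign = 0, exp = 127, frac = 0, so
 *   idx      = ((127 & 1) << 6) | (0 >> 17)      = 64
 *   out_frac = lookup_table[64] << 16            = 127 << 16
 *   out_exp  = (3 * 127 + ~127) / 2 = (381 - 128) / 2 = 126
 * giving 0x3f7f0000 ~= 0.99609375, i.e. 1/sqrt(1.0) to about 7 bits.
 */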
3653
3654static float16 frsqrt7_h(float16 f, float_status *s)
3655{
3656 int exp_size = 5, frac_size = 10;
3657 bool sign = float16_is_neg(f);
3658
3659 /*
3660 * frsqrt7(sNaN) = canonical NaN
3661 * frsqrt7(-inf) = canonical NaN
3662 * frsqrt7(-normal) = canonical NaN
3663 * frsqrt7(-subnormal) = canonical NaN
3664 */
3665 if (float16_is_signaling_nan(f, s) ||
3666 (float16_is_infinity(f) && sign) ||
3667 (float16_is_normal(f) && sign) ||
3668 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3669 s->float_exception_flags |= float_flag_invalid;
3670 return float16_default_nan(s);
3671 }
3672
3673 /* frsqrt7(qNaN) = canonical NaN */
3674 if (float16_is_quiet_nan(f, s)) {
3675 return float16_default_nan(s);
3676 }
3677
3678 /* frsqrt7(+-0) = +-inf */
3679 if (float16_is_zero(f)) {
3680 s->float_exception_flags |= float_flag_divbyzero;
3681 return float16_set_sign(float16_infinity, sign);
3682 }
3683
3684 /* frsqrt7(+inf) = +0 */
3685 if (float16_is_infinity(f) && !sign) {
3686 return float16_set_sign(float16_zero, sign);
3687 }
3688
3689 /* +normal, +subnormal */
3690 uint64_t val = frsqrt7(f, exp_size, frac_size);
3691 return make_float16(val);
3692}
3693
3694static float32 frsqrt7_s(float32 f, float_status *s)
3695{
3696 int exp_size = 8, frac_size = 23;
3697 bool sign = float32_is_neg(f);
3698
3699 /*
3700 * frsqrt7(sNaN) = canonical NaN
3701 * frsqrt7(-inf) = canonical NaN
3702 * frsqrt7(-normal) = canonical NaN
3703 * frsqrt7(-subnormal) = canonical NaN
3704 */
3705 if (float32_is_signaling_nan(f, s) ||
3706 (float32_is_infinity(f) && sign) ||
3707 (float32_is_normal(f) && sign) ||
3708 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3709 s->float_exception_flags |= float_flag_invalid;
3710 return float32_default_nan(s);
3711 }
3712
3713 /* frsqrt7(qNaN) = canonical NaN */
3714 if (float32_is_quiet_nan(f, s)) {
3715 return float32_default_nan(s);
3716 }
3717
3718 /* frsqrt7(+-0) = +-inf */
3719 if (float32_is_zero(f)) {
3720 s->float_exception_flags |= float_flag_divbyzero;
3721 return float32_set_sign(float32_infinity, sign);
3722 }
3723
3724 /* frsqrt7(+inf) = +0 */
3725 if (float32_is_infinity(f) && !sign) {
3726 return float32_set_sign(float32_zero, sign);
3727 }
3728
3729 /* +normal, +subnormal */
3730 uint64_t val = frsqrt7(f, exp_size, frac_size);
3731 return make_float32(val);
3732}
3733
3734static float64 frsqrt7_d(float64 f, float_status *s)
3735{
3736 int exp_size = 11, frac_size = 52;
3737 bool sign = float64_is_neg(f);
3738
3739 /*
3740 * frsqrt7(sNaN) = canonical NaN
3741 * frsqrt7(-inf) = canonical NaN
3742 * frsqrt7(-normal) = canonical NaN
3743 * frsqrt7(-subnormal) = canonical NaN
3744 */
3745 if (float64_is_signaling_nan(f, s) ||
3746 (float64_is_infinity(f) && sign) ||
3747 (float64_is_normal(f) && sign) ||
3748 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3749 s->float_exception_flags |= float_flag_invalid;
3750 return float64_default_nan(s);
3751 }
3752
3753 /* frsqrt7(qNaN) = canonical NaN */
3754 if (float64_is_quiet_nan(f, s)) {
3755 return float64_default_nan(s);
3756 }
3757
3758 /* frsqrt7(+-0) = +-inf */
3759 if (float64_is_zero(f)) {
3760 s->float_exception_flags |= float_flag_divbyzero;
3761 return float64_set_sign(float64_infinity, sign);
3762 }
3763
3764 /* frsqrt7(+inf) = +0 */
3765 if (float64_is_infinity(f) && !sign) {
3766 return float64_set_sign(float64_zero, sign);
3767 }
3768
3769 /* +normal, +subnormal */
3770 uint64_t val = frsqrt7(f, exp_size, frac_size);
3771 return make_float64(val);
3772}
3773
3774RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3775RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3776RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
8a085fb2 3777GEN_VEXT_V_ENV(vfrsqrt7_v_h)
3778GEN_VEXT_V_ENV(vfrsqrt7_v_w)
3779GEN_VEXT_V_ENV(vfrsqrt7_v_d)
e848a1e5 3780
55c35407
FC
3781/*
3782 * Vector Floating-Point Reciprocal Estimate Instruction
3783 *
3784 * Adapted from riscv-v-spec recip.c:
3785 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3786 */
3787static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3788 float_status *s)
3789{
3790 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3791 uint64_t exp = extract64(f, frac_size, exp_size);
3792 uint64_t frac = extract64(f, 0, frac_size);
3793
3794 const uint8_t lookup_table[] = {
3795 127, 125, 123, 121, 119, 117, 116, 114,
3796 112, 110, 109, 107, 105, 104, 102, 100,
3797 99, 97, 96, 94, 93, 91, 90, 88,
3798 87, 85, 84, 83, 81, 80, 79, 77,
3799 76, 75, 74, 72, 71, 70, 69, 68,
3800 66, 65, 64, 63, 62, 61, 60, 59,
3801 58, 57, 56, 55, 54, 53, 52, 51,
3802 50, 49, 48, 47, 46, 45, 44, 43,
3803 42, 41, 40, 40, 39, 38, 37, 36,
3804 35, 35, 34, 33, 32, 31, 31, 30,
3805 29, 28, 28, 27, 26, 25, 25, 24,
3806 23, 23, 22, 21, 21, 20, 19, 19,
3807 18, 17, 17, 16, 15, 15, 14, 14,
3808 13, 12, 12, 11, 11, 10, 9, 9,
3809 8, 8, 7, 7, 6, 5, 5, 4,
3810 4, 3, 3, 2, 2, 1, 1, 0
3811 };
3812 const int precision = 7;
3813
3814 if (exp == 0 && frac != 0) { /* subnormal */
3815 /* Normalize the subnormal. */
3816 while (extract64(frac, frac_size - 1, 1) == 0) {
3817 exp--;
3818 frac <<= 1;
3819 }
3820
3821 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3822
3823 if (exp != 0 && exp != UINT64_MAX) {
3824 /*
3825 * Overflow to inf or max value of same sign,
3826 * depending on sign and rounding mode.
3827 */
3828 s->float_exception_flags |= (float_flag_inexact |
3829 float_flag_overflow);
3830
3831 if ((s->float_rounding_mode == float_round_to_zero) ||
3832 ((s->float_rounding_mode == float_round_down) && !sign) ||
3833 ((s->float_rounding_mode == float_round_up) && sign)) {
3834 /* Return greatest/negative finite value. */
3835 return (sign << (exp_size + frac_size)) |
3836 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3837 } else {
3838 /* Return +-inf. */
3839 return (sign << (exp_size + frac_size)) |
3840 MAKE_64BIT_MASK(frac_size, exp_size);
3841 }
3842 }
3843 }
3844
3845 int idx = frac >> (frac_size - precision);
3846 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3847 (frac_size - precision);
3848 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3849
3850 if (out_exp == 0 || out_exp == UINT64_MAX) {
3851 /*
3852 * The result is subnormal, but don't raise the underflow exception,
3853 * because there's no additional loss of precision.
3854 */
3855 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3856 if (out_exp == UINT64_MAX) {
3857 out_frac >>= 1;
3858 out_exp = 0;
3859 }
3860 }
3861
3862 uint64_t val = 0;
3863 val = deposit64(val, 0, frac_size, out_frac);
3864 val = deposit64(val, frac_size, exp_size, out_exp);
3865 val = deposit64(val, frac_size + exp_size, 1, sign);
3866 return val;
3867}
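/*
 * Worked example (illustrative, single precision: exp_size = 8,
 * frac_size = 23): for f = 2.0f we have sign = 0, exp = 128, frac = 0, so
 *   idx      = 0 >> 16                 = 0
 *   out_frac = lookup_table[0] << 16   = 127 << 16
 *   out_exp  = 2 * 127 + ~128 = 254 - 129 = 125
 * out_exp is neither 0 nor all-ones, so no subnormal adjustment is made
 * and the result is 0x3eff0000 ~= 0.498046875, i.e. 1/2.0 to about 7 bits.
 */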
3868
3869static float16 frec7_h(float16 f, float_status *s)
3870{
3871 int exp_size = 5, frac_size = 10;
3872 bool sign = float16_is_neg(f);
3873
3874 /* frec7(+-inf) = +-0 */
3875 if (float16_is_infinity(f)) {
3876 return float16_set_sign(float16_zero, sign);
3877 }
3878
3879 /* frec7(+-0) = +-inf */
3880 if (float16_is_zero(f)) {
3881 s->float_exception_flags |= float_flag_divbyzero;
3882 return float16_set_sign(float16_infinity, sign);
3883 }
3884
3885 /* frec7(sNaN) = canonical NaN */
3886 if (float16_is_signaling_nan(f, s)) {
3887 s->float_exception_flags |= float_flag_invalid;
3888 return float16_default_nan(s);
3889 }
3890
3891 /* frec7(qNaN) = canonical NaN */
3892 if (float16_is_quiet_nan(f, s)) {
3893 return float16_default_nan(s);
3894 }
3895
3896 /* +-normal, +-subnormal */
3897 uint64_t val = frec7(f, exp_size, frac_size, s);
3898 return make_float16(val);
3899}
3900
3901static float32 frec7_s(float32 f, float_status *s)
3902{
3903 int exp_size = 8, frac_size = 23;
3904 bool sign = float32_is_neg(f);
3905
3906 /* frec7(+-inf) = +-0 */
3907 if (float32_is_infinity(f)) {
3908 return float32_set_sign(float32_zero, sign);
3909 }
3910
3911 /* frec7(+-0) = +-inf */
3912 if (float32_is_zero(f)) {
3913 s->float_exception_flags |= float_flag_divbyzero;
3914 return float32_set_sign(float32_infinity, sign);
3915 }
3916
3917 /* frec7(sNaN) = canonical NaN */
3918 if (float32_is_signaling_nan(f, s)) {
3919 s->float_exception_flags |= float_flag_invalid;
3920 return float32_default_nan(s);
3921 }
3922
3923 /* frec7(qNaN) = canonical NaN */
3924 if (float32_is_quiet_nan(f, s)) {
3925 return float32_default_nan(s);
3926 }
3927
3928 /* +-normal, +-subnormal */
3929 uint64_t val = frec7(f, exp_size, frac_size, s);
3930 return make_float32(val);
3931}
3932
3933static float64 frec7_d(float64 f, float_status *s)
3934{
3935 int exp_size = 11, frac_size = 52;
3936 bool sign = float64_is_neg(f);
3937
3938 /* frec7(+-inf) = +-0 */
3939 if (float64_is_infinity(f)) {
3940 return float64_set_sign(float64_zero, sign);
3941 }
3942
3943 /* frec7(+-0) = +-inf */
3944 if (float64_is_zero(f)) {
3945 s->float_exception_flags |= float_flag_divbyzero;
3946 return float64_set_sign(float64_infinity, sign);
3947 }
3948
3949 /* frec7(sNaN) = canonical NaN */
3950 if (float64_is_signaling_nan(f, s)) {
3951 s->float_exception_flags |= float_flag_invalid;
3952 return float64_default_nan(s);
3953 }
3954
3955 /* frec7(qNaN) = canonical NaN */
3956 if (float64_is_quiet_nan(f, s)) {
3957 return float64_default_nan(s);
3958 }
3959
3960 /* +-normal, +-subnormal */
3961 uint64_t val = frec7(f, exp_size, frac_size, s);
3962 return make_float64(val);
3963}
3964
3965RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3966RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3967RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
8a085fb2 3968GEN_VEXT_V_ENV(vfrec7_v_h)
3969GEN_VEXT_V_ENV(vfrec7_v_w)
3970GEN_VEXT_V_ENV(vfrec7_v_d)
55c35407 3971
230b53dd 3972/* Vector Floating-Point MIN/MAX Instructions */
49c5611a
FC
3973RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3974RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3975RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
8a085fb2 3976GEN_VEXT_VV_ENV(vfmin_vv_h)
3977GEN_VEXT_VV_ENV(vfmin_vv_w)
3978GEN_VEXT_VV_ENV(vfmin_vv_d)
49c5611a
FC
3979RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3980RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3981RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
8a085fb2 3982GEN_VEXT_VF(vfmin_vf_h)
3983GEN_VEXT_VF(vfmin_vf_w)
3984GEN_VEXT_VF(vfmin_vf_d)
230b53dd 3985
49c5611a
FC
3986RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3987RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3988RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
8a085fb2 3989GEN_VEXT_VV_ENV(vfmax_vv_h)
3990GEN_VEXT_VV_ENV(vfmax_vv_w)
3991GEN_VEXT_VV_ENV(vfmax_vv_d)
49c5611a
FC
3992RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3993RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3994RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
8a085fb2 3995GEN_VEXT_VF(vfmax_vf_h)
3996GEN_VEXT_VF(vfmax_vf_w)
3997GEN_VEXT_VF(vfmax_vf_d)
1d426b81
LZ
3998
3999/* Vector Floating-Point Sign-Injection Instructions */
4000static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4001{
4002 return deposit64(b, 0, 15, a);
4003}
4004
4005static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4006{
4007 return deposit64(b, 0, 31, a);
4008}
4009
4010static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4011{
4012 return deposit64(b, 0, 63, a);
4013}
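/*
 * In all three helpers the first argument (a, taken from vs2) supplies
 * the magnitude bits and the second (b, taken from vs1 or f[rs1])
 * supplies the sign bit, so e.g. vfsgnj.vv produces
 * vd[i] = {sign(vs1[i]), magnitude(vs2[i])}; vfsgnjn.vv uses the
 * complemented sign and vfsgnjx.vv the XOR of the two signs.
 */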
4014
4015RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4016RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4017RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
8a085fb2 4018GEN_VEXT_VV_ENV(vfsgnj_vv_h)
4019GEN_VEXT_VV_ENV(vfsgnj_vv_w)
4020GEN_VEXT_VV_ENV(vfsgnj_vv_d)
1d426b81
LZ
4021RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4022RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4023RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
8a085fb2 4024GEN_VEXT_VF(vfsgnj_vf_h)
4025GEN_VEXT_VF(vfsgnj_vf_w)
4026GEN_VEXT_VF(vfsgnj_vf_d)
1d426b81
LZ
4027
4028static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4029{
4030 return deposit64(~b, 0, 15, a);
4031}
4032
4033static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4034{
4035 return deposit64(~b, 0, 31, a);
4036}
4037
4038static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4039{
4040 return deposit64(~b, 0, 63, a);
4041}
4042
4043RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4044RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4045RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
8a085fb2 4046GEN_VEXT_VV_ENV(vfsgnjn_vv_h)
4047GEN_VEXT_VV_ENV(vfsgnjn_vv_w)
4048GEN_VEXT_VV_ENV(vfsgnjn_vv_d)
1d426b81
LZ
4049RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4050RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4051RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
8a085fb2 4052GEN_VEXT_VF(vfsgnjn_vf_h)
4053GEN_VEXT_VF(vfsgnjn_vf_w)
4054GEN_VEXT_VF(vfsgnjn_vf_d)
1d426b81
LZ
4055
4056static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4057{
4058 return deposit64(b ^ a, 0, 15, a);
4059}
4060
4061static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4062{
4063 return deposit64(b ^ a, 0, 31, a);
4064}
4065
4066static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4067{
4068 return deposit64(b ^ a, 0, 63, a);
4069}
4070
4071RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4072RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4073RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
8a085fb2 4074GEN_VEXT_VV_ENV(vfsgnjx_vv_h)
4075GEN_VEXT_VV_ENV(vfsgnjx_vv_w)
4076GEN_VEXT_VV_ENV(vfsgnjx_vv_d)
1d426b81
LZ
4077RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4078RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4079RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
8a085fb2 4080GEN_VEXT_VF(vfsgnjx_vf_h)
4081GEN_VEXT_VF(vfsgnjx_vf_w)
4082GEN_VEXT_VF(vfsgnjx_vf_d)
2a68e9e5
LZ
4083
4084/* Vector Floating-Point Compare Instructions */
4085#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4086void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4087 CPURISCVState *env, uint32_t desc) \
4088{ \
2a68e9e5
LZ
4089 uint32_t vm = vext_vm(desc); \
4090 uint32_t vl = env->vl; \
2a68e9e5
LZ
4091 uint32_t i; \
4092 \
f714361e 4093 for (i = env->vstart; i < vl; i++) { \
2a68e9e5
LZ
4094 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4095 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4096 if (!vm && !vext_elem_mask(v0, i)) { \
2a68e9e5
LZ
4097 continue; \
4098 } \
f9298de5 4099 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4100 DO_OP(s2, s1, &env->fp_status)); \
4101 } \
f714361e 4102 env->vstart = 0; \
2a68e9e5
LZ
4103}
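/*
 * The compare helpers produce a mask result: one bit per element of vd,
 * written via vext_set_elem_mask().  Inactive elements (mask bit clear
 * while vm == 0) are skipped by the 'continue', so their destination
 * bits keep their previous value.  Illustrative sketch for vmfeq.vv with
 * vl = 4 and v0 = {1,1,0,1}: bits 0, 1 and 3 of vd receive the comparison
 * results, bit 2 is left untouched.
 */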
4104
2a68e9e5
LZ
4105GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4106GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4107GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4108
4109#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4110void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4111 CPURISCVState *env, uint32_t desc) \
4112{ \
2a68e9e5
LZ
4113 uint32_t vm = vext_vm(desc); \
4114 uint32_t vl = env->vl; \
2a68e9e5
LZ
4115 uint32_t i; \
4116 \
f714361e 4117 for (i = env->vstart; i < vl; i++) { \
2a68e9e5 4118 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
f9298de5 4119 if (!vm && !vext_elem_mask(v0, i)) { \
2a68e9e5
LZ
4120 continue; \
4121 } \
f9298de5 4122 vext_set_elem_mask(vd, i, \
2a68e9e5
LZ
4123 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4124 } \
f714361e 4125 env->vstart = 0; \
2a68e9e5
LZ
4126}
4127
4128GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4129GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4130GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4131
4132static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4133{
4134 FloatRelation compare = float16_compare_quiet(a, b, s);
4135 return compare != float_relation_equal;
4136}
4137
4138static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4139{
4140 FloatRelation compare = float32_compare_quiet(a, b, s);
4141 return compare != float_relation_equal;
4142}
4143
4144static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4145{
4146 FloatRelation compare = float64_compare_quiet(a, b, s);
4147 return compare != float_relation_equal;
4148}
4149
4150GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4151GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4152GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4153GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4154GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4155GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4156
2a68e9e5
LZ
4157GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4158GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4159GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4160GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4161GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4162GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4163
2a68e9e5
LZ
4164GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4165GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4166GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4167GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4168GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4169GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4170
4171static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4172{
4173 FloatRelation compare = float16_compare(a, b, s);
4174 return compare == float_relation_greater;
4175}
4176
4177static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4178{
4179 FloatRelation compare = float32_compare(a, b, s);
4180 return compare == float_relation_greater;
4181}
4182
4183static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4184{
4185 FloatRelation compare = float64_compare(a, b, s);
4186 return compare == float_relation_greater;
4187}
4188
4189GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4190GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4191GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4192
4193static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4194{
4195 FloatRelation compare = float16_compare(a, b, s);
4196 return compare == float_relation_greater ||
4197 compare == float_relation_equal;
4198}
4199
4200static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4201{
4202 FloatRelation compare = float32_compare(a, b, s);
4203 return compare == float_relation_greater ||
4204 compare == float_relation_equal;
4205}
4206
4207static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4208{
4209 FloatRelation compare = float64_compare(a, b, s);
4210 return compare == float_relation_greater ||
4211 compare == float_relation_equal;
4212}
4213
4214GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4215GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4216GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
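/*
 * Note on exception behaviour: vmfeq/vmfne use the *_quiet comparisons,
 * which raise the invalid flag only for signaling NaN inputs, while
 * vmflt/vmfle/vmfgt/vmfge use the signaling comparisons (float*_lt,
 * float*_le, float*_compare), which raise invalid for any NaN operand,
 * as IEEE 754 requires for ordered comparisons.
 */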
4217
121ddbb3
LZ
4218/* Vector Floating-Point Classify Instruction */
4219#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
4220static void do_##NAME(void *vd, void *vs2, int i) \
4221{ \
4222 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
4223 *((TD *)vd + HD(i)) = OP(s2); \
4224}
4225
8a085fb2 4226#define GEN_VEXT_V(NAME) \
121ddbb3
LZ
4227void HELPER(NAME)(void *vd, void *v0, void *vs2, \
4228 CPURISCVState *env, uint32_t desc) \
4229{ \
121ddbb3
LZ
4230 uint32_t vm = vext_vm(desc); \
4231 uint32_t vl = env->vl; \
4232 uint32_t i; \
4233 \
f714361e 4234 for (i = env->vstart; i < vl; i++) { \
f9298de5 4235 if (!vm && !vext_elem_mask(v0, i)) { \
121ddbb3
LZ
4236 continue; \
4237 } \
4238 do_##NAME(vd, vs2, i); \
4239 } \
f714361e 4240 env->vstart = 0; \
121ddbb3
LZ
4241}
4242
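/*
 * The classify helpers return a 10-bit mask with exactly one bit set,
 * following the scalar FCLASS encoding:
 *   bit 0: -inf         bit 5: +subnormal
 *   bit 1: -normal      bit 6: +normal
 *   bit 2: -subnormal   bit 7: +inf
 *   bit 3: -0           bit 8: signaling NaN
 *   bit 4: +0           bit 9: quiet NaN
 */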
4243target_ulong fclass_h(uint64_t frs1)
4244{
4245 float16 f = frs1;
4246 bool sign = float16_is_neg(f);
4247
4248 if (float16_is_infinity(f)) {
4249 return sign ? 1 << 0 : 1 << 7;
4250 } else if (float16_is_zero(f)) {
4251 return sign ? 1 << 3 : 1 << 4;
4252 } else if (float16_is_zero_or_denormal(f)) {
4253 return sign ? 1 << 2 : 1 << 5;
4254 } else if (float16_is_any_nan(f)) {
4255 float_status s = { }; /* for snan_bit_is_one */
4256 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4257 } else {
4258 return sign ? 1 << 1 : 1 << 6;
4259 }
4260}
4261
4262target_ulong fclass_s(uint64_t frs1)
4263{
4264 float32 f = frs1;
4265 bool sign = float32_is_neg(f);
4266
4267 if (float32_is_infinity(f)) {
4268 return sign ? 1 << 0 : 1 << 7;
4269 } else if (float32_is_zero(f)) {
4270 return sign ? 1 << 3 : 1 << 4;
4271 } else if (float32_is_zero_or_denormal(f)) {
4272 return sign ? 1 << 2 : 1 << 5;
4273 } else if (float32_is_any_nan(f)) {
4274 float_status s = { }; /* for snan_bit_is_one */
4275 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4276 } else {
4277 return sign ? 1 << 1 : 1 << 6;
4278 }
4279}
4280
4281target_ulong fclass_d(uint64_t frs1)
4282{
4283 float64 f = frs1;
4284 bool sign = float64_is_neg(f);
4285
4286 if (float64_is_infinity(f)) {
4287 return sign ? 1 << 0 : 1 << 7;
4288 } else if (float64_is_zero(f)) {
4289 return sign ? 1 << 3 : 1 << 4;
4290 } else if (float64_is_zero_or_denormal(f)) {
4291 return sign ? 1 << 2 : 1 << 5;
4292 } else if (float64_is_any_nan(f)) {
4293 float_status s = { }; /* for snan_bit_is_one */
4294 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4295 } else {
4296 return sign ? 1 << 1 : 1 << 6;
4297 }
4298}
4299
4300RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4301RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4302RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
8a085fb2 4303GEN_VEXT_V(vfclass_v_h)
4304GEN_VEXT_V(vfclass_v_w)
4305GEN_VEXT_V(vfclass_v_d)
64ab5846
LZ
4306
4307/* Vector Floating-Point Merge Instruction */
3479a814 4308#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
64ab5846
LZ
4309void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4310 CPURISCVState *env, uint32_t desc) \
4311{ \
64ab5846
LZ
4312 uint32_t vm = vext_vm(desc); \
4313 uint32_t vl = env->vl; \
64ab5846
LZ
4314 uint32_t i; \
4315 \
f714361e 4316 for (i = env->vstart; i < vl; i++) { \
64ab5846
LZ
4317 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4318 *((ETYPE *)vd + H(i)) \
f9298de5 4319 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
64ab5846 4320 } \
f714361e 4321 env->vstart = 0; \
64ab5846
LZ
4322}
4323
3479a814
FC
4324GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4325GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4326GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
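/*
 * Illustrative semantics: vfmerge.vfm vd, vs2, rs1, v0 writes
 * vd[i] = v0.mask[i] ? f[rs1] : vs2[i] for 0 <= i < vl.  The merge
 * encoding always has vm == 0, so the '!vm && !mask' test in the macro
 * above reduces to a plain mask-bit test.
 */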
92100973
LZ
4327
4328/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4329/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4330RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4331RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4332RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
8a085fb2 4333GEN_VEXT_V_ENV(vfcvt_xu_f_v_h)
4334GEN_VEXT_V_ENV(vfcvt_xu_f_v_w)
4335GEN_VEXT_V_ENV(vfcvt_xu_f_v_d)
92100973
LZ
4336
4337/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4338RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4339RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4340RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
8a085fb2 4341GEN_VEXT_V_ENV(vfcvt_x_f_v_h)
4342GEN_VEXT_V_ENV(vfcvt_x_f_v_w)
4343GEN_VEXT_V_ENV(vfcvt_x_f_v_d)
92100973
LZ
4344
4345/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4346RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4347RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4348RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
8a085fb2 4349GEN_VEXT_V_ENV(vfcvt_f_xu_v_h)
4350GEN_VEXT_V_ENV(vfcvt_f_xu_v_w)
4351GEN_VEXT_V_ENV(vfcvt_f_xu_v_d)
92100973
LZ
4352
4353/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4354RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4355RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4356RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
8a085fb2 4357GEN_VEXT_V_ENV(vfcvt_f_x_v_h)
4358GEN_VEXT_V_ENV(vfcvt_f_x_v_w)
4359GEN_VEXT_V_ENV(vfcvt_f_x_v_d)
4514b7b1
LZ
4360
4361/* Widening Floating-Point/Integer Type-Convert Instructions */
4362/* (TD, T2, TX2) */
3ce4c09d 4363#define WOP_UU_B uint16_t, uint8_t, uint8_t
4514b7b1
LZ
4364#define WOP_UU_H uint32_t, uint16_t, uint16_t
4365#define WOP_UU_W uint64_t, uint32_t, uint32_t
4366/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
4367RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4368RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
8a085fb2 4369GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h)
4370GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w)
4514b7b1
LZ
4371
4372/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4373RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4374RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
8a085fb2 4375GEN_VEXT_V_ENV(vfwcvt_x_f_v_h)
4376GEN_VEXT_V_ENV(vfwcvt_x_f_v_w)
4514b7b1
LZ
4377
4378/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. */
3ce4c09d 4379RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4514b7b1
LZ
4380RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4381RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
8a085fb2 4382GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b)
4383GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h)
4384GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w)
4514b7b1
LZ
4385
4386/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
3ce4c09d 4387RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4514b7b1
LZ
4388RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4389RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
8a085fb2 4390GEN_VEXT_V_ENV(vfwcvt_f_x_v_b)
4391GEN_VEXT_V_ENV(vfwcvt_f_x_v_h)
4392GEN_VEXT_V_ENV(vfwcvt_f_x_v_w)
4514b7b1
LZ
4393
4394/*
3ce4c09d 4395 * vfwcvt.f.f.v vd, vs2, vm
4514b7b1
LZ
4396 * Convert single-width float to double-width float.
4397 */
4398static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4399{
4400 return float16_to_float32(a, true, s);
4401}
4402
4403RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4404RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
8a085fb2 4405GEN_VEXT_V_ENV(vfwcvt_f_f_v_h)
4406GEN_VEXT_V_ENV(vfwcvt_f_f_v_w)
878d406e
LZ
4407
4408/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4409/* (TD, T2, TX2) */
ff679b58 4410#define NOP_UU_B uint8_t, uint16_t, uint32_t
878d406e
LZ
4411#define NOP_UU_H uint16_t, uint32_t, uint32_t
4412#define NOP_UU_W uint32_t, uint64_t, uint64_t
4413/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
ff679b58
FC
4414RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4415RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4416RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
8a085fb2 4417GEN_VEXT_V_ENV(vfncvt_xu_f_w_b)
4418GEN_VEXT_V_ENV(vfncvt_xu_f_w_h)
4419GEN_VEXT_V_ENV(vfncvt_xu_f_w_w)
878d406e
LZ
4420
4421/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
ff679b58
FC
4422RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4423RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4424RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
8a085fb2 4425GEN_VEXT_V_ENV(vfncvt_x_f_w_b)
4426GEN_VEXT_V_ENV(vfncvt_x_f_w_h)
4427GEN_VEXT_V_ENV(vfncvt_x_f_w_w)
878d406e
LZ
4428
4429/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. */
ff679b58
FC
4430RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4431RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
8a085fb2 4432GEN_VEXT_V_ENV(vfncvt_f_xu_w_h)
4433GEN_VEXT_V_ENV(vfncvt_f_xu_w_w)
878d406e
LZ
4434
4435/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
ff679b58
FC
4436RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4437RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
8a085fb2 4438GEN_VEXT_V_ENV(vfncvt_f_x_w_h)
4439GEN_VEXT_V_ENV(vfncvt_f_x_w_w)
878d406e
LZ
4440
4441/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4442static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4443{
4444 return float32_to_float16(a, true, s);
4445}
4446
ff679b58
FC
4447RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4448RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
8a085fb2 4449GEN_VEXT_V_ENV(vfncvt_f_f_w_h)
4450GEN_VEXT_V_ENV(vfncvt_f_f_w_w)
fe5c9ab1
LZ
4451
4452/*
4453 *** Vector Reduction Operations
4454 */
4455/* Vector Single-Width Integer Reduction Instructions */
3479a814 4456#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
fe5c9ab1
LZ
4457void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4458 void *vs2, CPURISCVState *env, uint32_t desc) \
4459{ \
fe5c9ab1
LZ
4460 uint32_t vm = vext_vm(desc); \
4461 uint32_t vl = env->vl; \
4462 uint32_t i; \
fe5c9ab1
LZ
4463 TD s1 = *((TD *)vs1 + HD(0)); \
4464 \
f714361e 4465 for (i = env->vstart; i < vl; i++) { \
fe5c9ab1 4466 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4467 if (!vm && !vext_elem_mask(v0, i)) { \
fe5c9ab1
LZ
4468 continue; \
4469 } \
4470 s1 = OP(s1, (TD)s2); \
4471 } \
4472 *((TD *)vd + HD(0)) = s1; \
f714361e 4473 env->vstart = 0; \
fe5c9ab1
LZ
4474}
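/*
 * Example of the reduction semantics implemented above (illustrative
 * values): with vl = 4, mask v0 = {1,0,1,1}, vs1[0] = 10 and
 * vs2 = {1,2,3,4}, vredsum.vs produces vd[0] = 10 + 1 + 3 + 4 = 18;
 * element 1 is inactive and does not contribute.
 */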
4475
4476/* vd[0] = sum(vs1[0], vs2[*]) */
3479a814
FC
4477GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4478GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4479GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4480GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
fe5c9ab1
LZ
4481
4482/* vd[0] = maxu(vs1[0], vs2[*]) */
3479a814
FC
4483GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4484GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4485GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4486GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4487
4488/* vd[0] = max(vs1[0], vs2[*]) */
3479a814
FC
4489GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4490GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4491GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4492GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
fe5c9ab1
LZ
4493
4494/* vd[0] = minu(vs1[0], vs2[*]) */
3479a814
FC
4495GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4496GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4497GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4498GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4499
4500/* vd[0] = min(vs1[0], vs2[*]) */
3479a814
FC
4501GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4502GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4503GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4504GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
fe5c9ab1
LZ
4505
4506/* vd[0] = and(vs1[0], vs2[*]) */
3479a814
FC
4507GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4508GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4509GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4510GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
fe5c9ab1
LZ
4511
4512/* vd[0] = or(vs1[0], vs2[*]) */
3479a814
FC
4513GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4514GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4515GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4516GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
fe5c9ab1
LZ
4517
4518/* vd[0] = xor(vs1[0], vs2[*]) */
3479a814
FC
4519GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4520GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4521GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4522GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
bba71820
LZ
4523
4524/* Vector Widening Integer Reduction Instructions */
4525/* signed sum reduction into double-width accumulator */
3479a814
FC
4526GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4527GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4528GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
bba71820
LZ
4529
4530/* Unsigned sum reduction into double-width accumulator */
3479a814
FC
4531GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4532GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4533GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
523547f1
LZ
4534
4535/* Vector Single-Width Floating-Point Reduction Instructions */
3479a814 4536#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
523547f1
LZ
4537void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4538 void *vs2, CPURISCVState *env, \
4539 uint32_t desc) \
4540{ \
523547f1
LZ
4541 uint32_t vm = vext_vm(desc); \
4542 uint32_t vl = env->vl; \
4543 uint32_t i; \
523547f1
LZ
4544 TD s1 = *((TD *)vs1 + HD(0)); \
4545 \
f714361e 4546 for (i = env->vstart; i < vl; i++) { \
523547f1 4547 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
f9298de5 4548 if (!vm && !vext_elem_mask(v0, i)) { \
523547f1
LZ
4549 continue; \
4550 } \
4551 s1 = OP(s1, (TD)s2, &env->fp_status); \
4552 } \
4553 *((TD *)vd + HD(0)) = s1; \
f714361e 4554 env->vstart = 0; \
523547f1
LZ
4555}
4556
4557/* Unordered sum */
3479a814
FC
4558GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4559GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4560GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
523547f1
LZ
4561
4562/* Maximum value */
08b60eeb
FC
4563GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4564GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4565GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
523547f1
LZ
4566
4567/* Minimum value */
08b60eeb
FC
4568GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4569GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4570GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
696b0c26
LZ
4571
4572/* Vector Widening Floating-Point Reduction Instructions */
4573/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4574void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4575 void *vs2, CPURISCVState *env, uint32_t desc)
4576{
696b0c26
LZ
4577 uint32_t vm = vext_vm(desc);
4578 uint32_t vl = env->vl;
4579 uint32_t i;
696b0c26
LZ
4580 uint32_t s1 = *((uint32_t *)vs1 + H4(0));
4581
f714361e 4582 for (i = env->vstart; i < vl; i++) {
696b0c26 4583 uint16_t s2 = *((uint16_t *)vs2 + H2(i));
f9298de5 4584 if (!vm && !vext_elem_mask(v0, i)) {
696b0c26
LZ
4585 continue;
4586 }
4587 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4588 &env->fp_status);
4589 }
4590 *((uint32_t *)vd + H4(0)) = s1;
f714361e 4591 env->vstart = 0;
696b0c26
LZ
4592}
4593
4594void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4595 void *vs2, CPURISCVState *env, uint32_t desc)
4596{
696b0c26
LZ
4597 uint32_t vm = vext_vm(desc);
4598 uint32_t vl = env->vl;
4599 uint32_t i;
696b0c26
LZ
4600 uint64_t s1 = *((uint64_t *)vs1);
4601
f714361e 4602 for (i = env->vstart; i < vl; i++) {
696b0c26 4603 uint32_t s2 = *((uint32_t *)vs2 + H4(i));
f9298de5 4604 if (!vm && !vext_elem_mask(v0, i)) {
696b0c26
LZ
4605 continue;
4606 }
4607 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4608 &env->fp_status);
4609 }
4610 *((uint64_t *)vd) = s1;
f714361e 4611 env->vstart = 0;
696b0c26 4612}

/*
 *** Vector Mask Operations
 */
/* Vector Mask-Register Logical Instructions */
#define GEN_VEXT_MASK_VV(NAME, OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
    int a, b;                                             \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        a = vext_elem_mask(vs1, i);                       \
        b = vext_elem_mask(vs2, i);                       \
        vext_set_elem_mask(vd, i, OP(b, a));              \
    }                                                     \
    env->vstart = 0;                                      \
}

#define DO_NAND(N, M) (!(N & M))
#define DO_ANDNOT(N, M) (N & !M)
#define DO_NOR(N, M) (!(N | M))
#define DO_ORNOT(N, M) (N | !M)
#define DO_XNOR(N, M) (!(N ^ M))

GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
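
/*
 * In GEN_VEXT_MASK_VV, a and b are single mask bits (0 or 1), so the
 * logical '!' in the DO_* helpers acts as a one-bit complement.  Note
 * the operand order OP(b, a): b comes from vs2 and a from vs1, so e.g.
 * vmandn.mm computes vd.mask[i] = vs2.mask[i] & !vs1.mask[i], matching
 * the "vmandn.mm vd, vs2, vs1" assembly form.
 */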

/* Vector count population in mask vcpop */
target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
                             uint32_t desc)
{
    target_ulong cnt = 0;
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                cnt++;
            }
        }
    }
    env->vstart = 0;
    return cnt;
}
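
/*
 * Example: an unmasked vcpop.m over vs2 holding the mask bits
 * 1 0 1 1 0 0 1 0 (element 0 first) with vl = 8 returns 4.
 */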

/* vfirst find-first-set mask bit */
target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
                              uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                return i;
            }
        }
    }
    env->vstart = 0;
    return -1LL;
}
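
/*
 * Example: an unmasked vfirst.m over vs2 = 0 0 1 0 ... returns 2;
 * when no active element has its mask bit set, the result is -1.
 */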

enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};

static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;
    bool first_mask_bit = false;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}
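
/*
 * Worked example for the three variants, unmasked, with
 * vs2 = 0 0 1 0 1 (element 0 first):
 *
 *   vmsbf.m (BEFORE_FIRST):  1 1 0 0 0
 *   vmsif.m (INCLUDE_FIRST): 1 1 1 0 0
 *   vmsof.m (ONLY_FIRST):    0 0 1 0 0
 */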

/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
                  uint32_t desc)                                      \
{                                                                     \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t vl = env->vl;                                            \
    uint32_t sum = 0;                                                 \
    int i;                                                            \
                                                                      \
    for (i = env->vstart; i < vl; i++) {                              \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            continue;                                                 \
        }                                                             \
        *((ETYPE *)vd + H(i)) = sum;                                  \
        if (vext_elem_mask(vs2, i)) {                                 \
            sum++;                                                    \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
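
/*
 * Example: an unmasked viota.m over vs2 = 1 0 1 1 0 writes 0 1 1 2 3,
 * i.e. each destination element receives the number of set bits of
 * vs2 strictly before it (an exclusive prefix sum).
 */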

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    int i;                                                                \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)

/*
 *** Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
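
/*
 * Starting the loop at MAX(vstart, offset) leaves vd[0]..vd[offset-1]
 * untouched: for vslideup only destination elements at or above
 * OFFSET are written.
 */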

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    target_ulong i_max, i;                                                \
                                                                          \
    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
        }                                                                 \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
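
/*
 * i_max bounds the destination elements that still have a source
 * element below vlmax; active elements from i_max up to vl - 1 would
 * read past the source group, so they are written as zero instead.
 */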

#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                    \
static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1,     \
                     void *vs2, CPURISCVState *env, uint32_t desc)        \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                     \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);             \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1,   \
                       void *vs2, CPURISCVState *env, uint32_t desc)      \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                   \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);           \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);         \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)              \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);       \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
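
/*
 * The floating-point slide1 helpers simply forward to the integer
 * slide1 helpers above: the scalar arrives as a raw 64-bit pattern
 * and is inserted bit-for-bit into the vacated element position.
 */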

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
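
/*
 * vrgatherei16 always reads its indices as 16-bit elements (TS1 is
 * uint16_t in every variant) regardless of the data SEW, which lets
 * SEW=8 gathers index beyond 255 elements and keeps the index vector
 * narrow at larger SEW.
 */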

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
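
/*
 * Example: with vl = 5 and vs1 holding the mask bits 1 0 1 1 0, the
 * helper packs vd[0] = vs2[0], vd[1] = vs2[2], vd[2] = vs2[3]; the
 * remaining destination elements are left untouched here.
 */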

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}
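
/*
 * simd_maxsz(desc) is the total size in bytes of the register group
 * being copied, and startb rescales vstart from elements to bytes, so
 * an interrupted whole-register move can resume mid-group.
 */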

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
                  CPURISCVState *env, uint32_t desc)             \
{                                                                \
    uint32_t vl = env->vl;                                       \
    uint32_t vm = vext_vm(desc);                                 \
    uint32_t i;                                                  \
                                                                 \
    for (i = env->vstart; i < vl; i++) {                         \
        if (!vm && !vext_elem_mask(v0, i)) {                     \
            continue;                                            \
        }                                                        \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
    }                                                            \
    env->vstart = 0;                                             \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
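
/*
 * The vf2/vf4/vf8 suffixes give the ratio between destination and
 * source element widths: e.g. vzext_vf2_h zero-extends uint8_t source
 * elements to uint16_t, while vsext_vf8_d sign-extends int8_t source
 * elements to int64_t.
 */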