2 * RISC-V Vector Extension Helpers for QEMU.
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "fpu/softfloat.h"
27 #include "tcg/tcg-gvec-desc.h"
28 #include "internals.h"
31 target_ulong
HELPER(vsetvl
)(CPURISCVState
*env
, target_ulong s1
,
35 RISCVCPU
*cpu
= env_archcpu(env
);
36 uint64_t lmul
= FIELD_EX64(s2
, VTYPE
, VLMUL
);
37 uint16_t sew
= 8 << FIELD_EX64(s2
, VTYPE
, VSEW
);
38 uint8_t ediv
= FIELD_EX64(s2
, VTYPE
, VEDIV
);
39 int xlen
= riscv_cpu_xlen(env
);
40 bool vill
= (s2
>> (xlen
- 1)) & 0x1;
41 target_ulong reserved
= s2
&
42 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT
,
43 xlen
- 1 - R_VTYPE_RESERVED_SHIFT
);
46 /* Fractional LMUL. */
48 cpu
->cfg
.elen
>> (8 - lmul
) < sew
) {
53 if ((sew
> cpu
->cfg
.elen
)
57 /* only set vill bit. */
65 vlmax
= vext_get_vlmax(cpu
, s2
);
79 * Note that vector data is stored in host-endian 64-bit chunks,
80 * so addressing units smaller than that needs a host-endian fixup.
83 #define H1(x) ((x) ^ 7)
84 #define H1_2(x) ((x) ^ 6)
85 #define H1_4(x) ((x) ^ 4)
86 #define H2(x) ((x) ^ 3)
87 #define H4(x) ((x) ^ 1)
98 static inline uint32_t vext_nf(uint32_t desc
)
100 return FIELD_EX32(simd_data(desc
), VDATA
, NF
);
103 static inline uint32_t vext_vm(uint32_t desc
)
105 return FIELD_EX32(simd_data(desc
), VDATA
, VM
);
109 * Encode LMUL to lmul as following:
120 static inline int32_t vext_lmul(uint32_t desc
)
122 return sextract32(FIELD_EX32(simd_data(desc
), VDATA
, LMUL
), 0, 3);
125 static inline uint32_t vext_vta(uint32_t desc
)
127 return FIELD_EX32(simd_data(desc
), VDATA
, VTA
);
131 * Get the maximum number of elements can be operated.
133 * log2_esz: log2 of element size in bytes.
135 static inline uint32_t vext_max_elems(uint32_t desc
, uint32_t log2_esz
)
138 * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits.
139 * so vlen in bytes (vlenb) is encoded as maxsz.
141 uint32_t vlenb
= simd_maxsz(desc
);
144 int scale
= vext_lmul(desc
) - log2_esz
;
145 return scale
< 0 ? vlenb
>> -scale
: vlenb
<< scale
;
149 * Get number of total elements, including prestart, body and tail elements.
150 * Note that when LMUL < 1, the tail includes the elements past VLMAX that
151 * are held in the same vector register.
153 static inline uint32_t vext_get_total_elems(CPURISCVState
*env
, uint32_t desc
,
156 uint32_t vlenb
= simd_maxsz(desc
);
157 uint32_t sew
= 1 << FIELD_EX64(env
->vtype
, VTYPE
, VSEW
);
158 int8_t emul
= ctzl(esz
) - ctzl(sew
) + vext_lmul(desc
) < 0 ? 0 :
159 ctzl(esz
) - ctzl(sew
) + vext_lmul(desc
);
160 return (vlenb
<< emul
) / esz
;
163 static inline target_ulong
adjust_addr(CPURISCVState
*env
, target_ulong addr
)
165 return (addr
& env
->cur_pmmask
) | env
->cur_pmbase
;
169 * This function checks watchpoint before real load operation.
171 * In softmmu mode, the TLB API probe_access is enough for watchpoint check.
172 * In user mode, there is no watchpoint support now.
174 * It will trigger an exception if there is no mapping in TLB
175 * and page table walk can't fill the TLB entry. Then the guest
176 * software can return here after process the exception or never return.
178 static void probe_pages(CPURISCVState
*env
, target_ulong addr
,
179 target_ulong len
, uintptr_t ra
,
180 MMUAccessType access_type
)
182 target_ulong pagelen
= -(addr
| TARGET_PAGE_MASK
);
183 target_ulong curlen
= MIN(pagelen
, len
);
185 probe_access(env
, adjust_addr(env
, addr
), curlen
, access_type
,
186 cpu_mmu_index(env
, false), ra
);
189 curlen
= len
- curlen
;
190 probe_access(env
, adjust_addr(env
, addr
), curlen
, access_type
,
191 cpu_mmu_index(env
, false), ra
);
195 /* set agnostic elements to 1s */
196 static void vext_set_elems_1s(void *base
, uint32_t is_agnostic
, uint32_t cnt
,
199 if (is_agnostic
== 0) {
200 /* policy undisturbed */
203 if (tot
- cnt
== 0) {
206 memset(base
+ cnt
, -1, tot
- cnt
);
209 static inline void vext_set_elem_mask(void *v0
, int index
,
212 int idx
= index
/ 64;
213 int pos
= index
% 64;
214 uint64_t old
= ((uint64_t *)v0
)[idx
];
215 ((uint64_t *)v0
)[idx
] = deposit64(old
, pos
, 1, value
);
219 * Earlier designs (pre-0.9) had a varying number of bits
220 * per mask value (MLEN). In the 0.9 design, MLEN=1.
223 static inline int vext_elem_mask(void *v0
, int index
)
225 int idx
= index
/ 64;
226 int pos
= index
% 64;
227 return (((uint64_t *)v0
)[idx
] >> pos
) & 1;
230 /* elements operations for load and store */
231 typedef void vext_ldst_elem_fn(CPURISCVState
*env
, target_ulong addr
,
232 uint32_t idx
, void *vd
, uintptr_t retaddr
);
234 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
235 static void NAME(CPURISCVState *env, abi_ptr addr, \
236 uint32_t idx, void *vd, uintptr_t retaddr)\
238 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
239 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
242 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
243 GEN_VEXT_LD_ELEM(lde_h
, int16_t, H2
, ldsw
)
244 GEN_VEXT_LD_ELEM(lde_w
, int32_t, H4
, ldl
)
245 GEN_VEXT_LD_ELEM(lde_d
, int64_t, H8
, ldq
)
247 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
248 static void NAME(CPURISCVState *env, abi_ptr addr, \
249 uint32_t idx, void *vd, uintptr_t retaddr)\
251 ETYPE data = *((ETYPE *)vd + H(idx)); \
252 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
255 GEN_VEXT_ST_ELEM(ste_b
, int8_t, H1
, stb
)
256 GEN_VEXT_ST_ELEM(ste_h
, int16_t, H2
, stw
)
257 GEN_VEXT_ST_ELEM(ste_w
, int32_t, H4
, stl
)
258 GEN_VEXT_ST_ELEM(ste_d
, int64_t, H8
, stq
)
261 *** stride: access vector element from strided memory
264 vext_ldst_stride(void *vd
, void *v0
, target_ulong base
,
265 target_ulong stride
, CPURISCVState
*env
,
266 uint32_t desc
, uint32_t vm
,
267 vext_ldst_elem_fn
*ldst_elem
,
268 uint32_t log2_esz
, uintptr_t ra
)
271 uint32_t nf
= vext_nf(desc
);
272 uint32_t max_elems
= vext_max_elems(desc
, log2_esz
);
273 uint32_t esz
= 1 << log2_esz
;
274 uint32_t total_elems
= vext_get_total_elems(env
, desc
, esz
);
275 uint32_t vta
= vext_vta(desc
);
277 for (i
= env
->vstart
; i
< env
->vl
; i
++, env
->vstart
++) {
278 if (!vm
&& !vext_elem_mask(v0
, i
)) {
284 target_ulong addr
= base
+ stride
* i
+ (k
<< log2_esz
);
285 ldst_elem(env
, adjust_addr(env
, addr
), i
+ k
* max_elems
, vd
, ra
);
290 /* set tail elements to 1s */
291 for (k
= 0; k
< nf
; ++k
) {
292 vext_set_elems_1s(vd
, vta
, (k
* max_elems
+ env
->vl
) * esz
,
293 (k
* max_elems
+ max_elems
) * esz
);
295 if (nf
* max_elems
% total_elems
!= 0) {
296 uint32_t vlenb
= env_archcpu(env
)->cfg
.vlen
>> 3;
297 uint32_t registers_used
=
298 ((nf
* max_elems
) * esz
+ (vlenb
- 1)) / vlenb
;
299 vext_set_elems_1s(vd
, vta
, (nf
* max_elems
) * esz
,
300 registers_used
* vlenb
);
304 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
305 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
306 target_ulong stride, CPURISCVState *env, \
309 uint32_t vm = vext_vm(desc); \
310 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
311 ctzl(sizeof(ETYPE)), GETPC()); \
314 GEN_VEXT_LD_STRIDE(vlse8_v
, int8_t, lde_b
)
315 GEN_VEXT_LD_STRIDE(vlse16_v
, int16_t, lde_h
)
316 GEN_VEXT_LD_STRIDE(vlse32_v
, int32_t, lde_w
)
317 GEN_VEXT_LD_STRIDE(vlse64_v
, int64_t, lde_d
)
319 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
320 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
321 target_ulong stride, CPURISCVState *env, \
324 uint32_t vm = vext_vm(desc); \
325 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
326 ctzl(sizeof(ETYPE)), GETPC()); \
329 GEN_VEXT_ST_STRIDE(vsse8_v
, int8_t, ste_b
)
330 GEN_VEXT_ST_STRIDE(vsse16_v
, int16_t, ste_h
)
331 GEN_VEXT_ST_STRIDE(vsse32_v
, int32_t, ste_w
)
332 GEN_VEXT_ST_STRIDE(vsse64_v
, int64_t, ste_d
)
335 *** unit-stride: access elements stored contiguously in memory
338 /* unmasked unit-stride load and store operation*/
340 vext_ldst_us(void *vd
, target_ulong base
, CPURISCVState
*env
, uint32_t desc
,
341 vext_ldst_elem_fn
*ldst_elem
, uint32_t log2_esz
, uint32_t evl
,
345 uint32_t nf
= vext_nf(desc
);
346 uint32_t max_elems
= vext_max_elems(desc
, log2_esz
);
347 uint32_t esz
= 1 << log2_esz
;
348 uint32_t total_elems
= vext_get_total_elems(env
, desc
, esz
);
349 uint32_t vta
= vext_vta(desc
);
351 /* load bytes from guest memory */
352 for (i
= env
->vstart
; i
< evl
; i
++, env
->vstart
++) {
355 target_ulong addr
= base
+ ((i
* nf
+ k
) << log2_esz
);
356 ldst_elem(env
, adjust_addr(env
, addr
), i
+ k
* max_elems
, vd
, ra
);
361 /* set tail elements to 1s */
362 for (k
= 0; k
< nf
; ++k
) {
363 vext_set_elems_1s(vd
, vta
, (k
* max_elems
+ evl
) * esz
,
364 (k
* max_elems
+ max_elems
) * esz
);
366 if (nf
* max_elems
% total_elems
!= 0) {
367 uint32_t vlenb
= env_archcpu(env
)->cfg
.vlen
>> 3;
368 uint32_t registers_used
=
369 ((nf
* max_elems
) * esz
+ (vlenb
- 1)) / vlenb
;
370 vext_set_elems_1s(vd
, vta
, (nf
* max_elems
) * esz
,
371 registers_used
* vlenb
);
376 * masked unit-stride load and store operation will be a special case of stride,
377 * stride = NF * sizeof (MTYPE)
380 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
381 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
382 CPURISCVState *env, uint32_t desc) \
384 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
385 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
386 ctzl(sizeof(ETYPE)), GETPC()); \
389 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
390 CPURISCVState *env, uint32_t desc) \
392 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
393 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
396 GEN_VEXT_LD_US(vle8_v
, int8_t, lde_b
)
397 GEN_VEXT_LD_US(vle16_v
, int16_t, lde_h
)
398 GEN_VEXT_LD_US(vle32_v
, int32_t, lde_w
)
399 GEN_VEXT_LD_US(vle64_v
, int64_t, lde_d
)
401 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
402 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
403 CPURISCVState *env, uint32_t desc) \
405 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
406 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
407 ctzl(sizeof(ETYPE)), GETPC()); \
410 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
411 CPURISCVState *env, uint32_t desc) \
413 vext_ldst_us(vd, base, env, desc, STORE_FN, \
414 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
417 GEN_VEXT_ST_US(vse8_v
, int8_t, ste_b
)
418 GEN_VEXT_ST_US(vse16_v
, int16_t, ste_h
)
419 GEN_VEXT_ST_US(vse32_v
, int32_t, ste_w
)
420 GEN_VEXT_ST_US(vse64_v
, int64_t, ste_d
)
423 *** unit stride mask load and store, EEW = 1
425 void HELPER(vlm_v
)(void *vd
, void *v0
, target_ulong base
,
426 CPURISCVState
*env
, uint32_t desc
)
428 /* evl = ceil(vl/8) */
429 uint8_t evl
= (env
->vl
+ 7) >> 3;
430 vext_ldst_us(vd
, base
, env
, desc
, lde_b
,
434 void HELPER(vsm_v
)(void *vd
, void *v0
, target_ulong base
,
435 CPURISCVState
*env
, uint32_t desc
)
437 /* evl = ceil(vl/8) */
438 uint8_t evl
= (env
->vl
+ 7) >> 3;
439 vext_ldst_us(vd
, base
, env
, desc
, ste_b
,
444 *** index: access vector element from indexed memory
446 typedef target_ulong
vext_get_index_addr(target_ulong base
,
447 uint32_t idx
, void *vs2
);
449 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
450 static target_ulong NAME(target_ulong base, \
451 uint32_t idx, void *vs2) \
453 return (base + *((ETYPE *)vs2 + H(idx))); \
456 GEN_VEXT_GET_INDEX_ADDR(idx_b
, uint8_t, H1
)
457 GEN_VEXT_GET_INDEX_ADDR(idx_h
, uint16_t, H2
)
458 GEN_VEXT_GET_INDEX_ADDR(idx_w
, uint32_t, H4
)
459 GEN_VEXT_GET_INDEX_ADDR(idx_d
, uint64_t, H8
)
462 vext_ldst_index(void *vd
, void *v0
, target_ulong base
,
463 void *vs2
, CPURISCVState
*env
, uint32_t desc
,
464 vext_get_index_addr get_index_addr
,
465 vext_ldst_elem_fn
*ldst_elem
,
466 uint32_t log2_esz
, uintptr_t ra
)
469 uint32_t nf
= vext_nf(desc
);
470 uint32_t vm
= vext_vm(desc
);
471 uint32_t max_elems
= vext_max_elems(desc
, log2_esz
);
472 uint32_t esz
= 1 << log2_esz
;
473 uint32_t total_elems
= vext_get_total_elems(env
, desc
, esz
);
474 uint32_t vta
= vext_vta(desc
);
476 /* load bytes from guest memory */
477 for (i
= env
->vstart
; i
< env
->vl
; i
++, env
->vstart
++) {
478 if (!vm
&& !vext_elem_mask(v0
, i
)) {
484 abi_ptr addr
= get_index_addr(base
, i
, vs2
) + (k
<< log2_esz
);
485 ldst_elem(env
, adjust_addr(env
, addr
), i
+ k
* max_elems
, vd
, ra
);
490 /* set tail elements to 1s */
491 for (k
= 0; k
< nf
; ++k
) {
492 vext_set_elems_1s(vd
, vta
, (k
* max_elems
+ env
->vl
) * esz
,
493 (k
* max_elems
+ max_elems
) * esz
);
495 if (nf
* max_elems
% total_elems
!= 0) {
496 uint32_t vlenb
= env_archcpu(env
)->cfg
.vlen
>> 3;
497 uint32_t registers_used
=
498 ((nf
* max_elems
) * esz
+ (vlenb
- 1)) / vlenb
;
499 vext_set_elems_1s(vd
, vta
, (nf
* max_elems
) * esz
,
500 registers_used
* vlenb
);
504 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
505 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
506 void *vs2, CPURISCVState *env, uint32_t desc) \
508 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
509 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
512 GEN_VEXT_LD_INDEX(vlxei8_8_v
, int8_t, idx_b
, lde_b
)
513 GEN_VEXT_LD_INDEX(vlxei8_16_v
, int16_t, idx_b
, lde_h
)
514 GEN_VEXT_LD_INDEX(vlxei8_32_v
, int32_t, idx_b
, lde_w
)
515 GEN_VEXT_LD_INDEX(vlxei8_64_v
, int64_t, idx_b
, lde_d
)
516 GEN_VEXT_LD_INDEX(vlxei16_8_v
, int8_t, idx_h
, lde_b
)
517 GEN_VEXT_LD_INDEX(vlxei16_16_v
, int16_t, idx_h
, lde_h
)
518 GEN_VEXT_LD_INDEX(vlxei16_32_v
, int32_t, idx_h
, lde_w
)
519 GEN_VEXT_LD_INDEX(vlxei16_64_v
, int64_t, idx_h
, lde_d
)
520 GEN_VEXT_LD_INDEX(vlxei32_8_v
, int8_t, idx_w
, lde_b
)
521 GEN_VEXT_LD_INDEX(vlxei32_16_v
, int16_t, idx_w
, lde_h
)
522 GEN_VEXT_LD_INDEX(vlxei32_32_v
, int32_t, idx_w
, lde_w
)
523 GEN_VEXT_LD_INDEX(vlxei32_64_v
, int64_t, idx_w
, lde_d
)
524 GEN_VEXT_LD_INDEX(vlxei64_8_v
, int8_t, idx_d
, lde_b
)
525 GEN_VEXT_LD_INDEX(vlxei64_16_v
, int16_t, idx_d
, lde_h
)
526 GEN_VEXT_LD_INDEX(vlxei64_32_v
, int32_t, idx_d
, lde_w
)
527 GEN_VEXT_LD_INDEX(vlxei64_64_v
, int64_t, idx_d
, lde_d
)
529 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
530 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
531 void *vs2, CPURISCVState *env, uint32_t desc) \
533 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
534 STORE_FN, ctzl(sizeof(ETYPE)), \
538 GEN_VEXT_ST_INDEX(vsxei8_8_v
, int8_t, idx_b
, ste_b
)
539 GEN_VEXT_ST_INDEX(vsxei8_16_v
, int16_t, idx_b
, ste_h
)
540 GEN_VEXT_ST_INDEX(vsxei8_32_v
, int32_t, idx_b
, ste_w
)
541 GEN_VEXT_ST_INDEX(vsxei8_64_v
, int64_t, idx_b
, ste_d
)
542 GEN_VEXT_ST_INDEX(vsxei16_8_v
, int8_t, idx_h
, ste_b
)
543 GEN_VEXT_ST_INDEX(vsxei16_16_v
, int16_t, idx_h
, ste_h
)
544 GEN_VEXT_ST_INDEX(vsxei16_32_v
, int32_t, idx_h
, ste_w
)
545 GEN_VEXT_ST_INDEX(vsxei16_64_v
, int64_t, idx_h
, ste_d
)
546 GEN_VEXT_ST_INDEX(vsxei32_8_v
, int8_t, idx_w
, ste_b
)
547 GEN_VEXT_ST_INDEX(vsxei32_16_v
, int16_t, idx_w
, ste_h
)
548 GEN_VEXT_ST_INDEX(vsxei32_32_v
, int32_t, idx_w
, ste_w
)
549 GEN_VEXT_ST_INDEX(vsxei32_64_v
, int64_t, idx_w
, ste_d
)
550 GEN_VEXT_ST_INDEX(vsxei64_8_v
, int8_t, idx_d
, ste_b
)
551 GEN_VEXT_ST_INDEX(vsxei64_16_v
, int16_t, idx_d
, ste_h
)
552 GEN_VEXT_ST_INDEX(vsxei64_32_v
, int32_t, idx_d
, ste_w
)
553 GEN_VEXT_ST_INDEX(vsxei64_64_v
, int64_t, idx_d
, ste_d
)
556 *** unit-stride fault-only-first load instructions
559 vext_ldff(void *vd
, void *v0
, target_ulong base
,
560 CPURISCVState
*env
, uint32_t desc
,
561 vext_ldst_elem_fn
*ldst_elem
,
562 uint32_t log2_esz
, uintptr_t ra
)
565 uint32_t i
, k
, vl
= 0;
566 uint32_t nf
= vext_nf(desc
);
567 uint32_t vm
= vext_vm(desc
);
568 uint32_t max_elems
= vext_max_elems(desc
, log2_esz
);
569 uint32_t esz
= 1 << log2_esz
;
570 uint32_t total_elems
= vext_get_total_elems(env
, desc
, esz
);
571 uint32_t vta
= vext_vta(desc
);
572 target_ulong addr
, offset
, remain
;
574 /* probe every access*/
575 for (i
= env
->vstart
; i
< env
->vl
; i
++) {
576 if (!vm
&& !vext_elem_mask(v0
, i
)) {
579 addr
= adjust_addr(env
, base
+ i
* (nf
<< log2_esz
));
581 probe_pages(env
, addr
, nf
<< log2_esz
, ra
, MMU_DATA_LOAD
);
583 /* if it triggers an exception, no need to check watchpoint */
584 remain
= nf
<< log2_esz
;
586 offset
= -(addr
| TARGET_PAGE_MASK
);
587 host
= tlb_vaddr_to_host(env
, addr
, MMU_DATA_LOAD
,
588 cpu_mmu_index(env
, false));
590 #ifdef CONFIG_USER_ONLY
591 if (page_check_range(addr
, offset
, PAGE_READ
) < 0) {
596 probe_pages(env
, addr
, offset
, ra
, MMU_DATA_LOAD
);
602 if (remain
<= offset
) {
606 addr
= adjust_addr(env
, addr
+ offset
);
611 /* load bytes from guest memory */
615 for (i
= env
->vstart
; i
< env
->vl
; i
++) {
617 if (!vm
&& !vext_elem_mask(v0
, i
)) {
621 target_ulong addr
= base
+ ((i
* nf
+ k
) << log2_esz
);
622 ldst_elem(env
, adjust_addr(env
, addr
), i
+ k
* max_elems
, vd
, ra
);
627 /* set tail elements to 1s */
628 for (k
= 0; k
< nf
; ++k
) {
629 vext_set_elems_1s(vd
, vta
, (k
* max_elems
+ env
->vl
) * esz
,
630 (k
* max_elems
+ max_elems
) * esz
);
632 if (nf
* max_elems
% total_elems
!= 0) {
633 uint32_t vlenb
= env_archcpu(env
)->cfg
.vlen
>> 3;
634 uint32_t registers_used
=
635 ((nf
* max_elems
) * esz
+ (vlenb
- 1)) / vlenb
;
636 vext_set_elems_1s(vd
, vta
, (nf
* max_elems
) * esz
,
637 registers_used
* vlenb
);
641 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
642 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
643 CPURISCVState *env, uint32_t desc) \
645 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
646 ctzl(sizeof(ETYPE)), GETPC()); \
649 GEN_VEXT_LDFF(vle8ff_v
, int8_t, lde_b
)
650 GEN_VEXT_LDFF(vle16ff_v
, int16_t, lde_h
)
651 GEN_VEXT_LDFF(vle32ff_v
, int32_t, lde_w
)
652 GEN_VEXT_LDFF(vle64ff_v
, int64_t, lde_d
)
654 #define DO_SWAP(N, M) (M)
655 #define DO_AND(N, M) (N & M)
656 #define DO_XOR(N, M) (N ^ M)
657 #define DO_OR(N, M) (N | M)
658 #define DO_ADD(N, M) (N + M)
661 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
662 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
664 /* Unsigned min/max */
665 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
666 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
669 *** load and store whole register instructions
672 vext_ldst_whole(void *vd
, target_ulong base
, CPURISCVState
*env
, uint32_t desc
,
673 vext_ldst_elem_fn
*ldst_elem
, uint32_t log2_esz
, uintptr_t ra
)
675 uint32_t i
, k
, off
, pos
;
676 uint32_t nf
= vext_nf(desc
);
677 uint32_t vlenb
= env_archcpu(env
)->cfg
.vlen
>> 3;
678 uint32_t max_elems
= vlenb
>> log2_esz
;
680 k
= env
->vstart
/ max_elems
;
681 off
= env
->vstart
% max_elems
;
684 /* load/store rest of elements of current segment pointed by vstart */
685 for (pos
= off
; pos
< max_elems
; pos
++, env
->vstart
++) {
686 target_ulong addr
= base
+ ((pos
+ k
* max_elems
) << log2_esz
);
687 ldst_elem(env
, adjust_addr(env
, addr
), pos
+ k
* max_elems
, vd
, ra
);
692 /* load/store elements for rest of segments */
693 for (; k
< nf
; k
++) {
694 for (i
= 0; i
< max_elems
; i
++, env
->vstart
++) {
695 target_ulong addr
= base
+ ((i
+ k
* max_elems
) << log2_esz
);
696 ldst_elem(env
, adjust_addr(env
, addr
), i
+ k
* max_elems
, vd
, ra
);
703 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
704 void HELPER(NAME)(void *vd, target_ulong base, \
705 CPURISCVState *env, uint32_t desc) \
707 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
708 ctzl(sizeof(ETYPE)), GETPC()); \
711 GEN_VEXT_LD_WHOLE(vl1re8_v
, int8_t, lde_b
)
712 GEN_VEXT_LD_WHOLE(vl1re16_v
, int16_t, lde_h
)
713 GEN_VEXT_LD_WHOLE(vl1re32_v
, int32_t, lde_w
)
714 GEN_VEXT_LD_WHOLE(vl1re64_v
, int64_t, lde_d
)
715 GEN_VEXT_LD_WHOLE(vl2re8_v
, int8_t, lde_b
)
716 GEN_VEXT_LD_WHOLE(vl2re16_v
, int16_t, lde_h
)
717 GEN_VEXT_LD_WHOLE(vl2re32_v
, int32_t, lde_w
)
718 GEN_VEXT_LD_WHOLE(vl2re64_v
, int64_t, lde_d
)
719 GEN_VEXT_LD_WHOLE(vl4re8_v
, int8_t, lde_b
)
720 GEN_VEXT_LD_WHOLE(vl4re16_v
, int16_t, lde_h
)
721 GEN_VEXT_LD_WHOLE(vl4re32_v
, int32_t, lde_w
)
722 GEN_VEXT_LD_WHOLE(vl4re64_v
, int64_t, lde_d
)
723 GEN_VEXT_LD_WHOLE(vl8re8_v
, int8_t, lde_b
)
724 GEN_VEXT_LD_WHOLE(vl8re16_v
, int16_t, lde_h
)
725 GEN_VEXT_LD_WHOLE(vl8re32_v
, int32_t, lde_w
)
726 GEN_VEXT_LD_WHOLE(vl8re64_v
, int64_t, lde_d
)
728 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
729 void HELPER(NAME)(void *vd, target_ulong base, \
730 CPURISCVState *env, uint32_t desc) \
732 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
733 ctzl(sizeof(ETYPE)), GETPC()); \
736 GEN_VEXT_ST_WHOLE(vs1r_v
, int8_t, ste_b
)
737 GEN_VEXT_ST_WHOLE(vs2r_v
, int8_t, ste_b
)
738 GEN_VEXT_ST_WHOLE(vs4r_v
, int8_t, ste_b
)
739 GEN_VEXT_ST_WHOLE(vs8r_v
, int8_t, ste_b
)
742 *** Vector Integer Arithmetic Instructions
745 /* expand macro args before macro */
746 #define RVVCALL(macro, ...) macro(__VA_ARGS__)
748 /* (TD, T1, T2, TX1, TX2) */
749 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
750 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
751 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
752 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
753 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
754 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
755 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
756 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
757 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
758 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
759 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
760 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
761 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
762 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
763 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
764 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
765 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
766 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
767 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
768 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
769 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
770 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
771 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
772 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
773 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
774 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
775 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
776 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
777 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
778 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
780 /* operation of two vector elements */
781 typedef void opivv2_fn(void *vd
, void *vs1
, void *vs2
, int i
);
783 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
784 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
786 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
787 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
788 *((TD *)vd + HD(i)) = OP(s2, s1); \
790 #define DO_SUB(N, M) (N - M)
791 #define DO_RSUB(N, M) (M - N)
793 RVVCALL(OPIVV2
, vadd_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_ADD
)
794 RVVCALL(OPIVV2
, vadd_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_ADD
)
795 RVVCALL(OPIVV2
, vadd_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_ADD
)
796 RVVCALL(OPIVV2
, vadd_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_ADD
)
797 RVVCALL(OPIVV2
, vsub_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_SUB
)
798 RVVCALL(OPIVV2
, vsub_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_SUB
)
799 RVVCALL(OPIVV2
, vsub_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_SUB
)
800 RVVCALL(OPIVV2
, vsub_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_SUB
)
802 static void do_vext_vv(void *vd
, void *v0
, void *vs1
, void *vs2
,
803 CPURISCVState
*env
, uint32_t desc
,
804 opivv2_fn
*fn
, uint32_t esz
)
806 uint32_t vm
= vext_vm(desc
);
807 uint32_t vl
= env
->vl
;
808 uint32_t total_elems
= vext_get_total_elems(env
, desc
, esz
);
809 uint32_t vta
= vext_vta(desc
);
812 for (i
= env
->vstart
; i
< vl
; i
++) {
813 if (!vm
&& !vext_elem_mask(v0
, i
)) {
819 /* set tail elements to 1s */
820 vext_set_elems_1s(vd
, vta
, vl
* esz
, total_elems
* esz
);
823 /* generate the helpers for OPIVV */
824 #define GEN_VEXT_VV(NAME, ESZ) \
825 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
826 void *vs2, CPURISCVState *env, \
829 do_vext_vv(vd, v0, vs1, vs2, env, desc, \
833 GEN_VEXT_VV(vadd_vv_b
, 1)
834 GEN_VEXT_VV(vadd_vv_h
, 2)
835 GEN_VEXT_VV(vadd_vv_w
, 4)
836 GEN_VEXT_VV(vadd_vv_d
, 8)
837 GEN_VEXT_VV(vsub_vv_b
, 1)
838 GEN_VEXT_VV(vsub_vv_h
, 2)
839 GEN_VEXT_VV(vsub_vv_w
, 4)
840 GEN_VEXT_VV(vsub_vv_d
, 8)
842 typedef void opivx2_fn(void *vd
, target_long s1
, void *vs2
, int i
);
845 * (T1)s1 gives the real operator type.
846 * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
848 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
849 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
851 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
852 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
855 RVVCALL(OPIVX2
, vadd_vx_b
, OP_SSS_B
, H1
, H1
, DO_ADD
)
856 RVVCALL(OPIVX2
, vadd_vx_h
, OP_SSS_H
, H2
, H2
, DO_ADD
)
857 RVVCALL(OPIVX2
, vadd_vx_w
, OP_SSS_W
, H4
, H4
, DO_ADD
)
858 RVVCALL(OPIVX2
, vadd_vx_d
, OP_SSS_D
, H8
, H8
, DO_ADD
)
859 RVVCALL(OPIVX2
, vsub_vx_b
, OP_SSS_B
, H1
, H1
, DO_SUB
)
860 RVVCALL(OPIVX2
, vsub_vx_h
, OP_SSS_H
, H2
, H2
, DO_SUB
)
861 RVVCALL(OPIVX2
, vsub_vx_w
, OP_SSS_W
, H4
, H4
, DO_SUB
)
862 RVVCALL(OPIVX2
, vsub_vx_d
, OP_SSS_D
, H8
, H8
, DO_SUB
)
863 RVVCALL(OPIVX2
, vrsub_vx_b
, OP_SSS_B
, H1
, H1
, DO_RSUB
)
864 RVVCALL(OPIVX2
, vrsub_vx_h
, OP_SSS_H
, H2
, H2
, DO_RSUB
)
865 RVVCALL(OPIVX2
, vrsub_vx_w
, OP_SSS_W
, H4
, H4
, DO_RSUB
)
866 RVVCALL(OPIVX2
, vrsub_vx_d
, OP_SSS_D
, H8
, H8
, DO_RSUB
)
868 static void do_vext_vx(void *vd
, void *v0
, target_long s1
, void *vs2
,
869 CPURISCVState
*env
, uint32_t desc
,
872 uint32_t vm
= vext_vm(desc
);
873 uint32_t vl
= env
->vl
;
876 for (i
= env
->vstart
; i
< vl
; i
++) {
877 if (!vm
&& !vext_elem_mask(v0
, i
)) {
885 /* generate the helpers for OPIVX */
886 #define GEN_VEXT_VX(NAME) \
887 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
888 void *vs2, CPURISCVState *env, \
891 do_vext_vx(vd, v0, s1, vs2, env, desc, \
895 GEN_VEXT_VX(vadd_vx_b
)
896 GEN_VEXT_VX(vadd_vx_h
)
897 GEN_VEXT_VX(vadd_vx_w
)
898 GEN_VEXT_VX(vadd_vx_d
)
899 GEN_VEXT_VX(vsub_vx_b
)
900 GEN_VEXT_VX(vsub_vx_h
)
901 GEN_VEXT_VX(vsub_vx_w
)
902 GEN_VEXT_VX(vsub_vx_d
)
903 GEN_VEXT_VX(vrsub_vx_b
)
904 GEN_VEXT_VX(vrsub_vx_h
)
905 GEN_VEXT_VX(vrsub_vx_w
)
906 GEN_VEXT_VX(vrsub_vx_d
)
908 void HELPER(vec_rsubs8
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
910 intptr_t oprsz
= simd_oprsz(desc
);
913 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
914 *(uint8_t *)(d
+ i
) = (uint8_t)b
- *(uint8_t *)(a
+ i
);
918 void HELPER(vec_rsubs16
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
920 intptr_t oprsz
= simd_oprsz(desc
);
923 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
924 *(uint16_t *)(d
+ i
) = (uint16_t)b
- *(uint16_t *)(a
+ i
);
928 void HELPER(vec_rsubs32
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
930 intptr_t oprsz
= simd_oprsz(desc
);
933 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
934 *(uint32_t *)(d
+ i
) = (uint32_t)b
- *(uint32_t *)(a
+ i
);
938 void HELPER(vec_rsubs64
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
940 intptr_t oprsz
= simd_oprsz(desc
);
943 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
944 *(uint64_t *)(d
+ i
) = b
- *(uint64_t *)(a
+ i
);
948 /* Vector Widening Integer Add/Subtract */
949 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
950 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
951 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
952 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
953 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
954 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
955 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
956 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
957 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
958 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
959 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
960 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
961 RVVCALL(OPIVV2
, vwaddu_vv_b
, WOP_UUU_B
, H2
, H1
, H1
, DO_ADD
)
962 RVVCALL(OPIVV2
, vwaddu_vv_h
, WOP_UUU_H
, H4
, H2
, H2
, DO_ADD
)
963 RVVCALL(OPIVV2
, vwaddu_vv_w
, WOP_UUU_W
, H8
, H4
, H4
, DO_ADD
)
964 RVVCALL(OPIVV2
, vwsubu_vv_b
, WOP_UUU_B
, H2
, H1
, H1
, DO_SUB
)
965 RVVCALL(OPIVV2
, vwsubu_vv_h
, WOP_UUU_H
, H4
, H2
, H2
, DO_SUB
)
966 RVVCALL(OPIVV2
, vwsubu_vv_w
, WOP_UUU_W
, H8
, H4
, H4
, DO_SUB
)
967 RVVCALL(OPIVV2
, vwadd_vv_b
, WOP_SSS_B
, H2
, H1
, H1
, DO_ADD
)
968 RVVCALL(OPIVV2
, vwadd_vv_h
, WOP_SSS_H
, H4
, H2
, H2
, DO_ADD
)
969 RVVCALL(OPIVV2
, vwadd_vv_w
, WOP_SSS_W
, H8
, H4
, H4
, DO_ADD
)
970 RVVCALL(OPIVV2
, vwsub_vv_b
, WOP_SSS_B
, H2
, H1
, H1
, DO_SUB
)
971 RVVCALL(OPIVV2
, vwsub_vv_h
, WOP_SSS_H
, H4
, H2
, H2
, DO_SUB
)
972 RVVCALL(OPIVV2
, vwsub_vv_w
, WOP_SSS_W
, H8
, H4
, H4
, DO_SUB
)
973 RVVCALL(OPIVV2
, vwaddu_wv_b
, WOP_WUUU_B
, H2
, H1
, H1
, DO_ADD
)
974 RVVCALL(OPIVV2
, vwaddu_wv_h
, WOP_WUUU_H
, H4
, H2
, H2
, DO_ADD
)
975 RVVCALL(OPIVV2
, vwaddu_wv_w
, WOP_WUUU_W
, H8
, H4
, H4
, DO_ADD
)
976 RVVCALL(OPIVV2
, vwsubu_wv_b
, WOP_WUUU_B
, H2
, H1
, H1
, DO_SUB
)
977 RVVCALL(OPIVV2
, vwsubu_wv_h
, WOP_WUUU_H
, H4
, H2
, H2
, DO_SUB
)
978 RVVCALL(OPIVV2
, vwsubu_wv_w
, WOP_WUUU_W
, H8
, H4
, H4
, DO_SUB
)
979 RVVCALL(OPIVV2
, vwadd_wv_b
, WOP_WSSS_B
, H2
, H1
, H1
, DO_ADD
)
980 RVVCALL(OPIVV2
, vwadd_wv_h
, WOP_WSSS_H
, H4
, H2
, H2
, DO_ADD
)
981 RVVCALL(OPIVV2
, vwadd_wv_w
, WOP_WSSS_W
, H8
, H4
, H4
, DO_ADD
)
982 RVVCALL(OPIVV2
, vwsub_wv_b
, WOP_WSSS_B
, H2
, H1
, H1
, DO_SUB
)
983 RVVCALL(OPIVV2
, vwsub_wv_h
, WOP_WSSS_H
, H4
, H2
, H2
, DO_SUB
)
984 RVVCALL(OPIVV2
, vwsub_wv_w
, WOP_WSSS_W
, H8
, H4
, H4
, DO_SUB
)
985 GEN_VEXT_VV(vwaddu_vv_b
, 2)
986 GEN_VEXT_VV(vwaddu_vv_h
, 4)
987 GEN_VEXT_VV(vwaddu_vv_w
, 8)
988 GEN_VEXT_VV(vwsubu_vv_b
, 2)
989 GEN_VEXT_VV(vwsubu_vv_h
, 4)
990 GEN_VEXT_VV(vwsubu_vv_w
, 8)
991 GEN_VEXT_VV(vwadd_vv_b
, 2)
992 GEN_VEXT_VV(vwadd_vv_h
, 4)
993 GEN_VEXT_VV(vwadd_vv_w
, 8)
994 GEN_VEXT_VV(vwsub_vv_b
, 2)
995 GEN_VEXT_VV(vwsub_vv_h
, 4)
996 GEN_VEXT_VV(vwsub_vv_w
, 8)
997 GEN_VEXT_VV(vwaddu_wv_b
, 2)
998 GEN_VEXT_VV(vwaddu_wv_h
, 4)
999 GEN_VEXT_VV(vwaddu_wv_w
, 8)
1000 GEN_VEXT_VV(vwsubu_wv_b
, 2)
1001 GEN_VEXT_VV(vwsubu_wv_h
, 4)
1002 GEN_VEXT_VV(vwsubu_wv_w
, 8)
1003 GEN_VEXT_VV(vwadd_wv_b
, 2)
1004 GEN_VEXT_VV(vwadd_wv_h
, 4)
1005 GEN_VEXT_VV(vwadd_wv_w
, 8)
1006 GEN_VEXT_VV(vwsub_wv_b
, 2)
1007 GEN_VEXT_VV(vwsub_wv_h
, 4)
1008 GEN_VEXT_VV(vwsub_wv_w
, 8)
1010 RVVCALL(OPIVX2
, vwaddu_vx_b
, WOP_UUU_B
, H2
, H1
, DO_ADD
)
1011 RVVCALL(OPIVX2
, vwaddu_vx_h
, WOP_UUU_H
, H4
, H2
, DO_ADD
)
1012 RVVCALL(OPIVX2
, vwaddu_vx_w
, WOP_UUU_W
, H8
, H4
, DO_ADD
)
1013 RVVCALL(OPIVX2
, vwsubu_vx_b
, WOP_UUU_B
, H2
, H1
, DO_SUB
)
1014 RVVCALL(OPIVX2
, vwsubu_vx_h
, WOP_UUU_H
, H4
, H2
, DO_SUB
)
1015 RVVCALL(OPIVX2
, vwsubu_vx_w
, WOP_UUU_W
, H8
, H4
, DO_SUB
)
1016 RVVCALL(OPIVX2
, vwadd_vx_b
, WOP_SSS_B
, H2
, H1
, DO_ADD
)
1017 RVVCALL(OPIVX2
, vwadd_vx_h
, WOP_SSS_H
, H4
, H2
, DO_ADD
)
1018 RVVCALL(OPIVX2
, vwadd_vx_w
, WOP_SSS_W
, H8
, H4
, DO_ADD
)
1019 RVVCALL(OPIVX2
, vwsub_vx_b
, WOP_SSS_B
, H2
, H1
, DO_SUB
)
1020 RVVCALL(OPIVX2
, vwsub_vx_h
, WOP_SSS_H
, H4
, H2
, DO_SUB
)
1021 RVVCALL(OPIVX2
, vwsub_vx_w
, WOP_SSS_W
, H8
, H4
, DO_SUB
)
1022 RVVCALL(OPIVX2
, vwaddu_wx_b
, WOP_WUUU_B
, H2
, H1
, DO_ADD
)
1023 RVVCALL(OPIVX2
, vwaddu_wx_h
, WOP_WUUU_H
, H4
, H2
, DO_ADD
)
1024 RVVCALL(OPIVX2
, vwaddu_wx_w
, WOP_WUUU_W
, H8
, H4
, DO_ADD
)
1025 RVVCALL(OPIVX2
, vwsubu_wx_b
, WOP_WUUU_B
, H2
, H1
, DO_SUB
)
1026 RVVCALL(OPIVX2
, vwsubu_wx_h
, WOP_WUUU_H
, H4
, H2
, DO_SUB
)
1027 RVVCALL(OPIVX2
, vwsubu_wx_w
, WOP_WUUU_W
, H8
, H4
, DO_SUB
)
1028 RVVCALL(OPIVX2
, vwadd_wx_b
, WOP_WSSS_B
, H2
, H1
, DO_ADD
)
1029 RVVCALL(OPIVX2
, vwadd_wx_h
, WOP_WSSS_H
, H4
, H2
, DO_ADD
)
1030 RVVCALL(OPIVX2
, vwadd_wx_w
, WOP_WSSS_W
, H8
, H4
, DO_ADD
)
1031 RVVCALL(OPIVX2
, vwsub_wx_b
, WOP_WSSS_B
, H2
, H1
, DO_SUB
)
1032 RVVCALL(OPIVX2
, vwsub_wx_h
, WOP_WSSS_H
, H4
, H2
, DO_SUB
)
1033 RVVCALL(OPIVX2
, vwsub_wx_w
, WOP_WSSS_W
, H8
, H4
, DO_SUB
)
1034 GEN_VEXT_VX(vwaddu_vx_b
)
1035 GEN_VEXT_VX(vwaddu_vx_h
)
1036 GEN_VEXT_VX(vwaddu_vx_w
)
1037 GEN_VEXT_VX(vwsubu_vx_b
)
1038 GEN_VEXT_VX(vwsubu_vx_h
)
1039 GEN_VEXT_VX(vwsubu_vx_w
)
1040 GEN_VEXT_VX(vwadd_vx_b
)
1041 GEN_VEXT_VX(vwadd_vx_h
)
1042 GEN_VEXT_VX(vwadd_vx_w
)
1043 GEN_VEXT_VX(vwsub_vx_b
)
1044 GEN_VEXT_VX(vwsub_vx_h
)
1045 GEN_VEXT_VX(vwsub_vx_w
)
1046 GEN_VEXT_VX(vwaddu_wx_b
)
1047 GEN_VEXT_VX(vwaddu_wx_h
)
1048 GEN_VEXT_VX(vwaddu_wx_w
)
1049 GEN_VEXT_VX(vwsubu_wx_b
)
1050 GEN_VEXT_VX(vwsubu_wx_h
)
1051 GEN_VEXT_VX(vwsubu_wx_w
)
1052 GEN_VEXT_VX(vwadd_wx_b
)
1053 GEN_VEXT_VX(vwadd_wx_h
)
1054 GEN_VEXT_VX(vwadd_wx_w
)
1055 GEN_VEXT_VX(vwsub_wx_b
)
1056 GEN_VEXT_VX(vwsub_wx_h
)
1057 GEN_VEXT_VX(vwsub_wx_w
)
1059 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1060 #define DO_VADC(N, M, C) (N + M + C)
1061 #define DO_VSBC(N, M, C) (N - M - C)
1063 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
1064 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1065 CPURISCVState *env, uint32_t desc) \
1067 uint32_t vl = env->vl; \
1070 for (i = env->vstart; i < vl; i++) { \
1071 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1072 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1073 ETYPE carry = vext_elem_mask(v0, i); \
1075 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
1080 GEN_VEXT_VADC_VVM(vadc_vvm_b
, uint8_t, H1
, DO_VADC
)
1081 GEN_VEXT_VADC_VVM(vadc_vvm_h
, uint16_t, H2
, DO_VADC
)
1082 GEN_VEXT_VADC_VVM(vadc_vvm_w
, uint32_t, H4
, DO_VADC
)
1083 GEN_VEXT_VADC_VVM(vadc_vvm_d
, uint64_t, H8
, DO_VADC
)
1085 GEN_VEXT_VADC_VVM(vsbc_vvm_b
, uint8_t, H1
, DO_VSBC
)
1086 GEN_VEXT_VADC_VVM(vsbc_vvm_h
, uint16_t, H2
, DO_VSBC
)
1087 GEN_VEXT_VADC_VVM(vsbc_vvm_w
, uint32_t, H4
, DO_VSBC
)
1088 GEN_VEXT_VADC_VVM(vsbc_vvm_d
, uint64_t, H8
, DO_VSBC
)
1090 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
1091 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1092 CPURISCVState *env, uint32_t desc) \
1094 uint32_t vl = env->vl; \
1097 for (i = env->vstart; i < vl; i++) { \
1098 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1099 ETYPE carry = vext_elem_mask(v0, i); \
1101 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1106 GEN_VEXT_VADC_VXM(vadc_vxm_b
, uint8_t, H1
, DO_VADC
)
1107 GEN_VEXT_VADC_VXM(vadc_vxm_h
, uint16_t, H2
, DO_VADC
)
1108 GEN_VEXT_VADC_VXM(vadc_vxm_w
, uint32_t, H4
, DO_VADC
)
1109 GEN_VEXT_VADC_VXM(vadc_vxm_d
, uint64_t, H8
, DO_VADC
)
1111 GEN_VEXT_VADC_VXM(vsbc_vxm_b
, uint8_t, H1
, DO_VSBC
)
1112 GEN_VEXT_VADC_VXM(vsbc_vxm_h
, uint16_t, H2
, DO_VSBC
)
1113 GEN_VEXT_VADC_VXM(vsbc_vxm_w
, uint32_t, H4
, DO_VSBC
)
1114 GEN_VEXT_VADC_VXM(vsbc_vxm_d
, uint64_t, H8
, DO_VSBC
)
1116 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
1117 (__typeof(N))(N + M) < N)
1118 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
1120 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
1121 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1122 CPURISCVState *env, uint32_t desc) \
1124 uint32_t vl = env->vl; \
1125 uint32_t vm = vext_vm(desc); \
1128 for (i = env->vstart; i < vl; i++) { \
1129 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1130 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1131 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1132 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
1137 GEN_VEXT_VMADC_VVM(vmadc_vvm_b
, uint8_t, H1
, DO_MADC
)
1138 GEN_VEXT_VMADC_VVM(vmadc_vvm_h
, uint16_t, H2
, DO_MADC
)
1139 GEN_VEXT_VMADC_VVM(vmadc_vvm_w
, uint32_t, H4
, DO_MADC
)
1140 GEN_VEXT_VMADC_VVM(vmadc_vvm_d
, uint64_t, H8
, DO_MADC
)
1142 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b
, uint8_t, H1
, DO_MSBC
)
1143 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h
, uint16_t, H2
, DO_MSBC
)
1144 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w
, uint32_t, H4
, DO_MSBC
)
1145 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d
, uint64_t, H8
, DO_MSBC
)
1147 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1148 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1149 void *vs2, CPURISCVState *env, uint32_t desc) \
1151 uint32_t vl = env->vl; \
1152 uint32_t vm = vext_vm(desc); \
1155 for (i = env->vstart; i < vl; i++) { \
1156 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1157 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1158 vext_set_elem_mask(vd, i, \
1159 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1164 GEN_VEXT_VMADC_VXM(vmadc_vxm_b
, uint8_t, H1
, DO_MADC
)
1165 GEN_VEXT_VMADC_VXM(vmadc_vxm_h
, uint16_t, H2
, DO_MADC
)
1166 GEN_VEXT_VMADC_VXM(vmadc_vxm_w
, uint32_t, H4
, DO_MADC
)
1167 GEN_VEXT_VMADC_VXM(vmadc_vxm_d
, uint64_t, H8
, DO_MADC
)
1169 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b
, uint8_t, H1
, DO_MSBC
)
1170 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h
, uint16_t, H2
, DO_MSBC
)
1171 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w
, uint32_t, H4
, DO_MSBC
)
1172 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d
, uint64_t, H8
, DO_MSBC
)
1174 /* Vector Bitwise Logical Instructions */
1175 RVVCALL(OPIVV2
, vand_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_AND
)
1176 RVVCALL(OPIVV2
, vand_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_AND
)
1177 RVVCALL(OPIVV2
, vand_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_AND
)
1178 RVVCALL(OPIVV2
, vand_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_AND
)
1179 RVVCALL(OPIVV2
, vor_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_OR
)
1180 RVVCALL(OPIVV2
, vor_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_OR
)
1181 RVVCALL(OPIVV2
, vor_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_OR
)
1182 RVVCALL(OPIVV2
, vor_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_OR
)
1183 RVVCALL(OPIVV2
, vxor_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_XOR
)
1184 RVVCALL(OPIVV2
, vxor_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_XOR
)
1185 RVVCALL(OPIVV2
, vxor_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_XOR
)
1186 RVVCALL(OPIVV2
, vxor_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_XOR
)
1187 GEN_VEXT_VV(vand_vv_b
, 1)
1188 GEN_VEXT_VV(vand_vv_h
, 2)
1189 GEN_VEXT_VV(vand_vv_w
, 4)
1190 GEN_VEXT_VV(vand_vv_d
, 8)
1191 GEN_VEXT_VV(vor_vv_b
, 1)
1192 GEN_VEXT_VV(vor_vv_h
, 2)
1193 GEN_VEXT_VV(vor_vv_w
, 4)
1194 GEN_VEXT_VV(vor_vv_d
, 8)
1195 GEN_VEXT_VV(vxor_vv_b
, 1)
1196 GEN_VEXT_VV(vxor_vv_h
, 2)
1197 GEN_VEXT_VV(vxor_vv_w
, 4)
1198 GEN_VEXT_VV(vxor_vv_d
, 8)
1200 RVVCALL(OPIVX2
, vand_vx_b
, OP_SSS_B
, H1
, H1
, DO_AND
)
1201 RVVCALL(OPIVX2
, vand_vx_h
, OP_SSS_H
, H2
, H2
, DO_AND
)
1202 RVVCALL(OPIVX2
, vand_vx_w
, OP_SSS_W
, H4
, H4
, DO_AND
)
1203 RVVCALL(OPIVX2
, vand_vx_d
, OP_SSS_D
, H8
, H8
, DO_AND
)
1204 RVVCALL(OPIVX2
, vor_vx_b
, OP_SSS_B
, H1
, H1
, DO_OR
)
1205 RVVCALL(OPIVX2
, vor_vx_h
, OP_SSS_H
, H2
, H2
, DO_OR
)
1206 RVVCALL(OPIVX2
, vor_vx_w
, OP_SSS_W
, H4
, H4
, DO_OR
)
1207 RVVCALL(OPIVX2
, vor_vx_d
, OP_SSS_D
, H8
, H8
, DO_OR
)
1208 RVVCALL(OPIVX2
, vxor_vx_b
, OP_SSS_B
, H1
, H1
, DO_XOR
)
1209 RVVCALL(OPIVX2
, vxor_vx_h
, OP_SSS_H
, H2
, H2
, DO_XOR
)
1210 RVVCALL(OPIVX2
, vxor_vx_w
, OP_SSS_W
, H4
, H4
, DO_XOR
)
1211 RVVCALL(OPIVX2
, vxor_vx_d
, OP_SSS_D
, H8
, H8
, DO_XOR
)
1212 GEN_VEXT_VX(vand_vx_b
)
1213 GEN_VEXT_VX(vand_vx_h
)
1214 GEN_VEXT_VX(vand_vx_w
)
1215 GEN_VEXT_VX(vand_vx_d
)
1216 GEN_VEXT_VX(vor_vx_b
)
1217 GEN_VEXT_VX(vor_vx_h
)
1218 GEN_VEXT_VX(vor_vx_w
)
1219 GEN_VEXT_VX(vor_vx_d
)
1220 GEN_VEXT_VX(vxor_vx_b
)
1221 GEN_VEXT_VX(vxor_vx_h
)
1222 GEN_VEXT_VX(vxor_vx_w
)
1223 GEN_VEXT_VX(vxor_vx_d
)
1225 /* Vector Single-Width Bit Shift Instructions */
1226 #define DO_SLL(N, M) (N << (M))
1227 #define DO_SRL(N, M) (N >> (M))
1229 /* generate the helpers for shift instructions with two vector operators */
1230 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
1231 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1232 void *vs2, CPURISCVState *env, uint32_t desc) \
1234 uint32_t vm = vext_vm(desc); \
1235 uint32_t vl = env->vl; \
1238 for (i = env->vstart; i < vl; i++) { \
1239 if (!vm && !vext_elem_mask(v0, i)) { \
1242 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1243 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1244 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1249 GEN_VEXT_SHIFT_VV(vsll_vv_b
, uint8_t, uint8_t, H1
, H1
, DO_SLL
, 0x7)
1250 GEN_VEXT_SHIFT_VV(vsll_vv_h
, uint16_t, uint16_t, H2
, H2
, DO_SLL
, 0xf)
1251 GEN_VEXT_SHIFT_VV(vsll_vv_w
, uint32_t, uint32_t, H4
, H4
, DO_SLL
, 0x1f)
1252 GEN_VEXT_SHIFT_VV(vsll_vv_d
, uint64_t, uint64_t, H8
, H8
, DO_SLL
, 0x3f)
1254 GEN_VEXT_SHIFT_VV(vsrl_vv_b
, uint8_t, uint8_t, H1
, H1
, DO_SRL
, 0x7)
1255 GEN_VEXT_SHIFT_VV(vsrl_vv_h
, uint16_t, uint16_t, H2
, H2
, DO_SRL
, 0xf)
1256 GEN_VEXT_SHIFT_VV(vsrl_vv_w
, uint32_t, uint32_t, H4
, H4
, DO_SRL
, 0x1f)
1257 GEN_VEXT_SHIFT_VV(vsrl_vv_d
, uint64_t, uint64_t, H8
, H8
, DO_SRL
, 0x3f)
1259 GEN_VEXT_SHIFT_VV(vsra_vv_b
, uint8_t, int8_t, H1
, H1
, DO_SRL
, 0x7)
1260 GEN_VEXT_SHIFT_VV(vsra_vv_h
, uint16_t, int16_t, H2
, H2
, DO_SRL
, 0xf)
1261 GEN_VEXT_SHIFT_VV(vsra_vv_w
, uint32_t, int32_t, H4
, H4
, DO_SRL
, 0x1f)
1262 GEN_VEXT_SHIFT_VV(vsra_vv_d
, uint64_t, int64_t, H8
, H8
, DO_SRL
, 0x3f)
1264 /* generate the helpers for shift instructions with one vector and one scalar */
1265 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1266 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1267 void *vs2, CPURISCVState *env, uint32_t desc) \
1269 uint32_t vm = vext_vm(desc); \
1270 uint32_t vl = env->vl; \
1273 for (i = env->vstart; i < vl; i++) { \
1274 if (!vm && !vext_elem_mask(v0, i)) { \
1277 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1278 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1283 GEN_VEXT_SHIFT_VX(vsll_vx_b
, uint8_t, int8_t, H1
, H1
, DO_SLL
, 0x7)
1284 GEN_VEXT_SHIFT_VX(vsll_vx_h
, uint16_t, int16_t, H2
, H2
, DO_SLL
, 0xf)
1285 GEN_VEXT_SHIFT_VX(vsll_vx_w
, uint32_t, int32_t, H4
, H4
, DO_SLL
, 0x1f)
1286 GEN_VEXT_SHIFT_VX(vsll_vx_d
, uint64_t, int64_t, H8
, H8
, DO_SLL
, 0x3f)
1288 GEN_VEXT_SHIFT_VX(vsrl_vx_b
, uint8_t, uint8_t, H1
, H1
, DO_SRL
, 0x7)
1289 GEN_VEXT_SHIFT_VX(vsrl_vx_h
, uint16_t, uint16_t, H2
, H2
, DO_SRL
, 0xf)
1290 GEN_VEXT_SHIFT_VX(vsrl_vx_w
, uint32_t, uint32_t, H4
, H4
, DO_SRL
, 0x1f)
1291 GEN_VEXT_SHIFT_VX(vsrl_vx_d
, uint64_t, uint64_t, H8
, H8
, DO_SRL
, 0x3f)
1293 GEN_VEXT_SHIFT_VX(vsra_vx_b
, int8_t, int8_t, H1
, H1
, DO_SRL
, 0x7)
1294 GEN_VEXT_SHIFT_VX(vsra_vx_h
, int16_t, int16_t, H2
, H2
, DO_SRL
, 0xf)
1295 GEN_VEXT_SHIFT_VX(vsra_vx_w
, int32_t, int32_t, H4
, H4
, DO_SRL
, 0x1f)
1296 GEN_VEXT_SHIFT_VX(vsra_vx_d
, int64_t, int64_t, H8
, H8
, DO_SRL
, 0x3f)
1298 /* Vector Narrowing Integer Right Shift Instructions */
1299 GEN_VEXT_SHIFT_VV(vnsrl_wv_b
, uint8_t, uint16_t, H1
, H2
, DO_SRL
, 0xf)
1300 GEN_VEXT_SHIFT_VV(vnsrl_wv_h
, uint16_t, uint32_t, H2
, H4
, DO_SRL
, 0x1f)
1301 GEN_VEXT_SHIFT_VV(vnsrl_wv_w
, uint32_t, uint64_t, H4
, H8
, DO_SRL
, 0x3f)
1302 GEN_VEXT_SHIFT_VV(vnsra_wv_b
, uint8_t, int16_t, H1
, H2
, DO_SRL
, 0xf)
1303 GEN_VEXT_SHIFT_VV(vnsra_wv_h
, uint16_t, int32_t, H2
, H4
, DO_SRL
, 0x1f)
1304 GEN_VEXT_SHIFT_VV(vnsra_wv_w
, uint32_t, int64_t, H4
, H8
, DO_SRL
, 0x3f)
1305 GEN_VEXT_SHIFT_VX(vnsrl_wx_b
, uint8_t, uint16_t, H1
, H2
, DO_SRL
, 0xf)
1306 GEN_VEXT_SHIFT_VX(vnsrl_wx_h
, uint16_t, uint32_t, H2
, H4
, DO_SRL
, 0x1f)
1307 GEN_VEXT_SHIFT_VX(vnsrl_wx_w
, uint32_t, uint64_t, H4
, H8
, DO_SRL
, 0x3f)
1308 GEN_VEXT_SHIFT_VX(vnsra_wx_b
, int8_t, int16_t, H1
, H2
, DO_SRL
, 0xf)
1309 GEN_VEXT_SHIFT_VX(vnsra_wx_h
, int16_t, int32_t, H2
, H4
, DO_SRL
, 0x1f)
1310 GEN_VEXT_SHIFT_VX(vnsra_wx_w
, int32_t, int64_t, H4
, H8
, DO_SRL
, 0x3f)
1312 /* Vector Integer Comparison Instructions */
1313 #define DO_MSEQ(N, M) (N == M)
1314 #define DO_MSNE(N, M) (N != M)
1315 #define DO_MSLT(N, M) (N < M)
1316 #define DO_MSLE(N, M) (N <= M)
1317 #define DO_MSGT(N, M) (N > M)
1319 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1320 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1321 CPURISCVState *env, uint32_t desc) \
1323 uint32_t vm = vext_vm(desc); \
1324 uint32_t vl = env->vl; \
1327 for (i = env->vstart; i < vl; i++) { \
1328 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1329 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1330 if (!vm && !vext_elem_mask(v0, i)) { \
1333 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1338 GEN_VEXT_CMP_VV(vmseq_vv_b
, uint8_t, H1
, DO_MSEQ
)
1339 GEN_VEXT_CMP_VV(vmseq_vv_h
, uint16_t, H2
, DO_MSEQ
)
1340 GEN_VEXT_CMP_VV(vmseq_vv_w
, uint32_t, H4
, DO_MSEQ
)
1341 GEN_VEXT_CMP_VV(vmseq_vv_d
, uint64_t, H8
, DO_MSEQ
)
1343 GEN_VEXT_CMP_VV(vmsne_vv_b
, uint8_t, H1
, DO_MSNE
)
1344 GEN_VEXT_CMP_VV(vmsne_vv_h
, uint16_t, H2
, DO_MSNE
)
1345 GEN_VEXT_CMP_VV(vmsne_vv_w
, uint32_t, H4
, DO_MSNE
)
1346 GEN_VEXT_CMP_VV(vmsne_vv_d
, uint64_t, H8
, DO_MSNE
)
1348 GEN_VEXT_CMP_VV(vmsltu_vv_b
, uint8_t, H1
, DO_MSLT
)
1349 GEN_VEXT_CMP_VV(vmsltu_vv_h
, uint16_t, H2
, DO_MSLT
)
1350 GEN_VEXT_CMP_VV(vmsltu_vv_w
, uint32_t, H4
, DO_MSLT
)
1351 GEN_VEXT_CMP_VV(vmsltu_vv_d
, uint64_t, H8
, DO_MSLT
)
1353 GEN_VEXT_CMP_VV(vmslt_vv_b
, int8_t, H1
, DO_MSLT
)
1354 GEN_VEXT_CMP_VV(vmslt_vv_h
, int16_t, H2
, DO_MSLT
)
1355 GEN_VEXT_CMP_VV(vmslt_vv_w
, int32_t, H4
, DO_MSLT
)
1356 GEN_VEXT_CMP_VV(vmslt_vv_d
, int64_t, H8
, DO_MSLT
)
1358 GEN_VEXT_CMP_VV(vmsleu_vv_b
, uint8_t, H1
, DO_MSLE
)
1359 GEN_VEXT_CMP_VV(vmsleu_vv_h
, uint16_t, H2
, DO_MSLE
)
1360 GEN_VEXT_CMP_VV(vmsleu_vv_w
, uint32_t, H4
, DO_MSLE
)
1361 GEN_VEXT_CMP_VV(vmsleu_vv_d
, uint64_t, H8
, DO_MSLE
)
1363 GEN_VEXT_CMP_VV(vmsle_vv_b
, int8_t, H1
, DO_MSLE
)
1364 GEN_VEXT_CMP_VV(vmsle_vv_h
, int16_t, H2
, DO_MSLE
)
1365 GEN_VEXT_CMP_VV(vmsle_vv_w
, int32_t, H4
, DO_MSLE
)
1366 GEN_VEXT_CMP_VV(vmsle_vv_d
, int64_t, H8
, DO_MSLE
)
1368 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1369 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1370 CPURISCVState *env, uint32_t desc) \
1372 uint32_t vm = vext_vm(desc); \
1373 uint32_t vl = env->vl; \
1376 for (i = env->vstart; i < vl; i++) { \
1377 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1378 if (!vm && !vext_elem_mask(v0, i)) { \
1381 vext_set_elem_mask(vd, i, \
1382 DO_OP(s2, (ETYPE)(target_long)s1)); \
1387 GEN_VEXT_CMP_VX(vmseq_vx_b
, uint8_t, H1
, DO_MSEQ
)
1388 GEN_VEXT_CMP_VX(vmseq_vx_h
, uint16_t, H2
, DO_MSEQ
)
1389 GEN_VEXT_CMP_VX(vmseq_vx_w
, uint32_t, H4
, DO_MSEQ
)
1390 GEN_VEXT_CMP_VX(vmseq_vx_d
, uint64_t, H8
, DO_MSEQ
)
1392 GEN_VEXT_CMP_VX(vmsne_vx_b
, uint8_t, H1
, DO_MSNE
)
1393 GEN_VEXT_CMP_VX(vmsne_vx_h
, uint16_t, H2
, DO_MSNE
)
1394 GEN_VEXT_CMP_VX(vmsne_vx_w
, uint32_t, H4
, DO_MSNE
)
1395 GEN_VEXT_CMP_VX(vmsne_vx_d
, uint64_t, H8
, DO_MSNE
)
1397 GEN_VEXT_CMP_VX(vmsltu_vx_b
, uint8_t, H1
, DO_MSLT
)
1398 GEN_VEXT_CMP_VX(vmsltu_vx_h
, uint16_t, H2
, DO_MSLT
)
1399 GEN_VEXT_CMP_VX(vmsltu_vx_w
, uint32_t, H4
, DO_MSLT
)
1400 GEN_VEXT_CMP_VX(vmsltu_vx_d
, uint64_t, H8
, DO_MSLT
)
1402 GEN_VEXT_CMP_VX(vmslt_vx_b
, int8_t, H1
, DO_MSLT
)
1403 GEN_VEXT_CMP_VX(vmslt_vx_h
, int16_t, H2
, DO_MSLT
)
1404 GEN_VEXT_CMP_VX(vmslt_vx_w
, int32_t, H4
, DO_MSLT
)
1405 GEN_VEXT_CMP_VX(vmslt_vx_d
, int64_t, H8
, DO_MSLT
)
1407 GEN_VEXT_CMP_VX(vmsleu_vx_b
, uint8_t, H1
, DO_MSLE
)
1408 GEN_VEXT_CMP_VX(vmsleu_vx_h
, uint16_t, H2
, DO_MSLE
)
1409 GEN_VEXT_CMP_VX(vmsleu_vx_w
, uint32_t, H4
, DO_MSLE
)
1410 GEN_VEXT_CMP_VX(vmsleu_vx_d
, uint64_t, H8
, DO_MSLE
)
1412 GEN_VEXT_CMP_VX(vmsle_vx_b
, int8_t, H1
, DO_MSLE
)
1413 GEN_VEXT_CMP_VX(vmsle_vx_h
, int16_t, H2
, DO_MSLE
)
1414 GEN_VEXT_CMP_VX(vmsle_vx_w
, int32_t, H4
, DO_MSLE
)
1415 GEN_VEXT_CMP_VX(vmsle_vx_d
, int64_t, H8
, DO_MSLE
)
1417 GEN_VEXT_CMP_VX(vmsgtu_vx_b
, uint8_t, H1
, DO_MSGT
)
1418 GEN_VEXT_CMP_VX(vmsgtu_vx_h
, uint16_t, H2
, DO_MSGT
)
1419 GEN_VEXT_CMP_VX(vmsgtu_vx_w
, uint32_t, H4
, DO_MSGT
)
1420 GEN_VEXT_CMP_VX(vmsgtu_vx_d
, uint64_t, H8
, DO_MSGT
)
1422 GEN_VEXT_CMP_VX(vmsgt_vx_b
, int8_t, H1
, DO_MSGT
)
1423 GEN_VEXT_CMP_VX(vmsgt_vx_h
, int16_t, H2
, DO_MSGT
)
1424 GEN_VEXT_CMP_VX(vmsgt_vx_w
, int32_t, H4
, DO_MSGT
)
1425 GEN_VEXT_CMP_VX(vmsgt_vx_d
, int64_t, H8
, DO_MSGT
)
1427 /* Vector Integer Min/Max Instructions */
1428 RVVCALL(OPIVV2
, vminu_vv_b
, OP_UUU_B
, H1
, H1
, H1
, DO_MIN
)
1429 RVVCALL(OPIVV2
, vminu_vv_h
, OP_UUU_H
, H2
, H2
, H2
, DO_MIN
)
1430 RVVCALL(OPIVV2
, vminu_vv_w
, OP_UUU_W
, H4
, H4
, H4
, DO_MIN
)
1431 RVVCALL(OPIVV2
, vminu_vv_d
, OP_UUU_D
, H8
, H8
, H8
, DO_MIN
)
1432 RVVCALL(OPIVV2
, vmin_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_MIN
)
1433 RVVCALL(OPIVV2
, vmin_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_MIN
)
1434 RVVCALL(OPIVV2
, vmin_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_MIN
)
1435 RVVCALL(OPIVV2
, vmin_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_MIN
)
1436 RVVCALL(OPIVV2
, vmaxu_vv_b
, OP_UUU_B
, H1
, H1
, H1
, DO_MAX
)
1437 RVVCALL(OPIVV2
, vmaxu_vv_h
, OP_UUU_H
, H2
, H2
, H2
, DO_MAX
)
1438 RVVCALL(OPIVV2
, vmaxu_vv_w
, OP_UUU_W
, H4
, H4
, H4
, DO_MAX
)
1439 RVVCALL(OPIVV2
, vmaxu_vv_d
, OP_UUU_D
, H8
, H8
, H8
, DO_MAX
)
1440 RVVCALL(OPIVV2
, vmax_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_MAX
)
1441 RVVCALL(OPIVV2
, vmax_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_MAX
)
1442 RVVCALL(OPIVV2
, vmax_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_MAX
)
1443 RVVCALL(OPIVV2
, vmax_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_MAX
)
1444 GEN_VEXT_VV(vminu_vv_b
, 1)
1445 GEN_VEXT_VV(vminu_vv_h
, 2)
1446 GEN_VEXT_VV(vminu_vv_w
, 4)
1447 GEN_VEXT_VV(vminu_vv_d
, 8)
1448 GEN_VEXT_VV(vmin_vv_b
, 1)
1449 GEN_VEXT_VV(vmin_vv_h
, 2)
1450 GEN_VEXT_VV(vmin_vv_w
, 4)
1451 GEN_VEXT_VV(vmin_vv_d
, 8)
1452 GEN_VEXT_VV(vmaxu_vv_b
, 1)
1453 GEN_VEXT_VV(vmaxu_vv_h
, 2)
1454 GEN_VEXT_VV(vmaxu_vv_w
, 4)
1455 GEN_VEXT_VV(vmaxu_vv_d
, 8)
1456 GEN_VEXT_VV(vmax_vv_b
, 1)
1457 GEN_VEXT_VV(vmax_vv_h
, 2)
1458 GEN_VEXT_VV(vmax_vv_w
, 4)
1459 GEN_VEXT_VV(vmax_vv_d
, 8)
1461 RVVCALL(OPIVX2
, vminu_vx_b
, OP_UUU_B
, H1
, H1
, DO_MIN
)
1462 RVVCALL(OPIVX2
, vminu_vx_h
, OP_UUU_H
, H2
, H2
, DO_MIN
)
1463 RVVCALL(OPIVX2
, vminu_vx_w
, OP_UUU_W
, H4
, H4
, DO_MIN
)
1464 RVVCALL(OPIVX2
, vminu_vx_d
, OP_UUU_D
, H8
, H8
, DO_MIN
)
1465 RVVCALL(OPIVX2
, vmin_vx_b
, OP_SSS_B
, H1
, H1
, DO_MIN
)
1466 RVVCALL(OPIVX2
, vmin_vx_h
, OP_SSS_H
, H2
, H2
, DO_MIN
)
1467 RVVCALL(OPIVX2
, vmin_vx_w
, OP_SSS_W
, H4
, H4
, DO_MIN
)
1468 RVVCALL(OPIVX2
, vmin_vx_d
, OP_SSS_D
, H8
, H8
, DO_MIN
)
1469 RVVCALL(OPIVX2
, vmaxu_vx_b
, OP_UUU_B
, H1
, H1
, DO_MAX
)
1470 RVVCALL(OPIVX2
, vmaxu_vx_h
, OP_UUU_H
, H2
, H2
, DO_MAX
)
1471 RVVCALL(OPIVX2
, vmaxu_vx_w
, OP_UUU_W
, H4
, H4
, DO_MAX
)
1472 RVVCALL(OPIVX2
, vmaxu_vx_d
, OP_UUU_D
, H8
, H8
, DO_MAX
)
1473 RVVCALL(OPIVX2
, vmax_vx_b
, OP_SSS_B
, H1
, H1
, DO_MAX
)
1474 RVVCALL(OPIVX2
, vmax_vx_h
, OP_SSS_H
, H2
, H2
, DO_MAX
)
1475 RVVCALL(OPIVX2
, vmax_vx_w
, OP_SSS_W
, H4
, H4
, DO_MAX
)
1476 RVVCALL(OPIVX2
, vmax_vx_d
, OP_SSS_D
, H8
, H8
, DO_MAX
)
1477 GEN_VEXT_VX(vminu_vx_b
)
1478 GEN_VEXT_VX(vminu_vx_h
)
1479 GEN_VEXT_VX(vminu_vx_w
)
1480 GEN_VEXT_VX(vminu_vx_d
)
1481 GEN_VEXT_VX(vmin_vx_b
)
1482 GEN_VEXT_VX(vmin_vx_h
)
1483 GEN_VEXT_VX(vmin_vx_w
)
1484 GEN_VEXT_VX(vmin_vx_d
)
1485 GEN_VEXT_VX(vmaxu_vx_b
)
1486 GEN_VEXT_VX(vmaxu_vx_h
)
1487 GEN_VEXT_VX(vmaxu_vx_w
)
1488 GEN_VEXT_VX(vmaxu_vx_d
)
1489 GEN_VEXT_VX(vmax_vx_b
)
1490 GEN_VEXT_VX(vmax_vx_h
)
1491 GEN_VEXT_VX(vmax_vx_w
)
1492 GEN_VEXT_VX(vmax_vx_d
)
1494 /* Vector Single-Width Integer Multiply Instructions */
1495 #define DO_MUL(N, M) (N * M)
1496 RVVCALL(OPIVV2
, vmul_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_MUL
)
1497 RVVCALL(OPIVV2
, vmul_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_MUL
)
1498 RVVCALL(OPIVV2
, vmul_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_MUL
)
1499 RVVCALL(OPIVV2
, vmul_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_MUL
)
1500 GEN_VEXT_VV(vmul_vv_b
, 1)
1501 GEN_VEXT_VV(vmul_vv_h
, 2)
1502 GEN_VEXT_VV(vmul_vv_w
, 4)
1503 GEN_VEXT_VV(vmul_vv_d
, 8)
1505 static int8_t do_mulh_b(int8_t s2
, int8_t s1
)
1507 return (int16_t)s2
* (int16_t)s1
>> 8;
/* High half of the signed 16x16 -> 32-bit product. */
static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    int32_t prod = (int32_t)s2 * (int32_t)s1;

    return prod >> 16;
}
/* High half of the signed 32x32 -> 64-bit product. */
static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    int64_t prod = (int64_t)s2 * (int64_t)s1;

    return prod >> 32;
}
1520 static int64_t do_mulh_d(int64_t s2
, int64_t s1
)
1522 uint64_t hi_64
, lo_64
;
1524 muls64(&lo_64
, &hi_64
, s1
, s2
);
/* High byte of the unsigned 8x8 -> 16-bit product. */
static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    uint16_t prod = (uint16_t)s2 * (uint16_t)s1;

    return prod >> 8;
}
/* High half of the unsigned 16x16 -> 32-bit product. */
static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    uint32_t prod = (uint32_t)s2 * (uint32_t)s1;

    return prod >> 16;
}
/* High half of the unsigned 32x32 -> 64-bit product. */
static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    uint64_t prod = (uint64_t)s2 * (uint64_t)s1;

    return prod >> 32;
}
1543 static uint64_t do_mulhu_d(uint64_t s2
, uint64_t s1
)
1545 uint64_t hi_64
, lo_64
;
1547 mulu64(&lo_64
, &hi_64
, s2
, s1
);
/* High byte of the signed(s2) x unsigned(s1) 8x8 -> 16-bit product. */
static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    int16_t prod = (int16_t)s2 * (uint16_t)s1;

    return prod >> 8;
}
/*
 * High half of the signed(s2) x unsigned(s1) 16x16 -> 32-bit product.
 * The multiply is performed in uint32 (usual arithmetic conversions),
 * so the shift is logical; truncation to int16_t yields the signed
 * high half, matching the other widths.
 */
static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    uint32_t prod = (int32_t)s2 * (uint32_t)s1;

    return prod >> 16;
}
/* High half of the signed(s2) x unsigned(s1) 32x32 -> 64-bit product. */
static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    uint64_t prod = (int64_t)s2 * (uint64_t)s1;

    return prod >> 32;
}
1567 * Let A = signed operand,
1568 * B = unsigned operand
1569 * P = mulu64(A, B), unsigned product
1571 * LET X = 2 ** 64 - A, 2's complement of A
1572 * SP = signed product
1576 * = -(2 ** 64 - A) * B
1577 * = A * B - 2 ** 64 * B
1582 * HI_P -= (A < 0 ? B : 0)
1585 static int64_t do_mulhsu_d(int64_t s2
, uint64_t s1
)
1587 uint64_t hi_64
, lo_64
;
1589 mulu64(&lo_64
, &hi_64
, s2
, s1
);
1591 hi_64
-= s2
< 0 ? s1
: 0;
1595 RVVCALL(OPIVV2
, vmulh_vv_b
, OP_SSS_B
, H1
, H1
, H1
, do_mulh_b
)
1596 RVVCALL(OPIVV2
, vmulh_vv_h
, OP_SSS_H
, H2
, H2
, H2
, do_mulh_h
)
1597 RVVCALL(OPIVV2
, vmulh_vv_w
, OP_SSS_W
, H4
, H4
, H4
, do_mulh_w
)
1598 RVVCALL(OPIVV2
, vmulh_vv_d
, OP_SSS_D
, H8
, H8
, H8
, do_mulh_d
)
1599 RVVCALL(OPIVV2
, vmulhu_vv_b
, OP_UUU_B
, H1
, H1
, H1
, do_mulhu_b
)
1600 RVVCALL(OPIVV2
, vmulhu_vv_h
, OP_UUU_H
, H2
, H2
, H2
, do_mulhu_h
)
1601 RVVCALL(OPIVV2
, vmulhu_vv_w
, OP_UUU_W
, H4
, H4
, H4
, do_mulhu_w
)
1602 RVVCALL(OPIVV2
, vmulhu_vv_d
, OP_UUU_D
, H8
, H8
, H8
, do_mulhu_d
)
1603 RVVCALL(OPIVV2
, vmulhsu_vv_b
, OP_SUS_B
, H1
, H1
, H1
, do_mulhsu_b
)
1604 RVVCALL(OPIVV2
, vmulhsu_vv_h
, OP_SUS_H
, H2
, H2
, H2
, do_mulhsu_h
)
1605 RVVCALL(OPIVV2
, vmulhsu_vv_w
, OP_SUS_W
, H4
, H4
, H4
, do_mulhsu_w
)
1606 RVVCALL(OPIVV2
, vmulhsu_vv_d
, OP_SUS_D
, H8
, H8
, H8
, do_mulhsu_d
)
1607 GEN_VEXT_VV(vmulh_vv_b
, 1)
1608 GEN_VEXT_VV(vmulh_vv_h
, 2)
1609 GEN_VEXT_VV(vmulh_vv_w
, 4)
1610 GEN_VEXT_VV(vmulh_vv_d
, 8)
1611 GEN_VEXT_VV(vmulhu_vv_b
, 1)
1612 GEN_VEXT_VV(vmulhu_vv_h
, 2)
1613 GEN_VEXT_VV(vmulhu_vv_w
, 4)
1614 GEN_VEXT_VV(vmulhu_vv_d
, 8)
1615 GEN_VEXT_VV(vmulhsu_vv_b
, 1)
1616 GEN_VEXT_VV(vmulhsu_vv_h
, 2)
1617 GEN_VEXT_VV(vmulhsu_vv_w
, 4)
1618 GEN_VEXT_VV(vmulhsu_vv_d
, 8)
1620 RVVCALL(OPIVX2
, vmul_vx_b
, OP_SSS_B
, H1
, H1
, DO_MUL
)
1621 RVVCALL(OPIVX2
, vmul_vx_h
, OP_SSS_H
, H2
, H2
, DO_MUL
)
1622 RVVCALL(OPIVX2
, vmul_vx_w
, OP_SSS_W
, H4
, H4
, DO_MUL
)
1623 RVVCALL(OPIVX2
, vmul_vx_d
, OP_SSS_D
, H8
, H8
, DO_MUL
)
1624 RVVCALL(OPIVX2
, vmulh_vx_b
, OP_SSS_B
, H1
, H1
, do_mulh_b
)
1625 RVVCALL(OPIVX2
, vmulh_vx_h
, OP_SSS_H
, H2
, H2
, do_mulh_h
)
1626 RVVCALL(OPIVX2
, vmulh_vx_w
, OP_SSS_W
, H4
, H4
, do_mulh_w
)
1627 RVVCALL(OPIVX2
, vmulh_vx_d
, OP_SSS_D
, H8
, H8
, do_mulh_d
)
1628 RVVCALL(OPIVX2
, vmulhu_vx_b
, OP_UUU_B
, H1
, H1
, do_mulhu_b
)
1629 RVVCALL(OPIVX2
, vmulhu_vx_h
, OP_UUU_H
, H2
, H2
, do_mulhu_h
)
1630 RVVCALL(OPIVX2
, vmulhu_vx_w
, OP_UUU_W
, H4
, H4
, do_mulhu_w
)
1631 RVVCALL(OPIVX2
, vmulhu_vx_d
, OP_UUU_D
, H8
, H8
, do_mulhu_d
)
1632 RVVCALL(OPIVX2
, vmulhsu_vx_b
, OP_SUS_B
, H1
, H1
, do_mulhsu_b
)
1633 RVVCALL(OPIVX2
, vmulhsu_vx_h
, OP_SUS_H
, H2
, H2
, do_mulhsu_h
)
1634 RVVCALL(OPIVX2
, vmulhsu_vx_w
, OP_SUS_W
, H4
, H4
, do_mulhsu_w
)
1635 RVVCALL(OPIVX2
, vmulhsu_vx_d
, OP_SUS_D
, H8
, H8
, do_mulhsu_d
)
1636 GEN_VEXT_VX(vmul_vx_b
)
1637 GEN_VEXT_VX(vmul_vx_h
)
1638 GEN_VEXT_VX(vmul_vx_w
)
1639 GEN_VEXT_VX(vmul_vx_d
)
1640 GEN_VEXT_VX(vmulh_vx_b
)
1641 GEN_VEXT_VX(vmulh_vx_h
)
1642 GEN_VEXT_VX(vmulh_vx_w
)
1643 GEN_VEXT_VX(vmulh_vx_d
)
1644 GEN_VEXT_VX(vmulhu_vx_b
)
1645 GEN_VEXT_VX(vmulhu_vx_h
)
1646 GEN_VEXT_VX(vmulhu_vx_w
)
1647 GEN_VEXT_VX(vmulhu_vx_d
)
1648 GEN_VEXT_VX(vmulhsu_vx_b
)
1649 GEN_VEXT_VX(vmulhsu_vx_h
)
1650 GEN_VEXT_VX(vmulhsu_vx_w
)
1651 GEN_VEXT_VX(vmulhsu_vx_d
)
1653 /* Vector Integer Divide Instructions */
1654 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1655 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1656 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\
1657 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1658 #define DO_REM(N, M) (unlikely(M == 0) ? N :\
1659 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
1661 RVVCALL(OPIVV2
, vdivu_vv_b
, OP_UUU_B
, H1
, H1
, H1
, DO_DIVU
)
1662 RVVCALL(OPIVV2
, vdivu_vv_h
, OP_UUU_H
, H2
, H2
, H2
, DO_DIVU
)
1663 RVVCALL(OPIVV2
, vdivu_vv_w
, OP_UUU_W
, H4
, H4
, H4
, DO_DIVU
)
1664 RVVCALL(OPIVV2
, vdivu_vv_d
, OP_UUU_D
, H8
, H8
, H8
, DO_DIVU
)
1665 RVVCALL(OPIVV2
, vdiv_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_DIV
)
1666 RVVCALL(OPIVV2
, vdiv_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_DIV
)
1667 RVVCALL(OPIVV2
, vdiv_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_DIV
)
1668 RVVCALL(OPIVV2
, vdiv_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_DIV
)
1669 RVVCALL(OPIVV2
, vremu_vv_b
, OP_UUU_B
, H1
, H1
, H1
, DO_REMU
)
1670 RVVCALL(OPIVV2
, vremu_vv_h
, OP_UUU_H
, H2
, H2
, H2
, DO_REMU
)
1671 RVVCALL(OPIVV2
, vremu_vv_w
, OP_UUU_W
, H4
, H4
, H4
, DO_REMU
)
1672 RVVCALL(OPIVV2
, vremu_vv_d
, OP_UUU_D
, H8
, H8
, H8
, DO_REMU
)
1673 RVVCALL(OPIVV2
, vrem_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_REM
)
1674 RVVCALL(OPIVV2
, vrem_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_REM
)
1675 RVVCALL(OPIVV2
, vrem_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_REM
)
1676 RVVCALL(OPIVV2
, vrem_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_REM
)
1677 GEN_VEXT_VV(vdivu_vv_b
, 1)
1678 GEN_VEXT_VV(vdivu_vv_h
, 2)
1679 GEN_VEXT_VV(vdivu_vv_w
, 4)
1680 GEN_VEXT_VV(vdivu_vv_d
, 8)
1681 GEN_VEXT_VV(vdiv_vv_b
, 1)
1682 GEN_VEXT_VV(vdiv_vv_h
, 2)
1683 GEN_VEXT_VV(vdiv_vv_w
, 4)
1684 GEN_VEXT_VV(vdiv_vv_d
, 8)
1685 GEN_VEXT_VV(vremu_vv_b
, 1)
1686 GEN_VEXT_VV(vremu_vv_h
, 2)
1687 GEN_VEXT_VV(vremu_vv_w
, 4)
1688 GEN_VEXT_VV(vremu_vv_d
, 8)
1689 GEN_VEXT_VV(vrem_vv_b
, 1)
1690 GEN_VEXT_VV(vrem_vv_h
, 2)
1691 GEN_VEXT_VV(vrem_vv_w
, 4)
1692 GEN_VEXT_VV(vrem_vv_d
, 8)
1694 RVVCALL(OPIVX2
, vdivu_vx_b
, OP_UUU_B
, H1
, H1
, DO_DIVU
)
1695 RVVCALL(OPIVX2
, vdivu_vx_h
, OP_UUU_H
, H2
, H2
, DO_DIVU
)
1696 RVVCALL(OPIVX2
, vdivu_vx_w
, OP_UUU_W
, H4
, H4
, DO_DIVU
)
1697 RVVCALL(OPIVX2
, vdivu_vx_d
, OP_UUU_D
, H8
, H8
, DO_DIVU
)
1698 RVVCALL(OPIVX2
, vdiv_vx_b
, OP_SSS_B
, H1
, H1
, DO_DIV
)
1699 RVVCALL(OPIVX2
, vdiv_vx_h
, OP_SSS_H
, H2
, H2
, DO_DIV
)
1700 RVVCALL(OPIVX2
, vdiv_vx_w
, OP_SSS_W
, H4
, H4
, DO_DIV
)
1701 RVVCALL(OPIVX2
, vdiv_vx_d
, OP_SSS_D
, H8
, H8
, DO_DIV
)
1702 RVVCALL(OPIVX2
, vremu_vx_b
, OP_UUU_B
, H1
, H1
, DO_REMU
)
1703 RVVCALL(OPIVX2
, vremu_vx_h
, OP_UUU_H
, H2
, H2
, DO_REMU
)
1704 RVVCALL(OPIVX2
, vremu_vx_w
, OP_UUU_W
, H4
, H4
, DO_REMU
)
1705 RVVCALL(OPIVX2
, vremu_vx_d
, OP_UUU_D
, H8
, H8
, DO_REMU
)
1706 RVVCALL(OPIVX2
, vrem_vx_b
, OP_SSS_B
, H1
, H1
, DO_REM
)
1707 RVVCALL(OPIVX2
, vrem_vx_h
, OP_SSS_H
, H2
, H2
, DO_REM
)
1708 RVVCALL(OPIVX2
, vrem_vx_w
, OP_SSS_W
, H4
, H4
, DO_REM
)
1709 RVVCALL(OPIVX2
, vrem_vx_d
, OP_SSS_D
, H8
, H8
, DO_REM
)
1710 GEN_VEXT_VX(vdivu_vx_b
)
1711 GEN_VEXT_VX(vdivu_vx_h
)
1712 GEN_VEXT_VX(vdivu_vx_w
)
1713 GEN_VEXT_VX(vdivu_vx_d
)
1714 GEN_VEXT_VX(vdiv_vx_b
)
1715 GEN_VEXT_VX(vdiv_vx_h
)
1716 GEN_VEXT_VX(vdiv_vx_w
)
1717 GEN_VEXT_VX(vdiv_vx_d
)
1718 GEN_VEXT_VX(vremu_vx_b
)
1719 GEN_VEXT_VX(vremu_vx_h
)
1720 GEN_VEXT_VX(vremu_vx_w
)
1721 GEN_VEXT_VX(vremu_vx_d
)
1722 GEN_VEXT_VX(vrem_vx_b
)
1723 GEN_VEXT_VX(vrem_vx_h
)
1724 GEN_VEXT_VX(vrem_vx_w
)
1725 GEN_VEXT_VX(vrem_vx_d
)
1727 /* Vector Widening Integer Multiply Instructions */
1728 RVVCALL(OPIVV2
, vwmul_vv_b
, WOP_SSS_B
, H2
, H1
, H1
, DO_MUL
)
1729 RVVCALL(OPIVV2
, vwmul_vv_h
, WOP_SSS_H
, H4
, H2
, H2
, DO_MUL
)
1730 RVVCALL(OPIVV2
, vwmul_vv_w
, WOP_SSS_W
, H8
, H4
, H4
, DO_MUL
)
1731 RVVCALL(OPIVV2
, vwmulu_vv_b
, WOP_UUU_B
, H2
, H1
, H1
, DO_MUL
)
1732 RVVCALL(OPIVV2
, vwmulu_vv_h
, WOP_UUU_H
, H4
, H2
, H2
, DO_MUL
)
1733 RVVCALL(OPIVV2
, vwmulu_vv_w
, WOP_UUU_W
, H8
, H4
, H4
, DO_MUL
)
1734 RVVCALL(OPIVV2
, vwmulsu_vv_b
, WOP_SUS_B
, H2
, H1
, H1
, DO_MUL
)
1735 RVVCALL(OPIVV2
, vwmulsu_vv_h
, WOP_SUS_H
, H4
, H2
, H2
, DO_MUL
)
1736 RVVCALL(OPIVV2
, vwmulsu_vv_w
, WOP_SUS_W
, H8
, H4
, H4
, DO_MUL
)
1737 GEN_VEXT_VV(vwmul_vv_b
, 2)
1738 GEN_VEXT_VV(vwmul_vv_h
, 4)
1739 GEN_VEXT_VV(vwmul_vv_w
, 8)
1740 GEN_VEXT_VV(vwmulu_vv_b
, 2)
1741 GEN_VEXT_VV(vwmulu_vv_h
, 4)
1742 GEN_VEXT_VV(vwmulu_vv_w
, 8)
1743 GEN_VEXT_VV(vwmulsu_vv_b
, 2)
1744 GEN_VEXT_VV(vwmulsu_vv_h
, 4)
1745 GEN_VEXT_VV(vwmulsu_vv_w
, 8)
1747 RVVCALL(OPIVX2
, vwmul_vx_b
, WOP_SSS_B
, H2
, H1
, DO_MUL
)
1748 RVVCALL(OPIVX2
, vwmul_vx_h
, WOP_SSS_H
, H4
, H2
, DO_MUL
)
1749 RVVCALL(OPIVX2
, vwmul_vx_w
, WOP_SSS_W
, H8
, H4
, DO_MUL
)
1750 RVVCALL(OPIVX2
, vwmulu_vx_b
, WOP_UUU_B
, H2
, H1
, DO_MUL
)
1751 RVVCALL(OPIVX2
, vwmulu_vx_h
, WOP_UUU_H
, H4
, H2
, DO_MUL
)
1752 RVVCALL(OPIVX2
, vwmulu_vx_w
, WOP_UUU_W
, H8
, H4
, DO_MUL
)
1753 RVVCALL(OPIVX2
, vwmulsu_vx_b
, WOP_SUS_B
, H2
, H1
, DO_MUL
)
1754 RVVCALL(OPIVX2
, vwmulsu_vx_h
, WOP_SUS_H
, H4
, H2
, DO_MUL
)
1755 RVVCALL(OPIVX2
, vwmulsu_vx_w
, WOP_SUS_W
, H8
, H4
, DO_MUL
)
1756 GEN_VEXT_VX(vwmul_vx_b
)
1757 GEN_VEXT_VX(vwmul_vx_h
)
1758 GEN_VEXT_VX(vwmul_vx_w
)
1759 GEN_VEXT_VX(vwmulu_vx_b
)
1760 GEN_VEXT_VX(vwmulu_vx_h
)
1761 GEN_VEXT_VX(vwmulu_vx_w
)
1762 GEN_VEXT_VX(vwmulsu_vx_b
)
1763 GEN_VEXT_VX(vwmulsu_vx_h
)
1764 GEN_VEXT_VX(vwmulsu_vx_w
)
1766 /* Vector Single-Width Integer Multiply-Add Instructions */
1767 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1768 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1770 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1771 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1772 TD d = *((TD *)vd + HD(i)); \
1773 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1776 #define DO_MACC(N, M, D) (M * N + D)
1777 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1778 #define DO_MADD(N, M, D) (M * D + N)
1779 #define DO_NMSUB(N, M, D) (-(M * D) + N)
1780 RVVCALL(OPIVV3
, vmacc_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_MACC
)
1781 RVVCALL(OPIVV3
, vmacc_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_MACC
)
1782 RVVCALL(OPIVV3
, vmacc_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_MACC
)
1783 RVVCALL(OPIVV3
, vmacc_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_MACC
)
1784 RVVCALL(OPIVV3
, vnmsac_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_NMSAC
)
1785 RVVCALL(OPIVV3
, vnmsac_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_NMSAC
)
1786 RVVCALL(OPIVV3
, vnmsac_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_NMSAC
)
1787 RVVCALL(OPIVV3
, vnmsac_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_NMSAC
)
1788 RVVCALL(OPIVV3
, vmadd_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_MADD
)
1789 RVVCALL(OPIVV3
, vmadd_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_MADD
)
1790 RVVCALL(OPIVV3
, vmadd_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_MADD
)
1791 RVVCALL(OPIVV3
, vmadd_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_MADD
)
1792 RVVCALL(OPIVV3
, vnmsub_vv_b
, OP_SSS_B
, H1
, H1
, H1
, DO_NMSUB
)
1793 RVVCALL(OPIVV3
, vnmsub_vv_h
, OP_SSS_H
, H2
, H2
, H2
, DO_NMSUB
)
1794 RVVCALL(OPIVV3
, vnmsub_vv_w
, OP_SSS_W
, H4
, H4
, H4
, DO_NMSUB
)
1795 RVVCALL(OPIVV3
, vnmsub_vv_d
, OP_SSS_D
, H8
, H8
, H8
, DO_NMSUB
)
1796 GEN_VEXT_VV(vmacc_vv_b
, 1)
1797 GEN_VEXT_VV(vmacc_vv_h
, 2)
1798 GEN_VEXT_VV(vmacc_vv_w
, 4)
1799 GEN_VEXT_VV(vmacc_vv_d
, 8)
1800 GEN_VEXT_VV(vnmsac_vv_b
, 1)
1801 GEN_VEXT_VV(vnmsac_vv_h
, 2)
1802 GEN_VEXT_VV(vnmsac_vv_w
, 4)
1803 GEN_VEXT_VV(vnmsac_vv_d
, 8)
1804 GEN_VEXT_VV(vmadd_vv_b
, 1)
1805 GEN_VEXT_VV(vmadd_vv_h
, 2)
1806 GEN_VEXT_VV(vmadd_vv_w
, 4)
1807 GEN_VEXT_VV(vmadd_vv_d
, 8)
1808 GEN_VEXT_VV(vnmsub_vv_b
, 1)
1809 GEN_VEXT_VV(vnmsub_vv_h
, 2)
1810 GEN_VEXT_VV(vnmsub_vv_w
, 4)
1811 GEN_VEXT_VV(vnmsub_vv_d
, 8)
/*
 * OPIVX3: per-element worker for ternary vector-scalar ops; the scalar
 * s1 is narrowed/sign-adjusted through (TX1)(T1) before use.
 * NOTE(review): restored the brace lines dropped by the paste.
 */
#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    TD d = *((TD *)vd + HD(i));                                     \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
}
1821 RVVCALL(OPIVX3
, vmacc_vx_b
, OP_SSS_B
, H1
, H1
, DO_MACC
)
1822 RVVCALL(OPIVX3
, vmacc_vx_h
, OP_SSS_H
, H2
, H2
, DO_MACC
)
1823 RVVCALL(OPIVX3
, vmacc_vx_w
, OP_SSS_W
, H4
, H4
, DO_MACC
)
1824 RVVCALL(OPIVX3
, vmacc_vx_d
, OP_SSS_D
, H8
, H8
, DO_MACC
)
1825 RVVCALL(OPIVX3
, vnmsac_vx_b
, OP_SSS_B
, H1
, H1
, DO_NMSAC
)
1826 RVVCALL(OPIVX3
, vnmsac_vx_h
, OP_SSS_H
, H2
, H2
, DO_NMSAC
)
1827 RVVCALL(OPIVX3
, vnmsac_vx_w
, OP_SSS_W
, H4
, H4
, DO_NMSAC
)
1828 RVVCALL(OPIVX3
, vnmsac_vx_d
, OP_SSS_D
, H8
, H8
, DO_NMSAC
)
1829 RVVCALL(OPIVX3
, vmadd_vx_b
, OP_SSS_B
, H1
, H1
, DO_MADD
)
1830 RVVCALL(OPIVX3
, vmadd_vx_h
, OP_SSS_H
, H2
, H2
, DO_MADD
)
1831 RVVCALL(OPIVX3
, vmadd_vx_w
, OP_SSS_W
, H4
, H4
, DO_MADD
)
1832 RVVCALL(OPIVX3
, vmadd_vx_d
, OP_SSS_D
, H8
, H8
, DO_MADD
)
1833 RVVCALL(OPIVX3
, vnmsub_vx_b
, OP_SSS_B
, H1
, H1
, DO_NMSUB
)
1834 RVVCALL(OPIVX3
, vnmsub_vx_h
, OP_SSS_H
, H2
, H2
, DO_NMSUB
)
1835 RVVCALL(OPIVX3
, vnmsub_vx_w
, OP_SSS_W
, H4
, H4
, DO_NMSUB
)
1836 RVVCALL(OPIVX3
, vnmsub_vx_d
, OP_SSS_D
, H8
, H8
, DO_NMSUB
)
1837 GEN_VEXT_VX(vmacc_vx_b
)
1838 GEN_VEXT_VX(vmacc_vx_h
)
1839 GEN_VEXT_VX(vmacc_vx_w
)
1840 GEN_VEXT_VX(vmacc_vx_d
)
1841 GEN_VEXT_VX(vnmsac_vx_b
)
1842 GEN_VEXT_VX(vnmsac_vx_h
)
1843 GEN_VEXT_VX(vnmsac_vx_w
)
1844 GEN_VEXT_VX(vnmsac_vx_d
)
1845 GEN_VEXT_VX(vmadd_vx_b
)
1846 GEN_VEXT_VX(vmadd_vx_h
)
1847 GEN_VEXT_VX(vmadd_vx_w
)
1848 GEN_VEXT_VX(vmadd_vx_d
)
1849 GEN_VEXT_VX(vnmsub_vx_b
)
1850 GEN_VEXT_VX(vnmsub_vx_h
)
1851 GEN_VEXT_VX(vnmsub_vx_w
)
1852 GEN_VEXT_VX(vnmsub_vx_d
)
1854 /* Vector Widening Integer Multiply-Add Instructions */
1855 RVVCALL(OPIVV3
, vwmaccu_vv_b
, WOP_UUU_B
, H2
, H1
, H1
, DO_MACC
)
1856 RVVCALL(OPIVV3
, vwmaccu_vv_h
, WOP_UUU_H
, H4
, H2
, H2
, DO_MACC
)
1857 RVVCALL(OPIVV3
, vwmaccu_vv_w
, WOP_UUU_W
, H8
, H4
, H4
, DO_MACC
)
1858 RVVCALL(OPIVV3
, vwmacc_vv_b
, WOP_SSS_B
, H2
, H1
, H1
, DO_MACC
)
1859 RVVCALL(OPIVV3
, vwmacc_vv_h
, WOP_SSS_H
, H4
, H2
, H2
, DO_MACC
)
1860 RVVCALL(OPIVV3
, vwmacc_vv_w
, WOP_SSS_W
, H8
, H4
, H4
, DO_MACC
)
1861 RVVCALL(OPIVV3
, vwmaccsu_vv_b
, WOP_SSU_B
, H2
, H1
, H1
, DO_MACC
)
1862 RVVCALL(OPIVV3
, vwmaccsu_vv_h
, WOP_SSU_H
, H4
, H2
, H2
, DO_MACC
)
1863 RVVCALL(OPIVV3
, vwmaccsu_vv_w
, WOP_SSU_W
, H8
, H4
, H4
, DO_MACC
)
1864 GEN_VEXT_VV(vwmaccu_vv_b
, 2)
1865 GEN_VEXT_VV(vwmaccu_vv_h
, 4)
1866 GEN_VEXT_VV(vwmaccu_vv_w
, 8)
1867 GEN_VEXT_VV(vwmacc_vv_b
, 2)
1868 GEN_VEXT_VV(vwmacc_vv_h
, 4)
1869 GEN_VEXT_VV(vwmacc_vv_w
, 8)
1870 GEN_VEXT_VV(vwmaccsu_vv_b
, 2)
1871 GEN_VEXT_VV(vwmaccsu_vv_h
, 4)
1872 GEN_VEXT_VV(vwmaccsu_vv_w
, 8)
1874 RVVCALL(OPIVX3
, vwmaccu_vx_b
, WOP_UUU_B
, H2
, H1
, DO_MACC
)
1875 RVVCALL(OPIVX3
, vwmaccu_vx_h
, WOP_UUU_H
, H4
, H2
, DO_MACC
)
1876 RVVCALL(OPIVX3
, vwmaccu_vx_w
, WOP_UUU_W
, H8
, H4
, DO_MACC
)
1877 RVVCALL(OPIVX3
, vwmacc_vx_b
, WOP_SSS_B
, H2
, H1
, DO_MACC
)
1878 RVVCALL(OPIVX3
, vwmacc_vx_h
, WOP_SSS_H
, H4
, H2
, DO_MACC
)
1879 RVVCALL(OPIVX3
, vwmacc_vx_w
, WOP_SSS_W
, H8
, H4
, DO_MACC
)
1880 RVVCALL(OPIVX3
, vwmaccsu_vx_b
, WOP_SSU_B
, H2
, H1
, DO_MACC
)
1881 RVVCALL(OPIVX3
, vwmaccsu_vx_h
, WOP_SSU_H
, H4
, H2
, DO_MACC
)
1882 RVVCALL(OPIVX3
, vwmaccsu_vx_w
, WOP_SSU_W
, H8
, H4
, DO_MACC
)
1883 RVVCALL(OPIVX3
, vwmaccus_vx_b
, WOP_SUS_B
, H2
, H1
, DO_MACC
)
1884 RVVCALL(OPIVX3
, vwmaccus_vx_h
, WOP_SUS_H
, H4
, H2
, DO_MACC
)
1885 RVVCALL(OPIVX3
, vwmaccus_vx_w
, WOP_SUS_W
, H8
, H4
, DO_MACC
)
1886 GEN_VEXT_VX(vwmaccu_vx_b
)
1887 GEN_VEXT_VX(vwmaccu_vx_h
)
1888 GEN_VEXT_VX(vwmaccu_vx_w
)
1889 GEN_VEXT_VX(vwmacc_vx_b
)
1890 GEN_VEXT_VX(vwmacc_vx_h
)
1891 GEN_VEXT_VX(vwmacc_vx_w
)
1892 GEN_VEXT_VX(vwmaccsu_vx_b
)
1893 GEN_VEXT_VX(vwmaccsu_vx_h
)
1894 GEN_VEXT_VX(vwmaccsu_vx_w
)
1895 GEN_VEXT_VX(vwmaccus_vx_b
)
1896 GEN_VEXT_VX(vwmaccus_vx_h
)
1897 GEN_VEXT_VX(vwmaccus_vx_w
)
/* Vector Integer Merge and Move Instructions */
/*
 * GEN_VEXT_VMV_VV: helper that copies active elements of vs1 into vd.
 * NOTE(review): several macro continuation lines (braces, the loop-index
 * declaration, and presumably the env->vstart reset) were lost in this
 * extraction — restore them from the upstream file before compiling.
 */
1900 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
1901 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1904 uint32_t vl = env->vl; \
1907 for (i = env->vstart; i < vl; i++) { \
1908 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1909 *((ETYPE *)vd + H(i)) = s1; \
/* Instantiations for SEW = 8/16/32/64. */
1914 GEN_VEXT_VMV_VV(vmv_v_v_b
, int8_t, H1
)
1915 GEN_VEXT_VMV_VV(vmv_v_v_h
, int16_t, H2
)
1916 GEN_VEXT_VMV_VV(vmv_v_v_w
, int32_t, H4
)
1917 GEN_VEXT_VMV_VV(vmv_v_v_d
, int64_t, H8
)
1919 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
1920 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1923 uint32_t vl = env->vl; \
1926 for (i = env->vstart; i < vl; i++) { \
1927 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
1932 GEN_VEXT_VMV_VX(vmv_v_x_b
, int8_t, H1
)
1933 GEN_VEXT_VMV_VX(vmv_v_x_h
, int16_t, H2
)
1934 GEN_VEXT_VMV_VX(vmv_v_x_w
, int32_t, H4
)
1935 GEN_VEXT_VMV_VX(vmv_v_x_d
, int64_t, H8
)
1937 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
1938 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1939 CPURISCVState *env, uint32_t desc) \
1941 uint32_t vl = env->vl; \
1944 for (i = env->vstart; i < vl; i++) { \
1945 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
1946 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
1951 GEN_VEXT_VMERGE_VV(vmerge_vvm_b
, int8_t, H1
)
1952 GEN_VEXT_VMERGE_VV(vmerge_vvm_h
, int16_t, H2
)
1953 GEN_VEXT_VMERGE_VV(vmerge_vvm_w
, int32_t, H4
)
1954 GEN_VEXT_VMERGE_VV(vmerge_vvm_d
, int64_t, H8
)
1956 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
1957 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1958 void *vs2, CPURISCVState *env, uint32_t desc) \
1960 uint32_t vl = env->vl; \
1963 for (i = env->vstart; i < vl; i++) { \
1964 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1965 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
1966 (ETYPE)(target_long)s1); \
1967 *((ETYPE *)vd + H(i)) = d; \
1972 GEN_VEXT_VMERGE_VX(vmerge_vxm_b
, int8_t, H1
)
1973 GEN_VEXT_VMERGE_VX(vmerge_vxm_h
, int16_t, H2
)
1974 GEN_VEXT_VMERGE_VX(vmerge_vxm_w
, int32_t, H4
)
1975 GEN_VEXT_VMERGE_VX(vmerge_vxm_d
, int64_t, H8
)
1978 *** Vector Fixed-Point Arithmetic Instructions
1981 /* Vector Single-Width Saturating Add and Subtract */
1984 * As fixed point instructions probably have round mode and saturation,
1985 * define common macros for fixed point here.
1987 typedef void opivv2_rm_fn(void *vd
, void *vs1
, void *vs2
, int i
,
1988 CPURISCVState
*env
, int vxrm
);
1990 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1991 static inline void \
1992 do_##NAME(void *vd, void *vs1, void *vs2, int i, \
1993 CPURISCVState *env, int vxrm) \
1995 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1996 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1997 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2001 vext_vv_rm_1(void *vd
, void *v0
, void *vs1
, void *vs2
,
2003 uint32_t vl
, uint32_t vm
, int vxrm
,
2006 for (uint32_t i
= env
->vstart
; i
< vl
; i
++) {
2007 if (!vm
&& !vext_elem_mask(v0
, i
)) {
2010 fn(vd
, vs1
, vs2
, i
, env
, vxrm
);
2016 vext_vv_rm_2(void *vd
, void *v0
, void *vs1
, void *vs2
,
2021 uint32_t vm
= vext_vm(desc
);
2022 uint32_t vl
= env
->vl
;
2024 switch (env
->vxrm
) {
2026 vext_vv_rm_1(vd
, v0
, vs1
, vs2
,
2027 env
, vl
, vm
, 0, fn
);
2030 vext_vv_rm_1(vd
, v0
, vs1
, vs2
,
2031 env
, vl
, vm
, 1, fn
);
2034 vext_vv_rm_1(vd
, v0
, vs1
, vs2
,
2035 env
, vl
, vm
, 2, fn
);
2038 vext_vv_rm_1(vd
, v0
, vs1
, vs2
,
2039 env
, vl
, vm
, 3, fn
);
2044 /* generate helpers for fixed point instructions with OPIVV format */
2045 #define GEN_VEXT_VV_RM(NAME) \
2046 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2047 CPURISCVState *env, uint32_t desc) \
2049 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
2053 static inline uint8_t saddu8(CPURISCVState
*env
, int vxrm
, uint8_t a
, uint8_t b
)
2055 uint8_t res
= a
+ b
;
2063 static inline uint16_t saddu16(CPURISCVState
*env
, int vxrm
, uint16_t a
,
2066 uint16_t res
= a
+ b
;
2074 static inline uint32_t saddu32(CPURISCVState
*env
, int vxrm
, uint32_t a
,
2077 uint32_t res
= a
+ b
;
2085 static inline uint64_t saddu64(CPURISCVState
*env
, int vxrm
, uint64_t a
,
2088 uint64_t res
= a
+ b
;
2096 RVVCALL(OPIVV2_RM
, vsaddu_vv_b
, OP_UUU_B
, H1
, H1
, H1
, saddu8
)
2097 RVVCALL(OPIVV2_RM
, vsaddu_vv_h
, OP_UUU_H
, H2
, H2
, H2
, saddu16
)
2098 RVVCALL(OPIVV2_RM
, vsaddu_vv_w
, OP_UUU_W
, H4
, H4
, H4
, saddu32
)
2099 RVVCALL(OPIVV2_RM
, vsaddu_vv_d
, OP_UUU_D
, H8
, H8
, H8
, saddu64
)
2100 GEN_VEXT_VV_RM(vsaddu_vv_b
)
2101 GEN_VEXT_VV_RM(vsaddu_vv_h
)
2102 GEN_VEXT_VV_RM(vsaddu_vv_w
)
2103 GEN_VEXT_VV_RM(vsaddu_vv_d
)
2105 typedef void opivx2_rm_fn(void *vd
, target_long s1
, void *vs2
, int i
,
2106 CPURISCVState
*env
, int vxrm
);
2108 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2109 static inline void \
2110 do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2111 CPURISCVState *env, int vxrm) \
2113 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2114 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2118 vext_vx_rm_1(void *vd
, void *v0
, target_long s1
, void *vs2
,
2120 uint32_t vl
, uint32_t vm
, int vxrm
,
2123 for (uint32_t i
= env
->vstart
; i
< vl
; i
++) {
2124 if (!vm
&& !vext_elem_mask(v0
, i
)) {
2127 fn(vd
, s1
, vs2
, i
, env
, vxrm
);
2133 vext_vx_rm_2(void *vd
, void *v0
, target_long s1
, void *vs2
,
2138 uint32_t vm
= vext_vm(desc
);
2139 uint32_t vl
= env
->vl
;
2141 switch (env
->vxrm
) {
2143 vext_vx_rm_1(vd
, v0
, s1
, vs2
,
2144 env
, vl
, vm
, 0, fn
);
2147 vext_vx_rm_1(vd
, v0
, s1
, vs2
,
2148 env
, vl
, vm
, 1, fn
);
2151 vext_vx_rm_1(vd
, v0
, s1
, vs2
,
2152 env
, vl
, vm
, 2, fn
);
2155 vext_vx_rm_1(vd
, v0
, s1
, vs2
,
2156 env
, vl
, vm
, 3, fn
);
2161 /* generate helpers for fixed point instructions with OPIVX format */
2162 #define GEN_VEXT_VX_RM(NAME) \
2163 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2164 void *vs2, CPURISCVState *env, uint32_t desc) \
2166 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
2170 RVVCALL(OPIVX2_RM
, vsaddu_vx_b
, OP_UUU_B
, H1
, H1
, saddu8
)
2171 RVVCALL(OPIVX2_RM
, vsaddu_vx_h
, OP_UUU_H
, H2
, H2
, saddu16
)
2172 RVVCALL(OPIVX2_RM
, vsaddu_vx_w
, OP_UUU_W
, H4
, H4
, saddu32
)
2173 RVVCALL(OPIVX2_RM
, vsaddu_vx_d
, OP_UUU_D
, H8
, H8
, saddu64
)
2174 GEN_VEXT_VX_RM(vsaddu_vx_b
)
2175 GEN_VEXT_VX_RM(vsaddu_vx_h
)
2176 GEN_VEXT_VX_RM(vsaddu_vx_w
)
2177 GEN_VEXT_VX_RM(vsaddu_vx_d
)
2179 static inline int8_t sadd8(CPURISCVState
*env
, int vxrm
, int8_t a
, int8_t b
)
2182 if ((res
^ a
) & (res
^ b
) & INT8_MIN
) {
2183 res
= a
> 0 ? INT8_MAX
: INT8_MIN
;
2189 static inline int16_t sadd16(CPURISCVState
*env
, int vxrm
, int16_t a
, int16_t b
)
2191 int16_t res
= a
+ b
;
2192 if ((res
^ a
) & (res
^ b
) & INT16_MIN
) {
2193 res
= a
> 0 ? INT16_MAX
: INT16_MIN
;
2199 static inline int32_t sadd32(CPURISCVState
*env
, int vxrm
, int32_t a
, int32_t b
)
2201 int32_t res
= a
+ b
;
2202 if ((res
^ a
) & (res
^ b
) & INT32_MIN
) {
2203 res
= a
> 0 ? INT32_MAX
: INT32_MIN
;
2209 static inline int64_t sadd64(CPURISCVState
*env
, int vxrm
, int64_t a
, int64_t b
)
2211 int64_t res
= a
+ b
;
2212 if ((res
^ a
) & (res
^ b
) & INT64_MIN
) {
2213 res
= a
> 0 ? INT64_MAX
: INT64_MIN
;
2219 RVVCALL(OPIVV2_RM
, vsadd_vv_b
, OP_SSS_B
, H1
, H1
, H1
, sadd8
)
2220 RVVCALL(OPIVV2_RM
, vsadd_vv_h
, OP_SSS_H
, H2
, H2
, H2
, sadd16
)
2221 RVVCALL(OPIVV2_RM
, vsadd_vv_w
, OP_SSS_W
, H4
, H4
, H4
, sadd32
)
2222 RVVCALL(OPIVV2_RM
, vsadd_vv_d
, OP_SSS_D
, H8
, H8
, H8
, sadd64
)
2223 GEN_VEXT_VV_RM(vsadd_vv_b
)
2224 GEN_VEXT_VV_RM(vsadd_vv_h
)
2225 GEN_VEXT_VV_RM(vsadd_vv_w
)
2226 GEN_VEXT_VV_RM(vsadd_vv_d
)
2228 RVVCALL(OPIVX2_RM
, vsadd_vx_b
, OP_SSS_B
, H1
, H1
, sadd8
)
2229 RVVCALL(OPIVX2_RM
, vsadd_vx_h
, OP_SSS_H
, H2
, H2
, sadd16
)
2230 RVVCALL(OPIVX2_RM
, vsadd_vx_w
, OP_SSS_W
, H4
, H4
, sadd32
)
2231 RVVCALL(OPIVX2_RM
, vsadd_vx_d
, OP_SSS_D
, H8
, H8
, sadd64
)
2232 GEN_VEXT_VX_RM(vsadd_vx_b
)
2233 GEN_VEXT_VX_RM(vsadd_vx_h
)
2234 GEN_VEXT_VX_RM(vsadd_vx_w
)
2235 GEN_VEXT_VX_RM(vsadd_vx_d
)
2237 static inline uint8_t ssubu8(CPURISCVState
*env
, int vxrm
, uint8_t a
, uint8_t b
)
2239 uint8_t res
= a
- b
;
2247 static inline uint16_t ssubu16(CPURISCVState
*env
, int vxrm
, uint16_t a
,
2250 uint16_t res
= a
- b
;
2258 static inline uint32_t ssubu32(CPURISCVState
*env
, int vxrm
, uint32_t a
,
2261 uint32_t res
= a
- b
;
2269 static inline uint64_t ssubu64(CPURISCVState
*env
, int vxrm
, uint64_t a
,
2272 uint64_t res
= a
- b
;
2280 RVVCALL(OPIVV2_RM
, vssubu_vv_b
, OP_UUU_B
, H1
, H1
, H1
, ssubu8
)
2281 RVVCALL(OPIVV2_RM
, vssubu_vv_h
, OP_UUU_H
, H2
, H2
, H2
, ssubu16
)
2282 RVVCALL(OPIVV2_RM
, vssubu_vv_w
, OP_UUU_W
, H4
, H4
, H4
, ssubu32
)
2283 RVVCALL(OPIVV2_RM
, vssubu_vv_d
, OP_UUU_D
, H8
, H8
, H8
, ssubu64
)
2284 GEN_VEXT_VV_RM(vssubu_vv_b
)
2285 GEN_VEXT_VV_RM(vssubu_vv_h
)
2286 GEN_VEXT_VV_RM(vssubu_vv_w
)
2287 GEN_VEXT_VV_RM(vssubu_vv_d
)
2289 RVVCALL(OPIVX2_RM
, vssubu_vx_b
, OP_UUU_B
, H1
, H1
, ssubu8
)
2290 RVVCALL(OPIVX2_RM
, vssubu_vx_h
, OP_UUU_H
, H2
, H2
, ssubu16
)
2291 RVVCALL(OPIVX2_RM
, vssubu_vx_w
, OP_UUU_W
, H4
, H4
, ssubu32
)
2292 RVVCALL(OPIVX2_RM
, vssubu_vx_d
, OP_UUU_D
, H8
, H8
, ssubu64
)
2293 GEN_VEXT_VX_RM(vssubu_vx_b
)
2294 GEN_VEXT_VX_RM(vssubu_vx_h
)
2295 GEN_VEXT_VX_RM(vssubu_vx_w
)
2296 GEN_VEXT_VX_RM(vssubu_vx_d
)
2298 static inline int8_t ssub8(CPURISCVState
*env
, int vxrm
, int8_t a
, int8_t b
)
2301 if ((res
^ a
) & (a
^ b
) & INT8_MIN
) {
2302 res
= a
>= 0 ? INT8_MAX
: INT8_MIN
;
2308 static inline int16_t ssub16(CPURISCVState
*env
, int vxrm
, int16_t a
, int16_t b
)
2310 int16_t res
= a
- b
;
2311 if ((res
^ a
) & (a
^ b
) & INT16_MIN
) {
2312 res
= a
>= 0 ? INT16_MAX
: INT16_MIN
;
2318 static inline int32_t ssub32(CPURISCVState
*env
, int vxrm
, int32_t a
, int32_t b
)
2320 int32_t res
= a
- b
;
2321 if ((res
^ a
) & (a
^ b
) & INT32_MIN
) {
2322 res
= a
>= 0 ? INT32_MAX
: INT32_MIN
;
2328 static inline int64_t ssub64(CPURISCVState
*env
, int vxrm
, int64_t a
, int64_t b
)
2330 int64_t res
= a
- b
;
2331 if ((res
^ a
) & (a
^ b
) & INT64_MIN
) {
2332 res
= a
>= 0 ? INT64_MAX
: INT64_MIN
;
2338 RVVCALL(OPIVV2_RM
, vssub_vv_b
, OP_SSS_B
, H1
, H1
, H1
, ssub8
)
2339 RVVCALL(OPIVV2_RM
, vssub_vv_h
, OP_SSS_H
, H2
, H2
, H2
, ssub16
)
2340 RVVCALL(OPIVV2_RM
, vssub_vv_w
, OP_SSS_W
, H4
, H4
, H4
, ssub32
)
2341 RVVCALL(OPIVV2_RM
, vssub_vv_d
, OP_SSS_D
, H8
, H8
, H8
, ssub64
)
2342 GEN_VEXT_VV_RM(vssub_vv_b
)
2343 GEN_VEXT_VV_RM(vssub_vv_h
)
2344 GEN_VEXT_VV_RM(vssub_vv_w
)
2345 GEN_VEXT_VV_RM(vssub_vv_d
)
2347 RVVCALL(OPIVX2_RM
, vssub_vx_b
, OP_SSS_B
, H1
, H1
, ssub8
)
2348 RVVCALL(OPIVX2_RM
, vssub_vx_h
, OP_SSS_H
, H2
, H2
, ssub16
)
2349 RVVCALL(OPIVX2_RM
, vssub_vx_w
, OP_SSS_W
, H4
, H4
, ssub32
)
2350 RVVCALL(OPIVX2_RM
, vssub_vx_d
, OP_SSS_D
, H8
, H8
, ssub64
)
2351 GEN_VEXT_VX_RM(vssub_vx_b
)
2352 GEN_VEXT_VX_RM(vssub_vx_h
)
2353 GEN_VEXT_VX_RM(vssub_vx_w
)
2354 GEN_VEXT_VX_RM(vssub_vx_d
)
/* Vector Single-Width Averaging Add and Subtract */
/*
 * get_round: compute the rounding increment (0 or 1) to add after a
 * right shift of v by `shift` bits, according to vxrm:
 *   0 = round-to-nearest-up, 1 = round-to-nearest-even,
 *   2 = round-down (truncate), 3 = round-to-odd ("jam").
 * NOTE(review): the declarations of d1/D1/D2, the opening brace and
 * several `return` lines were lost in this extraction — the fragments
 * below keep only what survived; restore from the upstream file.
 */
2357 static inline uint8_t get_round(int vxrm
, uint64_t v
, uint8_t shift
)
2359 uint8_t d
= extract64(v
, shift
, 1);
2363 if (shift
== 0 || shift
> 64) {
2367 d1
= extract64(v
, shift
- 1, 1);
2368 D1
= extract64(v
, 0, shift
);
2369 if (vxrm
== 0) { /* round-to-nearest-up (add +0.5 LSB) */
2371 } else if (vxrm
== 1) { /* round-to-nearest-even */
2373 D2
= extract64(v
, 0, shift
- 1);
2374 return d1
& ((D2
!= 0) | d
);
2378 } else if (vxrm
== 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2379 return !d
& (D1
!= 0);
2381 return 0; /* round-down (truncate) */
2384 static inline int32_t aadd32(CPURISCVState
*env
, int vxrm
, int32_t a
, int32_t b
)
2386 int64_t res
= (int64_t)a
+ b
;
2387 uint8_t round
= get_round(vxrm
, res
, 1);
2389 return (res
>> 1) + round
;
2392 static inline int64_t aadd64(CPURISCVState
*env
, int vxrm
, int64_t a
, int64_t b
)
2394 int64_t res
= a
+ b
;
2395 uint8_t round
= get_round(vxrm
, res
, 1);
2396 int64_t over
= (res
^ a
) & (res
^ b
) & INT64_MIN
;
2398 /* With signed overflow, bit 64 is inverse of bit 63. */
2399 return ((res
>> 1) ^ over
) + round
;
2402 RVVCALL(OPIVV2_RM
, vaadd_vv_b
, OP_SSS_B
, H1
, H1
, H1
, aadd32
)
2403 RVVCALL(OPIVV2_RM
, vaadd_vv_h
, OP_SSS_H
, H2
, H2
, H2
, aadd32
)
2404 RVVCALL(OPIVV2_RM
, vaadd_vv_w
, OP_SSS_W
, H4
, H4
, H4
, aadd32
)
2405 RVVCALL(OPIVV2_RM
, vaadd_vv_d
, OP_SSS_D
, H8
, H8
, H8
, aadd64
)
2406 GEN_VEXT_VV_RM(vaadd_vv_b
)
2407 GEN_VEXT_VV_RM(vaadd_vv_h
)
2408 GEN_VEXT_VV_RM(vaadd_vv_w
)
2409 GEN_VEXT_VV_RM(vaadd_vv_d
)
2411 RVVCALL(OPIVX2_RM
, vaadd_vx_b
, OP_SSS_B
, H1
, H1
, aadd32
)
2412 RVVCALL(OPIVX2_RM
, vaadd_vx_h
, OP_SSS_H
, H2
, H2
, aadd32
)
2413 RVVCALL(OPIVX2_RM
, vaadd_vx_w
, OP_SSS_W
, H4
, H4
, aadd32
)
2414 RVVCALL(OPIVX2_RM
, vaadd_vx_d
, OP_SSS_D
, H8
, H8
, aadd64
)
2415 GEN_VEXT_VX_RM(vaadd_vx_b
)
2416 GEN_VEXT_VX_RM(vaadd_vx_h
)
2417 GEN_VEXT_VX_RM(vaadd_vx_w
)
2418 GEN_VEXT_VX_RM(vaadd_vx_d
)
2420 static inline uint32_t aaddu32(CPURISCVState
*env
, int vxrm
,
2421 uint32_t a
, uint32_t b
)
2423 uint64_t res
= (uint64_t)a
+ b
;
2424 uint8_t round
= get_round(vxrm
, res
, 1);
2426 return (res
>> 1) + round
;
2429 static inline uint64_t aaddu64(CPURISCVState
*env
, int vxrm
,
2430 uint64_t a
, uint64_t b
)
2432 uint64_t res
= a
+ b
;
2433 uint8_t round
= get_round(vxrm
, res
, 1);
2434 uint64_t over
= (uint64_t)(res
< a
) << 63;
2436 return ((res
>> 1) | over
) + round
;
2439 RVVCALL(OPIVV2_RM
, vaaddu_vv_b
, OP_UUU_B
, H1
, H1
, H1
, aaddu32
)
2440 RVVCALL(OPIVV2_RM
, vaaddu_vv_h
, OP_UUU_H
, H2
, H2
, H2
, aaddu32
)
2441 RVVCALL(OPIVV2_RM
, vaaddu_vv_w
, OP_UUU_W
, H4
, H4
, H4
, aaddu32
)
2442 RVVCALL(OPIVV2_RM
, vaaddu_vv_d
, OP_UUU_D
, H8
, H8
, H8
, aaddu64
)
2443 GEN_VEXT_VV_RM(vaaddu_vv_b
)
2444 GEN_VEXT_VV_RM(vaaddu_vv_h
)
2445 GEN_VEXT_VV_RM(vaaddu_vv_w
)
2446 GEN_VEXT_VV_RM(vaaddu_vv_d
)
2448 RVVCALL(OPIVX2_RM
, vaaddu_vx_b
, OP_UUU_B
, H1
, H1
, aaddu32
)
2449 RVVCALL(OPIVX2_RM
, vaaddu_vx_h
, OP_UUU_H
, H2
, H2
, aaddu32
)
2450 RVVCALL(OPIVX2_RM
, vaaddu_vx_w
, OP_UUU_W
, H4
, H4
, aaddu32
)
2451 RVVCALL(OPIVX2_RM
, vaaddu_vx_d
, OP_UUU_D
, H8
, H8
, aaddu64
)
2452 GEN_VEXT_VX_RM(vaaddu_vx_b
)
2453 GEN_VEXT_VX_RM(vaaddu_vx_h
)
2454 GEN_VEXT_VX_RM(vaaddu_vx_w
)
2455 GEN_VEXT_VX_RM(vaaddu_vx_d
)
2457 static inline int32_t asub32(CPURISCVState
*env
, int vxrm
, int32_t a
, int32_t b
)
2459 int64_t res
= (int64_t)a
- b
;
2460 uint8_t round
= get_round(vxrm
, res
, 1);
2462 return (res
>> 1) + round
;
2465 static inline int64_t asub64(CPURISCVState
*env
, int vxrm
, int64_t a
, int64_t b
)
2467 int64_t res
= (int64_t)a
- b
;
2468 uint8_t round
= get_round(vxrm
, res
, 1);
2469 int64_t over
= (res
^ a
) & (a
^ b
) & INT64_MIN
;
2471 /* With signed overflow, bit 64 is inverse of bit 63. */
2472 return ((res
>> 1) ^ over
) + round
;
2475 RVVCALL(OPIVV2_RM
, vasub_vv_b
, OP_SSS_B
, H1
, H1
, H1
, asub32
)
2476 RVVCALL(OPIVV2_RM
, vasub_vv_h
, OP_SSS_H
, H2
, H2
, H2
, asub32
)
2477 RVVCALL(OPIVV2_RM
, vasub_vv_w
, OP_SSS_W
, H4
, H4
, H4
, asub32
)
2478 RVVCALL(OPIVV2_RM
, vasub_vv_d
, OP_SSS_D
, H8
, H8
, H8
, asub64
)
2479 GEN_VEXT_VV_RM(vasub_vv_b
)
2480 GEN_VEXT_VV_RM(vasub_vv_h
)
2481 GEN_VEXT_VV_RM(vasub_vv_w
)
2482 GEN_VEXT_VV_RM(vasub_vv_d
)
2484 RVVCALL(OPIVX2_RM
, vasub_vx_b
, OP_SSS_B
, H1
, H1
, asub32
)
2485 RVVCALL(OPIVX2_RM
, vasub_vx_h
, OP_SSS_H
, H2
, H2
, asub32
)
2486 RVVCALL(OPIVX2_RM
, vasub_vx_w
, OP_SSS_W
, H4
, H4
, asub32
)
2487 RVVCALL(OPIVX2_RM
, vasub_vx_d
, OP_SSS_D
, H8
, H8
, asub64
)
2488 GEN_VEXT_VX_RM(vasub_vx_b
)
2489 GEN_VEXT_VX_RM(vasub_vx_h
)
2490 GEN_VEXT_VX_RM(vasub_vx_w
)
2491 GEN_VEXT_VX_RM(vasub_vx_d
)
2493 static inline uint32_t asubu32(CPURISCVState
*env
, int vxrm
,
2494 uint32_t a
, uint32_t b
)
2496 int64_t res
= (int64_t)a
- b
;
2497 uint8_t round
= get_round(vxrm
, res
, 1);
2499 return (res
>> 1) + round
;
2502 static inline uint64_t asubu64(CPURISCVState
*env
, int vxrm
,
2503 uint64_t a
, uint64_t b
)
2505 uint64_t res
= (uint64_t)a
- b
;
2506 uint8_t round
= get_round(vxrm
, res
, 1);
2507 uint64_t over
= (uint64_t)(res
> a
) << 63;
2509 return ((res
>> 1) | over
) + round
;
2512 RVVCALL(OPIVV2_RM
, vasubu_vv_b
, OP_UUU_B
, H1
, H1
, H1
, asubu32
)
2513 RVVCALL(OPIVV2_RM
, vasubu_vv_h
, OP_UUU_H
, H2
, H2
, H2
, asubu32
)
2514 RVVCALL(OPIVV2_RM
, vasubu_vv_w
, OP_UUU_W
, H4
, H4
, H4
, asubu32
)
2515 RVVCALL(OPIVV2_RM
, vasubu_vv_d
, OP_UUU_D
, H8
, H8
, H8
, asubu64
)
2516 GEN_VEXT_VV_RM(vasubu_vv_b
)
2517 GEN_VEXT_VV_RM(vasubu_vv_h
)
2518 GEN_VEXT_VV_RM(vasubu_vv_w
)
2519 GEN_VEXT_VV_RM(vasubu_vv_d
)
2521 RVVCALL(OPIVX2_RM
, vasubu_vx_b
, OP_UUU_B
, H1
, H1
, asubu32
)
2522 RVVCALL(OPIVX2_RM
, vasubu_vx_h
, OP_UUU_H
, H2
, H2
, asubu32
)
2523 RVVCALL(OPIVX2_RM
, vasubu_vx_w
, OP_UUU_W
, H4
, H4
, asubu32
)
2524 RVVCALL(OPIVX2_RM
, vasubu_vx_d
, OP_UUU_D
, H8
, H8
, asubu64
)
2525 GEN_VEXT_VX_RM(vasubu_vx_b
)
2526 GEN_VEXT_VX_RM(vasubu_vx_h
)
2527 GEN_VEXT_VX_RM(vasubu_vx_w
)
2528 GEN_VEXT_VX_RM(vasubu_vx_d
)
2530 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2531 static inline int8_t vsmul8(CPURISCVState
*env
, int vxrm
, int8_t a
, int8_t b
)
2536 res
= (int16_t)a
* (int16_t)b
;
2537 round
= get_round(vxrm
, res
, 7);
2538 res
= (res
>> 7) + round
;
2540 if (res
> INT8_MAX
) {
2543 } else if (res
< INT8_MIN
) {
2551 static int16_t vsmul16(CPURISCVState
*env
, int vxrm
, int16_t a
, int16_t b
)
2556 res
= (int32_t)a
* (int32_t)b
;
2557 round
= get_round(vxrm
, res
, 15);
2558 res
= (res
>> 15) + round
;
2560 if (res
> INT16_MAX
) {
2563 } else if (res
< INT16_MIN
) {
2571 static int32_t vsmul32(CPURISCVState
*env
, int vxrm
, int32_t a
, int32_t b
)
2576 res
= (int64_t)a
* (int64_t)b
;
2577 round
= get_round(vxrm
, res
, 31);
2578 res
= (res
>> 31) + round
;
2580 if (res
> INT32_MAX
) {
2583 } else if (res
< INT32_MIN
) {
2591 static int64_t vsmul64(CPURISCVState
*env
, int vxrm
, int64_t a
, int64_t b
)
2594 uint64_t hi_64
, lo_64
;
2597 if (a
== INT64_MIN
&& b
== INT64_MIN
) {
2602 muls64(&lo_64
, &hi_64
, a
, b
);
2603 round
= get_round(vxrm
, lo_64
, 63);
2605 * Cannot overflow, as there are always
2606 * 2 sign bits after multiply.
2608 res
= (hi_64
<< 1) | (lo_64
>> 63);
2610 if (res
== INT64_MAX
) {
2619 RVVCALL(OPIVV2_RM
, vsmul_vv_b
, OP_SSS_B
, H1
, H1
, H1
, vsmul8
)
2620 RVVCALL(OPIVV2_RM
, vsmul_vv_h
, OP_SSS_H
, H2
, H2
, H2
, vsmul16
)
2621 RVVCALL(OPIVV2_RM
, vsmul_vv_w
, OP_SSS_W
, H4
, H4
, H4
, vsmul32
)
2622 RVVCALL(OPIVV2_RM
, vsmul_vv_d
, OP_SSS_D
, H8
, H8
, H8
, vsmul64
)
2623 GEN_VEXT_VV_RM(vsmul_vv_b
)
2624 GEN_VEXT_VV_RM(vsmul_vv_h
)
2625 GEN_VEXT_VV_RM(vsmul_vv_w
)
2626 GEN_VEXT_VV_RM(vsmul_vv_d
)
2628 RVVCALL(OPIVX2_RM
, vsmul_vx_b
, OP_SSS_B
, H1
, H1
, vsmul8
)
2629 RVVCALL(OPIVX2_RM
, vsmul_vx_h
, OP_SSS_H
, H2
, H2
, vsmul16
)
2630 RVVCALL(OPIVX2_RM
, vsmul_vx_w
, OP_SSS_W
, H4
, H4
, vsmul32
)
2631 RVVCALL(OPIVX2_RM
, vsmul_vx_d
, OP_SSS_D
, H8
, H8
, vsmul64
)
2632 GEN_VEXT_VX_RM(vsmul_vx_b
)
2633 GEN_VEXT_VX_RM(vsmul_vx_h
)
2634 GEN_VEXT_VX_RM(vsmul_vx_w
)
2635 GEN_VEXT_VX_RM(vsmul_vx_d
)
2637 /* Vector Single-Width Scaling Shift Instructions */
2638 static inline uint8_t
2639 vssrl8(CPURISCVState
*env
, int vxrm
, uint8_t a
, uint8_t b
)
2641 uint8_t round
, shift
= b
& 0x7;
2644 round
= get_round(vxrm
, a
, shift
);
2645 res
= (a
>> shift
) + round
;
2648 static inline uint16_t
2649 vssrl16(CPURISCVState
*env
, int vxrm
, uint16_t a
, uint16_t b
)
2651 uint8_t round
, shift
= b
& 0xf;
2654 round
= get_round(vxrm
, a
, shift
);
2655 res
= (a
>> shift
) + round
;
2658 static inline uint32_t
2659 vssrl32(CPURISCVState
*env
, int vxrm
, uint32_t a
, uint32_t b
)
2661 uint8_t round
, shift
= b
& 0x1f;
2664 round
= get_round(vxrm
, a
, shift
);
2665 res
= (a
>> shift
) + round
;
2668 static inline uint64_t
2669 vssrl64(CPURISCVState
*env
, int vxrm
, uint64_t a
, uint64_t b
)
2671 uint8_t round
, shift
= b
& 0x3f;
2674 round
= get_round(vxrm
, a
, shift
);
2675 res
= (a
>> shift
) + round
;
2678 RVVCALL(OPIVV2_RM
, vssrl_vv_b
, OP_UUU_B
, H1
, H1
, H1
, vssrl8
)
2679 RVVCALL(OPIVV2_RM
, vssrl_vv_h
, OP_UUU_H
, H2
, H2
, H2
, vssrl16
)
2680 RVVCALL(OPIVV2_RM
, vssrl_vv_w
, OP_UUU_W
, H4
, H4
, H4
, vssrl32
)
2681 RVVCALL(OPIVV2_RM
, vssrl_vv_d
, OP_UUU_D
, H8
, H8
, H8
, vssrl64
)
2682 GEN_VEXT_VV_RM(vssrl_vv_b
)
2683 GEN_VEXT_VV_RM(vssrl_vv_h
)
2684 GEN_VEXT_VV_RM(vssrl_vv_w
)
2685 GEN_VEXT_VV_RM(vssrl_vv_d
)
2687 RVVCALL(OPIVX2_RM
, vssrl_vx_b
, OP_UUU_B
, H1
, H1
, vssrl8
)
2688 RVVCALL(OPIVX2_RM
, vssrl_vx_h
, OP_UUU_H
, H2
, H2
, vssrl16
)
2689 RVVCALL(OPIVX2_RM
, vssrl_vx_w
, OP_UUU_W
, H4
, H4
, vssrl32
)
2690 RVVCALL(OPIVX2_RM
, vssrl_vx_d
, OP_UUU_D
, H8
, H8
, vssrl64
)
2691 GEN_VEXT_VX_RM(vssrl_vx_b
)
2692 GEN_VEXT_VX_RM(vssrl_vx_h
)
2693 GEN_VEXT_VX_RM(vssrl_vx_w
)
2694 GEN_VEXT_VX_RM(vssrl_vx_d
)
2696 static inline int8_t
2697 vssra8(CPURISCVState
*env
, int vxrm
, int8_t a
, int8_t b
)
2699 uint8_t round
, shift
= b
& 0x7;
2702 round
= get_round(vxrm
, a
, shift
);
2703 res
= (a
>> shift
) + round
;
2706 static inline int16_t
2707 vssra16(CPURISCVState
*env
, int vxrm
, int16_t a
, int16_t b
)
2709 uint8_t round
, shift
= b
& 0xf;
2712 round
= get_round(vxrm
, a
, shift
);
2713 res
= (a
>> shift
) + round
;
2716 static inline int32_t
2717 vssra32(CPURISCVState
*env
, int vxrm
, int32_t a
, int32_t b
)
2719 uint8_t round
, shift
= b
& 0x1f;
2722 round
= get_round(vxrm
, a
, shift
);
2723 res
= (a
>> shift
) + round
;
2726 static inline int64_t
2727 vssra64(CPURISCVState
*env
, int vxrm
, int64_t a
, int64_t b
)
2729 uint8_t round
, shift
= b
& 0x3f;
2732 round
= get_round(vxrm
, a
, shift
);
2733 res
= (a
>> shift
) + round
;
2737 RVVCALL(OPIVV2_RM
, vssra_vv_b
, OP_SSS_B
, H1
, H1
, H1
, vssra8
)
2738 RVVCALL(OPIVV2_RM
, vssra_vv_h
, OP_SSS_H
, H2
, H2
, H2
, vssra16
)
2739 RVVCALL(OPIVV2_RM
, vssra_vv_w
, OP_SSS_W
, H4
, H4
, H4
, vssra32
)
2740 RVVCALL(OPIVV2_RM
, vssra_vv_d
, OP_SSS_D
, H8
, H8
, H8
, vssra64
)
2741 GEN_VEXT_VV_RM(vssra_vv_b
)
2742 GEN_VEXT_VV_RM(vssra_vv_h
)
2743 GEN_VEXT_VV_RM(vssra_vv_w
)
2744 GEN_VEXT_VV_RM(vssra_vv_d
)
2746 RVVCALL(OPIVX2_RM
, vssra_vx_b
, OP_SSS_B
, H1
, H1
, vssra8
)
2747 RVVCALL(OPIVX2_RM
, vssra_vx_h
, OP_SSS_H
, H2
, H2
, vssra16
)
2748 RVVCALL(OPIVX2_RM
, vssra_vx_w
, OP_SSS_W
, H4
, H4
, vssra32
)
2749 RVVCALL(OPIVX2_RM
, vssra_vx_d
, OP_SSS_D
, H8
, H8
, vssra64
)
2750 GEN_VEXT_VX_RM(vssra_vx_b
)
2751 GEN_VEXT_VX_RM(vssra_vx_h
)
2752 GEN_VEXT_VX_RM(vssra_vx_w
)
2753 GEN_VEXT_VX_RM(vssra_vx_d
)
2755 /* Vector Narrowing Fixed-Point Clip Instructions */
2756 static inline int8_t
2757 vnclip8(CPURISCVState
*env
, int vxrm
, int16_t a
, int8_t b
)
2759 uint8_t round
, shift
= b
& 0xf;
2762 round
= get_round(vxrm
, a
, shift
);
2763 res
= (a
>> shift
) + round
;
2764 if (res
> INT8_MAX
) {
2767 } else if (res
< INT8_MIN
) {
2775 static inline int16_t
2776 vnclip16(CPURISCVState
*env
, int vxrm
, int32_t a
, int16_t b
)
2778 uint8_t round
, shift
= b
& 0x1f;
2781 round
= get_round(vxrm
, a
, shift
);
2782 res
= (a
>> shift
) + round
;
2783 if (res
> INT16_MAX
) {
2786 } else if (res
< INT16_MIN
) {
2794 static inline int32_t
2795 vnclip32(CPURISCVState
*env
, int vxrm
, int64_t a
, int32_t b
)
2797 uint8_t round
, shift
= b
& 0x3f;
2800 round
= get_round(vxrm
, a
, shift
);
2801 res
= (a
>> shift
) + round
;
2802 if (res
> INT32_MAX
) {
2805 } else if (res
< INT32_MIN
) {
2813 RVVCALL(OPIVV2_RM
, vnclip_wv_b
, NOP_SSS_B
, H1
, H2
, H1
, vnclip8
)
2814 RVVCALL(OPIVV2_RM
, vnclip_wv_h
, NOP_SSS_H
, H2
, H4
, H2
, vnclip16
)
2815 RVVCALL(OPIVV2_RM
, vnclip_wv_w
, NOP_SSS_W
, H4
, H8
, H4
, vnclip32
)
2816 GEN_VEXT_VV_RM(vnclip_wv_b
)
2817 GEN_VEXT_VV_RM(vnclip_wv_h
)
2818 GEN_VEXT_VV_RM(vnclip_wv_w
)
2820 RVVCALL(OPIVX2_RM
, vnclip_wx_b
, NOP_SSS_B
, H1
, H2
, vnclip8
)
2821 RVVCALL(OPIVX2_RM
, vnclip_wx_h
, NOP_SSS_H
, H2
, H4
, vnclip16
)
2822 RVVCALL(OPIVX2_RM
, vnclip_wx_w
, NOP_SSS_W
, H4
, H8
, vnclip32
)
2823 GEN_VEXT_VX_RM(vnclip_wx_b
)
2824 GEN_VEXT_VX_RM(vnclip_wx_h
)
2825 GEN_VEXT_VX_RM(vnclip_wx_w
)
2827 static inline uint8_t
2828 vnclipu8(CPURISCVState
*env
, int vxrm
, uint16_t a
, uint8_t b
)
2830 uint8_t round
, shift
= b
& 0xf;
2833 round
= get_round(vxrm
, a
, shift
);
2834 res
= (a
>> shift
) + round
;
2835 if (res
> UINT8_MAX
) {
2843 static inline uint16_t
2844 vnclipu16(CPURISCVState
*env
, int vxrm
, uint32_t a
, uint16_t b
)
2846 uint8_t round
, shift
= b
& 0x1f;
2849 round
= get_round(vxrm
, a
, shift
);
2850 res
= (a
>> shift
) + round
;
2851 if (res
> UINT16_MAX
) {
2859 static inline uint32_t
2860 vnclipu32(CPURISCVState
*env
, int vxrm
, uint64_t a
, uint32_t b
)
2862 uint8_t round
, shift
= b
& 0x3f;
2865 round
= get_round(vxrm
, a
, shift
);
2866 res
= (a
>> shift
) + round
;
2867 if (res
> UINT32_MAX
) {
2875 RVVCALL(OPIVV2_RM
, vnclipu_wv_b
, NOP_UUU_B
, H1
, H2
, H1
, vnclipu8
)
2876 RVVCALL(OPIVV2_RM
, vnclipu_wv_h
, NOP_UUU_H
, H2
, H4
, H2
, vnclipu16
)
2877 RVVCALL(OPIVV2_RM
, vnclipu_wv_w
, NOP_UUU_W
, H4
, H8
, H4
, vnclipu32
)
2878 GEN_VEXT_VV_RM(vnclipu_wv_b
)
2879 GEN_VEXT_VV_RM(vnclipu_wv_h
)
2880 GEN_VEXT_VV_RM(vnclipu_wv_w
)
2882 RVVCALL(OPIVX2_RM
, vnclipu_wx_b
, NOP_UUU_B
, H1
, H2
, vnclipu8
)
2883 RVVCALL(OPIVX2_RM
, vnclipu_wx_h
, NOP_UUU_H
, H2
, H4
, vnclipu16
)
2884 RVVCALL(OPIVX2_RM
, vnclipu_wx_w
, NOP_UUU_W
, H4
, H8
, vnclipu32
)
2885 GEN_VEXT_VX_RM(vnclipu_wx_b
)
2886 GEN_VEXT_VX_RM(vnclipu_wx_h
)
2887 GEN_VEXT_VX_RM(vnclipu_wx_w
)
/*
 *** Vector Float Point Arithmetic Instructions
 */

/* Vector Single-Width Floating-Point Add/Subtract Instructions */

/*
 * OPFVV2: per-element FP vector-vector worker; OP receives env->fp_status
 * so softfloat rounding/exception state is threaded through.
 * NOTE(review): restored the brace lines dropped by the paste.
 */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
}

/*
 * GEN_VEXT_VV_RM-style helper generator for FP vector-vector ops.
 * NOTE(review): loop-boilerplate continuation lines (index declaration,
 * continue, vstart reset) were elided by the paste and reconstructed
 * here following the pattern of GEN_VEXT_VV in this file — confirm
 * against the upstream source.
 */
#define GEN_VEXT_VV_ENV(NAME)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
}
2920 RVVCALL(OPFVV2
, vfadd_vv_h
, OP_UUU_H
, H2
, H2
, H2
, float16_add
)
2921 RVVCALL(OPFVV2
, vfadd_vv_w
, OP_UUU_W
, H4
, H4
, H4
, float32_add
)
2922 RVVCALL(OPFVV2
, vfadd_vv_d
, OP_UUU_D
, H8
, H8
, H8
, float64_add
)
2923 GEN_VEXT_VV_ENV(vfadd_vv_h
)
2924 GEN_VEXT_VV_ENV(vfadd_vv_w
)
2925 GEN_VEXT_VV_ENV(vfadd_vv_d
)
/*
 * OPFVF2: per-element FP vector-scalar worker; the scalar FP value s1 is
 * passed as raw bits and narrowed through (TX1)(T1).
 * NOTE(review): restored the brace lines dropped by the paste.
 */
#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)         \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,  \
                      CPURISCVState *env)                       \
{                                                               \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status); \
}

/*
 * Helper generator for FP vector-scalar ops.  NOTE(review): loop
 * boilerplate reconstructed as in GEN_VEXT_VV_ENV — confirm upstream.
 */
#define GEN_VEXT_VF(NAME)                                 \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, s1, vs2, i, env);                   \
    }                                                     \
    env->vstart = 0;                                      \
}
/* vfadd.vf: vd[i] = vs2[i] + rs1 */
RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
GEN_VEXT_VF(vfadd_vf_h)
GEN_VEXT_VF(vfadd_vf_w)
GEN_VEXT_VF(vfadd_vf_d)

/* vfsub.vv: vd[i] = vs2[i] - vs1[i] */
RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
GEN_VEXT_VV_ENV(vfsub_vv_h)
GEN_VEXT_VV_ENV(vfsub_vv_w)
GEN_VEXT_VV_ENV(vfsub_vv_d)

/* vfsub.vf: vd[i] = vs2[i] - rs1 */
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
GEN_VEXT_VF(vfsub_vf_h)
GEN_VEXT_VF(vfsub_vf_w)
GEN_VEXT_VF(vfsub_vf_d)
2973 static uint16_t float16_rsub(uint16_t a
, uint16_t b
, float_status
*s
)
2975 return float16_sub(b
, a
, s
);
2978 static uint32_t float32_rsub(uint32_t a
, uint32_t b
, float_status
*s
)
2980 return float32_sub(b
, a
, s
);
2983 static uint64_t float64_rsub(uint64_t a
, uint64_t b
, float_status
*s
)
2985 return float64_sub(b
, a
, s
);
/* vfrsub.vf: vd[i] = rs1 - vs2[i] */
RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
GEN_VEXT_VF(vfrsub_vf_h)
GEN_VEXT_VF(vfrsub_vf_w)
GEN_VEXT_VF(vfrsub_vf_d)
2995 /* Vector Widening Floating-Point Add/Subtract Instructions */
2996 static uint32_t vfwadd16(uint16_t a
, uint16_t b
, float_status
*s
)
2998 return float32_add(float16_to_float32(a
, true, s
),
2999 float16_to_float32(b
, true, s
), s
);
3002 static uint64_t vfwadd32(uint32_t a
, uint32_t b
, float_status
*s
)
3004 return float64_add(float32_to_float64(a
, s
),
3005 float32_to_float64(b
, s
), s
);
/* vfwadd.vv / vfwadd.vf: 2*SEW result = widen(vs2) + widen(vs1/rs1) */
RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
GEN_VEXT_VV_ENV(vfwadd_vv_h)
GEN_VEXT_VV_ENV(vfwadd_vv_w)
RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
GEN_VEXT_VF(vfwadd_vf_h)
GEN_VEXT_VF(vfwadd_vf_w)
3018 static uint32_t vfwsub16(uint16_t a
, uint16_t b
, float_status
*s
)
3020 return float32_sub(float16_to_float32(a
, true, s
),
3021 float16_to_float32(b
, true, s
), s
);
3024 static uint64_t vfwsub32(uint32_t a
, uint32_t b
, float_status
*s
)
3026 return float64_sub(float32_to_float64(a
, s
),
3027 float32_to_float64(b
, s
), s
);
/* vfwsub.vv / vfwsub.vf: 2*SEW result = widen(vs2) - widen(vs1/rs1) */
RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
GEN_VEXT_VV_ENV(vfwsub_vv_h)
GEN_VEXT_VV_ENV(vfwsub_vv_w)
RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
GEN_VEXT_VF(vfwsub_vf_h)
GEN_VEXT_VF(vfwsub_vf_w)
3040 static uint32_t vfwaddw16(uint32_t a
, uint16_t b
, float_status
*s
)
3042 return float32_add(a
, float16_to_float32(b
, true, s
), s
);
3045 static uint64_t vfwaddw32(uint64_t a
, uint32_t b
, float_status
*s
)
3047 return float64_add(a
, float32_to_float64(b
, s
), s
);
/* vfwadd.wv / vfwadd.wf: vs2 is already 2*SEW wide; only vs1/rs1 is widened */
RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
GEN_VEXT_VV_ENV(vfwadd_wv_h)
GEN_VEXT_VV_ENV(vfwadd_wv_w)
RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
GEN_VEXT_VF(vfwadd_wf_h)
GEN_VEXT_VF(vfwadd_wf_w)
3059 static uint32_t vfwsubw16(uint32_t a
, uint16_t b
, float_status
*s
)
3061 return float32_sub(a
, float16_to_float32(b
, true, s
), s
);
3064 static uint64_t vfwsubw32(uint64_t a
, uint32_t b
, float_status
*s
)
3066 return float64_sub(a
, float32_to_float64(b
, s
), s
);
/* vfwsub.wv / vfwsub.wf: wide vs2 minus widened vs1/rs1 */
RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
GEN_VEXT_VV_ENV(vfwsub_wv_h)
GEN_VEXT_VV_ENV(vfwsub_wv_w)
RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
GEN_VEXT_VF(vfwsub_wf_h)
GEN_VEXT_VF(vfwsub_wf_w)
/* Vector Single-Width Floating-Point Multiply/Divide Instructions */

/* vfmul.vv: vd[i] = vs2[i] * vs1[i] */
RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
GEN_VEXT_VV_ENV(vfmul_vv_h)
GEN_VEXT_VV_ENV(vfmul_vv_w)
GEN_VEXT_VV_ENV(vfmul_vv_d)
/* vfmul.vf: vd[i] = vs2[i] * rs1 */
RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
GEN_VEXT_VF(vfmul_vf_h)
GEN_VEXT_VF(vfmul_vf_w)
GEN_VEXT_VF(vfmul_vf_d)

/* vfdiv.vv: vd[i] = vs2[i] / vs1[i] */
RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
GEN_VEXT_VV_ENV(vfdiv_vv_h)
GEN_VEXT_VV_ENV(vfdiv_vv_w)
GEN_VEXT_VV_ENV(vfdiv_vv_d)
/* vfdiv.vf: vd[i] = vs2[i] / rs1 */
RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
GEN_VEXT_VF(vfdiv_vf_h)
GEN_VEXT_VF(vfdiv_vf_w)
GEN_VEXT_VF(vfdiv_vf_d)
3105 static uint16_t float16_rdiv(uint16_t a
, uint16_t b
, float_status
*s
)
3107 return float16_div(b
, a
, s
);
3110 static uint32_t float32_rdiv(uint32_t a
, uint32_t b
, float_status
*s
)
3112 return float32_div(b
, a
, s
);
3115 static uint64_t float64_rdiv(uint64_t a
, uint64_t b
, float_status
*s
)
3117 return float64_div(b
, a
, s
);
/* vfrdiv.vf: vd[i] = rs1 / vs2[i] */
RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
GEN_VEXT_VF(vfrdiv_vf_h)
GEN_VEXT_VF(vfrdiv_vf_w)
GEN_VEXT_VF(vfrdiv_vf_d)
3127 /* Vector Widening Floating-Point Multiply */
3128 static uint32_t vfwmul16(uint16_t a
, uint16_t b
, float_status
*s
)
3130 return float32_mul(float16_to_float32(a
, true, s
),
3131 float16_to_float32(b
, true, s
), s
);
3134 static uint64_t vfwmul32(uint32_t a
, uint32_t b
, float_status
*s
)
3136 return float64_mul(float32_to_float64(a
, s
),
3137 float32_to_float64(b
, s
), s
);
/* vfwmul.vv / vfwmul.vf: 2*SEW result = widen(vs2) * widen(vs1/rs1) */
RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
GEN_VEXT_VV_ENV(vfwmul_vv_h)
GEN_VEXT_VV_ENV(vfwmul_vv_w)
RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
GEN_VEXT_VF(vfwmul_vf_h)
GEN_VEXT_VF(vfwmul_vf_w)
/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */

/*
 * Emit do_<NAME> for three-operand (destructive) FP ops: the current
 * destination element d participates as the accumulator, and the
 * callback is invoked as OP(s2, s1, d).
 */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    TD d = *((TD *)vd + HD(i));                                \
    *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);      \
}
/* fmacc: fused (a * b) + d with no negation flags */
static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, 0, s);
}

static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, 0, s);
}

static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, 0, s);
}

/* vfmacc.vv: vd[i] = +(vs2[i] * vs1[i]) + vd[i] */
RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
GEN_VEXT_VV_ENV(vfmacc_vv_h)
GEN_VEXT_VV_ENV(vfmacc_vv_w)
GEN_VEXT_VV_ENV(vfmacc_vv_d)

/*
 * Emit do_<NAME> for three-operand vector-scalar FP ops; the scalar rs1
 * replaces the vs1 operand: OP(s2, rs1, d).
 */
#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
                      CPURISCVState *env)                         \
{                                                                 \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
    TD d = *((TD *)vd + HD(i));                                   \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
}

/* vfmacc.vf: vd[i] = +(vs2[i] * rs1) + vd[i] */
RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
GEN_VEXT_VF(vfmacc_vf_h)
GEN_VEXT_VF(vfmacc_vf_w)
GEN_VEXT_VF(vfmacc_vf_d)
/* fnmacc: fused -(a * b) - d (negate both product and addend) */
static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

/* vfnmacc.vv / vfnmacc.vf: vd[i] = -(vs2[i] * operand1) - vd[i] */
RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
GEN_VEXT_VV_ENV(vfnmacc_vv_h)
GEN_VEXT_VV_ENV(vfnmacc_vv_w)
GEN_VEXT_VV_ENV(vfnmacc_vv_d)
RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
GEN_VEXT_VF(vfnmacc_vf_h)
GEN_VEXT_VF(vfnmacc_vf_w)
GEN_VEXT_VF(vfnmacc_vf_d)
/* fmsac: fused (a * b) - d (negate the addend only) */
static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_c, s);
}

static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_c, s);
}

static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_c, s);
}

/* vfmsac.vv / vfmsac.vf: vd[i] = +(vs2[i] * operand1) - vd[i] */
RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
GEN_VEXT_VV_ENV(vfmsac_vv_h)
GEN_VEXT_VV_ENV(vfmsac_vv_w)
GEN_VEXT_VV_ENV(vfmsac_vv_d)
RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
GEN_VEXT_VF(vfmsac_vf_h)
GEN_VEXT_VF(vfmsac_vf_w)
GEN_VEXT_VF(vfmsac_vf_d)

/* fnmsac: fused -(a * b) + d (negate the product only) */
static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_product, s);
}

static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_product, s);
}

static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_product, s);
}

/* vfnmsac.vv / vfnmsac.vf: vd[i] = -(vs2[i] * operand1) + vd[i] */
RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
GEN_VEXT_VV_ENV(vfnmsac_vv_h)
GEN_VEXT_VV_ENV(vfnmsac_vv_w)
GEN_VEXT_VV_ENV(vfnmsac_vv_d)
RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
GEN_VEXT_VF(vfnmsac_vf_h)
GEN_VEXT_VF(vfnmsac_vf_w)
GEN_VEXT_VF(vfnmsac_vf_d)
/*
 * fmadd: fused (d * b) + a — note the accumulator d is the multiplicand
 * here, matching vfmadd's vd-overwriting semantics.
 */
static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, 0, s);
}

static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, 0, s);
}

static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, 0, s);
}

/* vfmadd.vv / vfmadd.vf: vd[i] = +(vd[i] * operand1) + vs2[i] */
RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
GEN_VEXT_VV_ENV(vfmadd_vv_h)
GEN_VEXT_VV_ENV(vfmadd_vv_w)
GEN_VEXT_VV_ENV(vfmadd_vv_d)
RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
GEN_VEXT_VF(vfmadd_vf_h)
GEN_VEXT_VF(vfmadd_vf_w)
GEN_VEXT_VF(vfmadd_vf_d)

/* fnmadd: fused -(d * b) - a */
static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

/* vfnmadd.vv / vfnmadd.vf: vd[i] = -(vd[i] * operand1) - vs2[i] */
RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
GEN_VEXT_VV_ENV(vfnmadd_vv_h)
GEN_VEXT_VV_ENV(vfnmadd_vv_w)
GEN_VEXT_VV_ENV(vfnmadd_vv_d)
RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
GEN_VEXT_VF(vfnmadd_vf_h)
GEN_VEXT_VF(vfnmadd_vf_w)
GEN_VEXT_VF(vfnmadd_vf_d)
/* fmsub: fused (d * b) - a */
static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, float_muladd_negate_c, s);
}

static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, float_muladd_negate_c, s);
}

static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, float_muladd_negate_c, s);
}

/* vfmsub.vv / vfmsub.vf: vd[i] = +(vd[i] * operand1) - vs2[i] */
RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
GEN_VEXT_VV_ENV(vfmsub_vv_h)
GEN_VEXT_VV_ENV(vfmsub_vv_w)
GEN_VEXT_VV_ENV(vfmsub_vv_d)
RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
GEN_VEXT_VF(vfmsub_vf_h)
GEN_VEXT_VF(vfmsub_vf_w)
GEN_VEXT_VF(vfmsub_vf_d)

/* fnmsub: fused -(d * b) + a */
static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, float_muladd_negate_product, s);
}

static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, float_muladd_negate_product, s);
}

static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, float_muladd_negate_product, s);
}

/* vfnmsub.vv / vfnmsub.vf: vd[i] = -(vd[i] * operand1) + vs2[i] */
RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
GEN_VEXT_VV_ENV(vfnmsub_vv_h)
GEN_VEXT_VV_ENV(vfnmsub_vv_w)
GEN_VEXT_VV_ENV(vfnmsub_vv_d)
RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
GEN_VEXT_VF(vfnmsub_vf_h)
GEN_VEXT_VF(vfnmsub_vf_w)
GEN_VEXT_VF(vfnmsub_vf_d)
/* Vector Widening Floating-Point Fused Multiply-Add Instructions */

/* fwmacc: widen a and b, then fused (a * b) + d at double width */
static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d, 0, s);
}

static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d, 0, s);
}

/* vfwmacc.vv / vfwmacc.vf: vd[i](2*SEW) += widen(vs2[i]) * widen(operand1) */
RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
GEN_VEXT_VV_ENV(vfwmacc_vv_h)
GEN_VEXT_VV_ENV(vfwmacc_vv_w)
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
GEN_VEXT_VF(vfwmacc_vf_h)
GEN_VEXT_VF(vfwmacc_vf_w)

/* fwnmacc: widening fused -(a * b) - d */
static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

/* vfwnmacc.vv / vfwnmacc.vf */
RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
GEN_VEXT_VV_ENV(vfwnmacc_vv_h)
GEN_VEXT_VV_ENV(vfwnmacc_vv_w)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
GEN_VEXT_VF(vfwnmacc_vf_h)
GEN_VEXT_VF(vfwnmacc_vf_w)
/* fwmsac: widening fused (a * b) - d */
static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c, s);
}

static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_c, s);
}

/* vfwmsac.vv / vfwmsac.vf */
RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
GEN_VEXT_VV_ENV(vfwmsac_vv_h)
GEN_VEXT_VV_ENV(vfwmsac_vv_w)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
GEN_VEXT_VF(vfwmsac_vf_h)
GEN_VEXT_VF(vfwmsac_vf_w)

/* fwnmsac: widening fused -(a * b) + d */
static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_product, s);
}

static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_product, s);
}

/* vfwnmsac.vv / vfwnmsac.vf */
RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
GEN_VEXT_VV_ENV(vfwnmsac_vv_h)
GEN_VEXT_VV_ENV(vfwnmsac_vv_w)
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
GEN_VEXT_VF(vfwnmsac_vf_h)
GEN_VEXT_VF(vfwnmsac_vf_w)
/* Vector Floating-Point Square-Root Instruction */

/* Unary-op type triples: TD, T2, TX2 */
#define OP_UU_H uint16_t, uint16_t, uint16_t
#define OP_UU_W uint32_t, uint32_t, uint32_t
#define OP_UU_D uint64_t, uint64_t, uint64_t

/* Emit do_<NAME>: per-element unary FP op, OP(s2). */
#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
static void do_##NAME(void *vd, void *vs2, int i,      \
                      CPURISCVState *env)              \
{                                                      \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
    *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
}

/*
 * Emit HELPER(NAME) for unary FP ops: masked loop over [vstart, vl);
 * vl == 0 returns early without touching vstart.
 */
#define GEN_VEXT_V_ENV(NAME)                           \
void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)   \
{                                                      \
    uint32_t vm = vext_vm(desc);                       \
    uint32_t vl = env->vl;                             \
    uint32_t i;                                        \
                                                       \
    if (vl == 0) {                                     \
        return;                                        \
    }                                                  \
    for (i = env->vstart; i < vl; i++) {               \
        if (!vm && !vext_elem_mask(v0, i)) {           \
            continue;                                  \
        }                                              \
        do_##NAME(vd, vs2, i, env);                    \
    }                                                  \
    env->vstart = 0;                                   \
}

/* vfsqrt.v: vd[i] = sqrt(vs2[i]) */
RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
GEN_VEXT_V_ENV(vfsqrt_v_h)
GEN_VEXT_V_ENV(vfsqrt_v_w)
GEN_VEXT_V_ENV(vfsqrt_v_d)
/*
 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 */

/*
 * 7-bit reciprocal square-root estimate on the raw IEEE encoding of f,
 * parameterized by exponent/fraction field widths.  The caller has
 * already filtered out NaN/inf/zero/negative inputs.
 */
static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
{
    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    /* Table indexed by {lsb of exp, top 6 fraction bits}; 7-bit outputs. */
    const uint8_t lookup_table[] = {
        52, 51, 50, 48, 47, 46, 44, 43,
        42, 41, 40, 39, 38, 36, 35, 34,
        33, 32, 31, 30, 30, 29, 28, 27,
        26, 25, 24, 23, 23, 22, 21, 20,
        19, 19, 18, 17, 16, 16, 15, 14,
        14, 13, 12, 12, 11, 10, 10, 9,
        9, 8, 7, 7, 6, 6, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0,
        127, 125, 123, 121, 119, 118, 116, 114,
        113, 111, 109, 108, 106, 105, 103, 102,
        100, 99, 97, 96, 95, 93, 92, 91,
        90, 88, 87, 86, 85, 84, 83, 82,
        80, 79, 78, 77, 76, 75, 74, 73,
        72, 71, 70, 70, 69, 68, 67, 66,
        65, 64, 63, 63, 62, 61, 60, 59,
        59, 58, 57, 56, 56, 55, 54, 53
    };
    const int precision = 7;

    if (exp == 0 && frac != 0) { /* subnormal */
        /* Normalize the subnormal. */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
    }

    int idx = ((exp & 1) << (precision - 1)) |
              (frac >> (frac_size - precision + 1));
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                        (frac_size - precision);
    /* Estimate exponent: (3 * bias - 1 - exp) / 2, via ~exp. */
    uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;

    uint64_t val = 0;
    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}
/* Per-format vfrsqrt7 wrappers: handle special cases, then call frsqrt7. */
static float16 frsqrt7_h(float16 f, float_status *s)
{
    int exp_size = 5, frac_size = 10;
    bool sign = float16_is_neg(f);

    /*
     * frsqrt7(sNaN) = canonical NaN
     * frsqrt7(-inf) = canonical NaN
     * frsqrt7(-normal) = canonical NaN
     * frsqrt7(-subnormal) = canonical NaN
     */
    if (float16_is_signaling_nan(f, s) ||
        (float16_is_infinity(f) && sign) ||
        (float16_is_normal(f) && sign) ||
        (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
        return float16_default_nan(s);
    }

    /* frsqrt7(qNaN) = canonical NaN */
    if (float16_is_quiet_nan(f, s)) {
        return float16_default_nan(s);
    }

    /* frsqrt7(+-0) = +-inf */
    if (float16_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float16_set_sign(float16_infinity, sign);
    }

    /* frsqrt7(+inf) = +0 */
    if (float16_is_infinity(f) && !sign) {
        return float16_set_sign(float16_zero, sign);
    }

    /* +normal, +subnormal */
    uint64_t val = frsqrt7(f, exp_size, frac_size);
    return make_float16(val);
}

static float32 frsqrt7_s(float32 f, float_status *s)
{
    int exp_size = 8, frac_size = 23;
    bool sign = float32_is_neg(f);

    /*
     * frsqrt7(sNaN) = canonical NaN
     * frsqrt7(-inf) = canonical NaN
     * frsqrt7(-normal) = canonical NaN
     * frsqrt7(-subnormal) = canonical NaN
     */
    if (float32_is_signaling_nan(f, s) ||
        (float32_is_infinity(f) && sign) ||
        (float32_is_normal(f) && sign) ||
        (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
        return float32_default_nan(s);
    }

    /* frsqrt7(qNaN) = canonical NaN */
    if (float32_is_quiet_nan(f, s)) {
        return float32_default_nan(s);
    }

    /* frsqrt7(+-0) = +-inf */
    if (float32_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float32_set_sign(float32_infinity, sign);
    }

    /* frsqrt7(+inf) = +0 */
    if (float32_is_infinity(f) && !sign) {
        return float32_set_sign(float32_zero, sign);
    }

    /* +normal, +subnormal */
    uint64_t val = frsqrt7(f, exp_size, frac_size);
    return make_float32(val);
}

static float64 frsqrt7_d(float64 f, float_status *s)
{
    int exp_size = 11, frac_size = 52;
    bool sign = float64_is_neg(f);

    /*
     * frsqrt7(sNaN) = canonical NaN
     * frsqrt7(-inf) = canonical NaN
     * frsqrt7(-normal) = canonical NaN
     * frsqrt7(-subnormal) = canonical NaN
     */
    if (float64_is_signaling_nan(f, s) ||
        (float64_is_infinity(f) && sign) ||
        (float64_is_normal(f) && sign) ||
        (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
        return float64_default_nan(s);
    }

    /* frsqrt7(qNaN) = canonical NaN */
    if (float64_is_quiet_nan(f, s)) {
        return float64_default_nan(s);
    }

    /* frsqrt7(+-0) = +-inf */
    if (float64_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float64_set_sign(float64_infinity, sign);
    }

    /* frsqrt7(+inf) = +0 */
    if (float64_is_infinity(f) && !sign) {
        return float64_set_sign(float64_zero, sign);
    }

    /* +normal, +subnormal */
    uint64_t val = frsqrt7(f, exp_size, frac_size);
    return make_float64(val);
}

/* vfrsqrt7.v: vd[i] = 7-bit estimate of 1/sqrt(vs2[i]) */
RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
GEN_VEXT_V_ENV(vfrsqrt7_v_h)
GEN_VEXT_V_ENV(vfrsqrt7_v_w)
GEN_VEXT_V_ENV(vfrsqrt7_v_d)
/*
 * Vector Floating-Point Reciprocal Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 */

/*
 * 7-bit reciprocal estimate on the raw IEEE encoding of f.  Unlike
 * frsqrt7 this needs the float_status: overflow/underflow handling
 * depends on the current rounding mode and raises exception flags.
 */
static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
                      float_status *s)
{
    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    /* Table indexed by the top 7 fraction bits; 7-bit outputs. */
    const uint8_t lookup_table[] = {
        127, 125, 123, 121, 119, 117, 116, 114,
        112, 110, 109, 107, 105, 104, 102, 100,
        99, 97, 96, 94, 93, 91, 90, 88,
        87, 85, 84, 83, 81, 80, 79, 77,
        76, 75, 74, 72, 71, 70, 69, 68,
        66, 65, 64, 63, 62, 61, 60, 59,
        58, 57, 56, 55, 54, 53, 52, 51,
        50, 49, 48, 47, 46, 45, 44, 43,
        42, 41, 40, 40, 39, 38, 37, 36,
        35, 35, 34, 33, 32, 31, 31, 30,
        29, 28, 28, 27, 26, 25, 25, 24,
        23, 23, 22, 21, 21, 20, 19, 19,
        18, 17, 17, 16, 15, 15, 14, 14,
        13, 12, 12, 11, 11, 10, 9, 9,
        8, 8, 7, 7, 6, 5, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0
    };
    const int precision = 7;

    if (exp == 0 && frac != 0) { /* subnormal */
        /* Normalize the subnormal. */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);

        if (exp != 0 && exp != UINT64_MAX) {
            /*
             * Overflow to inf or max value of same sign,
             * depending on sign and rounding mode.
             */
            s->float_exception_flags |= (float_flag_inexact |
                                         float_flag_overflow);

            if ((s->float_rounding_mode == float_round_to_zero) ||
                ((s->float_rounding_mode == float_round_down) && !sign) ||
                ((s->float_rounding_mode == float_round_up) && sign)) {
                /* Return greatest/negative finite value. */
                return (sign << (exp_size + frac_size)) |
                       (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
            } else {
                /* Return +-inf. */
                return (sign << (exp_size + frac_size)) |
                       MAKE_64BIT_MASK(frac_size, exp_size);
            }
        }
    }

    int idx = frac >> (frac_size - precision);
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                        (frac_size - precision);
    /* Estimate exponent: 2 * bias - 2 - exp, via ~exp. */
    uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;

    if (out_exp == 0 || out_exp == UINT64_MAX) {
        /*
         * The result is subnormal, but don't raise the underflow exception,
         * because there's no additional loss of precision.
         */
        out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
        if (out_exp == UINT64_MAX) {
            out_frac >>= 1;
            out_exp = 0;
        }
    }

    uint64_t val = 0;
    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}
3802 static float16
frec7_h(float16 f
, float_status
*s
)
3804 int exp_size
= 5, frac_size
= 10;
3805 bool sign
= float16_is_neg(f
);
3807 /* frec7(+-inf) = +-0 */
3808 if (float16_is_infinity(f
)) {
3809 return float16_set_sign(float16_zero
, sign
);
3812 /* frec7(+-0) = +-inf */
3813 if (float16_is_zero(f
)) {
3814 s
->float_exception_flags
|= float_flag_divbyzero
;
3815 return float16_set_sign(float16_infinity
, sign
);
3818 /* frec7(sNaN) = canonical NaN */
3819 if (float16_is_signaling_nan(f
, s
)) {
3820 s
->float_exception_flags
|= float_flag_invalid
;
3821 return float16_default_nan(s
);
3824 /* frec7(qNaN) = canonical NaN */
3825 if (float16_is_quiet_nan(f
, s
)) {
3826 return float16_default_nan(s
);
3829 /* +-normal, +-subnormal */
3830 uint64_t val
= frec7(f
, exp_size
, frac_size
, s
);
3831 return make_float16(val
);
3834 static float32
frec7_s(float32 f
, float_status
*s
)
3836 int exp_size
= 8, frac_size
= 23;
3837 bool sign
= float32_is_neg(f
);
3839 /* frec7(+-inf) = +-0 */
3840 if (float32_is_infinity(f
)) {
3841 return float32_set_sign(float32_zero
, sign
);
3844 /* frec7(+-0) = +-inf */
3845 if (float32_is_zero(f
)) {
3846 s
->float_exception_flags
|= float_flag_divbyzero
;
3847 return float32_set_sign(float32_infinity
, sign
);
3850 /* frec7(sNaN) = canonical NaN */
3851 if (float32_is_signaling_nan(f
, s
)) {
3852 s
->float_exception_flags
|= float_flag_invalid
;
3853 return float32_default_nan(s
);
3856 /* frec7(qNaN) = canonical NaN */
3857 if (float32_is_quiet_nan(f
, s
)) {
3858 return float32_default_nan(s
);
3861 /* +-normal, +-subnormal */
3862 uint64_t val
= frec7(f
, exp_size
, frac_size
, s
);
3863 return make_float32(val
);
/*
 * Double-precision special-case dispatch for vfrec7.v; same structure as
 * frec7_s() with binary64 exponent/fraction widths.
 */
static float64 frec7_d(float64 f, float_status *s)
{
    int exp_size = 11, frac_size = 52;  /* IEEE-754 binary64 field widths */
    bool sign = float64_is_neg(f);

    /* frec7(+-inf) = +-0 */
    if (float64_is_infinity(f)) {
        return float64_set_sign(float64_zero, sign);
    }

    /* frec7(+-0) = +-inf, raising divide-by-zero */
    if (float64_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float64_set_sign(float64_infinity, sign);
    }

    /* frec7(sNaN) = canonical NaN, raising invalid */
    if (float64_is_signaling_nan(f, s)) {
        s->float_exception_flags |= float_flag_invalid;
        return float64_default_nan(s);
    }

    /* frec7(qNaN) = canonical NaN */
    if (float64_is_quiet_nan(f, s)) {
        return float64_default_nan(s);
    }

    /* +-normal, +-subnormal */
    uint64_t val = frec7(f, exp_size, frac_size, s);
    return make_float64(val);
}
/* Expand the vfrec7.v helpers for SEW = 16/32/64. */
RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
GEN_VEXT_V_ENV(vfrec7_v_h)
GEN_VEXT_V_ENV(vfrec7_v_w)
GEN_VEXT_V_ENV(vfrec7_v_d)

/*
 * Vector Floating-Point MIN/MAX Instructions.
 * Uses the IEEE-2019 minimumNumber/maximumNumber operations (a number is
 * preferred over a quiet NaN).
 */
RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
GEN_VEXT_VV_ENV(vfmin_vv_h)
GEN_VEXT_VV_ENV(vfmin_vv_w)
GEN_VEXT_VV_ENV(vfmin_vv_d)
RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
GEN_VEXT_VF(vfmin_vf_h)
GEN_VEXT_VF(vfmin_vf_w)
GEN_VEXT_VF(vfmin_vf_d)

RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
GEN_VEXT_VV_ENV(vfmax_vv_h)
GEN_VEXT_VV_ENV(vfmax_vv_w)
GEN_VEXT_VV_ENV(vfmax_vv_d)
RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
GEN_VEXT_VF(vfmax_vf_h)
GEN_VEXT_VF(vfmax_vf_w)
GEN_VEXT_VF(vfmax_vf_d)
3932 /* Vector Floating-Point Sign-Injection Instructions */
3933 static uint16_t fsgnj16(uint16_t a
, uint16_t b
, float_status
*s
)
3935 return deposit64(b
, 0, 15, a
);
3938 static uint32_t fsgnj32(uint32_t a
, uint32_t b
, float_status
*s
)
3940 return deposit64(b
, 0, 31, a
);
3943 static uint64_t fsgnj64(uint64_t a
, uint64_t b
, float_status
*s
)
3945 return deposit64(b
, 0, 63, a
);
/* vfsgnj.vv / vfsgnj.vf expansions for SEW = 16/32/64. */
RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
GEN_VEXT_VV_ENV(vfsgnj_vv_h)
GEN_VEXT_VV_ENV(vfsgnj_vv_w)
GEN_VEXT_VV_ENV(vfsgnj_vv_d)
RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
GEN_VEXT_VF(vfsgnj_vf_h)
GEN_VEXT_VF(vfsgnj_vf_w)
GEN_VEXT_VF(vfsgnj_vf_d)
3961 static uint16_t fsgnjn16(uint16_t a
, uint16_t b
, float_status
*s
)
3963 return deposit64(~b
, 0, 15, a
);
3966 static uint32_t fsgnjn32(uint32_t a
, uint32_t b
, float_status
*s
)
3968 return deposit64(~b
, 0, 31, a
);
3971 static uint64_t fsgnjn64(uint64_t a
, uint64_t b
, float_status
*s
)
3973 return deposit64(~b
, 0, 63, a
);
/* vfsgnjn.vv / vfsgnjn.vf expansions for SEW = 16/32/64. */
RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
GEN_VEXT_VV_ENV(vfsgnjn_vv_h)
GEN_VEXT_VV_ENV(vfsgnjn_vv_w)
GEN_VEXT_VV_ENV(vfsgnjn_vv_d)
RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
GEN_VEXT_VF(vfsgnjn_vf_h)
GEN_VEXT_VF(vfsgnjn_vf_w)
GEN_VEXT_VF(vfsgnjn_vf_d)
3989 static uint16_t fsgnjx16(uint16_t a
, uint16_t b
, float_status
*s
)
3991 return deposit64(b
^ a
, 0, 15, a
);
3994 static uint32_t fsgnjx32(uint32_t a
, uint32_t b
, float_status
*s
)
3996 return deposit64(b
^ a
, 0, 31, a
);
3999 static uint64_t fsgnjx64(uint64_t a
, uint64_t b
, float_status
*s
)
4001 return deposit64(b
^ a
, 0, 63, a
);
/* vfsgnjx.vv / vfsgnjx.vf expansions for SEW = 16/32/64. */
RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
GEN_VEXT_VV_ENV(vfsgnjx_vv_h)
GEN_VEXT_VV_ENV(vfsgnjx_vv_w)
GEN_VEXT_VV_ENV(vfsgnjx_vv_d)
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
GEN_VEXT_VF(vfsgnjx_vf_h)
GEN_VEXT_VF(vfsgnjx_vf_w)
GEN_VEXT_VF(vfsgnjx_vf_d)
/* Vector Floating-Point Compare Instructions */

/*
 * Vector-vector FP compare: each active element writes one mask bit in vd.
 * Inactive elements (masked off) leave their destination mask bit untouched.
 * Note the operand order: DO_OP(vs2[i], vs1[i]).
 */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i,                             \
                           DO_OP(s2, s1, &env->fp_status));   \
    }                                                         \
    env->vstart = 0;                                          \
}

/* vmfeq uses the quiet compare: no invalid flag on quiet NaN inputs. */
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)

/* Vector-scalar FP compare: DO_OP(vs2[i], rs1/fs1 scalar). */
#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i,                             \
                           DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)

/*
 * vmfne: "not equal" via the quiet compare; NaN operands compare unordered,
 * which also yields true here.
 */
static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)

/* vmflt/vmfle use the signaling lt/le comparisons. */
GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)

GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)

/* vmfgt: signaling compare (float*_compare, not the quiet variant). */
static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare(a, b, s);
    return compare == float_relation_greater;
}

static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare(a, b, s);
    return compare == float_relation_greater;
}

static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare(a, b, s);
    return compare == float_relation_greater;
}

/* vmfgt/vmfge exist only in vector-scalar form. */
GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)

/* vmfge: signaling compare; true on greater-than or equal. */
static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
/* Vector Floating-Point Classify Instruction */

/*
 * One-operand integer-result element op: reads vs2[i], writes vd[i].
 * Unlike OPFVV1 there is no float_status argument (vfclass raises no
 * FP exceptions).
 */
#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, void *vs2, int i)     \
{                                                     \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                   \
    *((TD *)vd + HD(i)) = OP(s2);                     \
}

/* Generic masked element loop around a do_NAME() body (no fp_status). */
#define GEN_VEXT_V(NAME)                              \
void HELPER(NAME)(void *vd, void *v0, void *vs2,      \
                  CPURISCVState *env, uint32_t desc)  \
{                                                     \
    uint32_t vm = vext_vm(desc);                      \
    uint32_t vl = env->vl;                            \
    uint32_t i;                                       \
                                                      \
    for (i = env->vstart; i < vl; i++) {              \
        if (!vm && !vext_elem_mask(v0, i)) {          \
            continue;                                 \
        }                                             \
        do_##NAME(vd, vs2, i);                        \
    }                                                 \
    env->vstart = 0;                                  \
}
/*
 * fclass for binary16: returns a 10-bit one-hot classification mask
 * (bit 0 = -inf ... bit 7 = +inf, bit 8 = sNaN, bit 9 = qNaN), matching
 * the scalar fclass encoding.
 */
target_ulong fclass_h(uint64_t frs1)
{
    float16 f = frs1;
    bool sign = float16_is_neg(f);

    if (float16_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float16_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float16_is_zero_or_denormal(f)) {
        return sign ? 1 << 2 : 1 << 5;
    } else if (float16_is_any_nan(f)) {
        float_status s = { }; /* for snan_bit_is_one */
        return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}

/* fclass for binary32; same result encoding as fclass_h(). */
target_ulong fclass_s(uint64_t frs1)
{
    float32 f = frs1;
    bool sign = float32_is_neg(f);

    if (float32_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float32_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float32_is_zero_or_denormal(f)) {
        return sign ? 1 << 2 : 1 << 5;
    } else if (float32_is_any_nan(f)) {
        float_status s = { }; /* for snan_bit_is_one */
        return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}

/* fclass for binary64; same result encoding as fclass_h(). */
target_ulong fclass_d(uint64_t frs1)
{
    float64 f = frs1;
    bool sign = float64_is_neg(f);

    if (float64_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float64_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float64_is_zero_or_denormal(f)) {
        return sign ? 1 << 2 : 1 << 5;
    } else if (float64_is_any_nan(f)) {
        float_status s = { }; /* for snan_bit_is_one */
        return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}
/* vfclass.v expansions for SEW = 16/32/64. */
RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
GEN_VEXT_V(vfclass_v_h)
GEN_VEXT_V(vfclass_v_w)
GEN_VEXT_V(vfclass_v_d)

/* Vector Floating-Point Merge Instruction */
/*
 * vfmerge.vfm: vd[i] = v0.mask[i] ? f[rs1] : vs2[i].
 * Unlike most helpers every element in [vstart, vl) is written; the
 * mask selects between the scalar and the old vs2 element.
 */
#define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        *((ETYPE *)vd + H(i))                                 \
            = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);      \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
/* Single-Width Floating-Point/Integer Type-Convert Instructions */
/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_h)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_w)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_d)

/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
GEN_VEXT_V_ENV(vfcvt_x_f_v_h)
GEN_VEXT_V_ENV(vfcvt_x_f_v_w)
GEN_VEXT_V_ENV(vfcvt_x_f_v_d)

/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_h)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_w)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_d)

/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_x_v_h)
GEN_VEXT_V_ENV(vfcvt_f_x_v_w)
GEN_VEXT_V_ENV(vfcvt_f_x_v_d)

/* Widening Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2): destination is double the source element width. */
#define WOP_UU_B uint16_t, uint8_t, uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w)

/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_h)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_w)

/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w)

/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_b)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_h)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_w)

/*
 * vfwcvt.f.f.v vd, vs2, vm
 * Convert single-width float to double-width float.
 */
static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
{
    /* ieee=true: treat the input as IEEE binary16, not AHP. */
    return float16_to_float32(a, true, s);
}

RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w)

/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2): destination is half the source element width. */
#define NOP_UU_B uint8_t, uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_b)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_h)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_w)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_w_b)
GEN_VEXT_V_ENV(vfncvt_x_f_w_h)
GEN_VEXT_V_ENV(vfncvt_x_f_w_w)

/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_h)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_w)

/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_w_h)
GEN_VEXT_V_ENV(vfncvt_f_x_w_w)

/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
static uint16_t vfncvtffv16(uint32_t a, float_status *s)
{
    /* ieee=true: produce IEEE binary16, not AHP. */
    return float32_to_float16(a, true, s);
}

RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_f_w_h)
GEN_VEXT_V_ENV(vfncvt_f_f_w_w)
/*
 *** Vector Reduction Operations
 */
/* Vector Single-Width Integer Reduction Instructions */
/*
 * Scalar reduction: the accumulator starts from vs1[0], folds every
 * active element of vs2 through OP, and the result lands in vd[0] only.
 * TD may be wider than TS2 (widening reductions below).
 */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
    TD s1 = *((TD *)vs1 + HD(0));                         \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        s1 = OP(s1, (TD)s2);                              \
    }                                                     \
    *((TD *)vd + HD(0)) = s1;                             \
    env->vstart = 0;                                      \
}

/* vd[0] = sum(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)

/* vd[0] = maxu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)

/* vd[0] = max(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)

/* vd[0] = minu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)

/* vd[0] = min(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)

/* vd[0] = and(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)

/* vd[0] = or(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)

/* vd[0] = xor(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)

/* Vector Widening Integer Reduction Instructions */
/* signed sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)

/* Unsigned sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
/* Vector Single-Width Floating-Point Reduction Instructions */
/*
 * FP scalar reduction: like GEN_VEXT_RED but OP also takes &env->fp_status,
 * so rounding mode and exception flags are honoured/accumulated.
 */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)         \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
    TD s1 = *((TD *)vs1 + HD(0));                         \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        s1 = OP(s1, (TD)s2, &env->fp_status);             \
    }                                                     \
    *((TD *)vd + HD(0)) = s1;                             \
    env->vstart = 0;                                      \
}

/* Unordered sum: elements folded in index order. */
GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Maximum value (IEEE-2019 maximumNumber semantics). */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)

/* Minimum value (IEEE-2019 minimumNumber semantics). */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
/* Vector Widening Floating-Point Reduction Instructions */
/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
                            void *vs2, CPURISCVState *env, uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;
    /* Accumulator is the double-width (float32) element vs1[0]. */
    uint32_t s1 = *((uint32_t *)vs1 + H4(0));

    for (i = env->vstart; i < vl; i++) {
        uint16_t s2 = *((uint16_t *)vs2 + H2(i));
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        /* Promote each binary16 element to binary32 before adding. */
        s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
                         &env->fp_status);
    }
    *((uint32_t *)vd + H4(0)) = s1;
    env->vstart = 0;
}

/* As above for SEW=32: float32 elements accumulated into a float64. */
void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
                            void *vs2, CPURISCVState *env, uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;
    uint64_t s1 = *((uint64_t *)vs1);

    for (i = env->vstart; i < vl; i++) {
        uint32_t s2 = *((uint32_t *)vs2 + H4(i));
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
                         &env->fp_status);
    }
    *((uint64_t *)vd) = s1;
    env->vstart = 0;
}
/*
 *** Vector Mask Operations
 */
/* Vector Mask-Register Logical Instructions */
/*
 * Mask-register logical ops are always unmasked and operate on one bit
 * per element.  Note the argument order passed to OP: OP(vs2bit, vs1bit).
 */
#define GEN_VEXT_MASK_VV(NAME, OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
    int a, b;                                             \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        a = vext_elem_mask(vs1, i);                       \
        b = vext_elem_mask(vs2, i);                       \
        vext_set_elem_mask(vd, i, OP(b, a));              \
    }                                                     \
    env->vstart = 0;                                      \
}

/* Operands are single mask bits (0/1), so logical ! is safe here. */
#define DO_NAND(N, M) (!(N & M))
#define DO_ANDNOT(N, M) (N & !M)
#define DO_NOR(N, M) (!(N | M))
#define DO_ORNOT(N, M) (N | !M)
#define DO_XNOR(N, M) (!(N ^ M))

GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
/* Vector count population in mask vcpop */
/* Returns the number of set mask bits among the active elements of vs2. */
target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
                             uint32_t desc)
{
    target_ulong cnt = 0;
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                cnt++;
            }
        }
    }
    env->vstart = 0;
    return cnt;
}

/* vfirst find-first-set mask bit*/
/* Returns the index of the first active set bit in vs2, or -1 if none. */
target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
                              uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                return i;
            }
        }
    }
    env->vstart = 0;
    return -1;
}
/* Which variant of the set-before/including/only-first instruction runs. */
enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};

/*
 * Common engine for vmsbf/vmsif/vmsof: scan the active bits of vs2 and
 * write a prefix of 1s into vd, ending before/at/only-at the first set bit
 * depending on 'type'.  Inactive elements are left untouched.
 */
static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;
    bool first_mask_bit = false;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
}

/* vmsbf.m: set mask bits before the first set bit of vs2. */
void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

/* vmsif.m: set mask bits up to and including the first set bit of vs2. */
void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

/* vmsof.m: set only the mask bit of the first set bit of vs2. */
void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}
/* Vector Iota Instruction */
/*
 * viota.m: vd[i] = running count of set bits in vs2[0..i-1],
 * counted over all elements but written only to active ones.
 */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
                  uint32_t desc)                                          \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t sum = 0;                                                     \
    int i;                                                                \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = sum;                                      \
        if (vext_elem_mask(vs2, i)) {                                     \
            sum++;                                                        \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
/* vid.v: vd[i] = i for each active element. */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    int i;                                                                \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
/*
 *** Vector Permutation Instructions
 */

/* Vector Slide Instructions */
/*
 * vslideup.vx: vd[i] = vs2[i - offset] for active i >= offset;
 * elements below the offset keep their old value.
 */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    env->vstart = 0;                                                      \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

/*
 * vslidedown.vx: vd[i] = vs2[i + s1] while the source stays inside the
 * register group (i < i_max); elements sourced beyond vlmax become zero.
 */
#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    target_ulong i_max, i;                                                \
                                                                          \
    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
        }                                                                 \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)

/* Shared body for vslide1up and vfslide1up: element 0 gets the scalar. */
#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                      \
static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1,       \
                                 void *vs2, CPURISCVState *env,             \
                                 uint32_t desc)                             \
{                                                                           \
    typedef uint##BITWIDTH##_t ETYPE;                                       \
    uint32_t vm = vext_vm(desc);                                            \
    uint32_t vl = env->vl;                                                  \
    uint32_t i;                                                             \
                                                                            \
    for (i = env->vstart; i < vl; i++) {                                    \
        if (!vm && !vext_elem_mask(v0, i)) {                                \
            continue;                                                       \
        }                                                                   \
        if (i == 0) {                                                       \
            *((ETYPE *)vd + H(i)) = s1;                                     \
        } else {                                                            \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
        }                                                                   \
    }                                                                       \
    env->vstart = 0;                                                        \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                     \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);             \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

/* Shared body for vslide1down and vfslide1down: last element gets s1. */
#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                   \
static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1,     \
                                   void *vs2, CPURISCVState *env,           \
                                   uint32_t desc)                           \
{                                                                           \
    typedef uint##BITWIDTH##_t ETYPE;                                       \
    uint32_t vm = vext_vm(desc);                                            \
    uint32_t vl = env->vl;                                                  \
    uint32_t i;                                                             \
                                                                            \
    for (i = env->vstart; i < vl; i++) {                                    \
        if (!vm && !vext_elem_mask(v0, i)) {                                \
            continue;                                                       \
        }                                                                   \
        if (i == vl - 1) {                                                  \
            *((ETYPE *)vd + H(i)) = s1;                                     \
        } else {                                                            \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));             \
        }                                                                   \
    }                                                                       \
    env->vstart = 0;                                                        \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                   \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);           \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
/* FP variants reuse the integer slide1 bodies; s1 carries the FP bits. */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);         \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)              \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);       \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4894 /* Vector Register Gather Instruction */
4895 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
4896 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4897 CPURISCVState *env, uint32_t desc) \
4899 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
4900 uint32_t vm = vext_vm(desc); \
4901 uint32_t vl = env->vl; \
4905 for (i = env->vstart; i < vl; i++) { \
4906 if (!vm && !vext_elem_mask(v0, i)) { \
4909 index = *((TS1 *)vs1 + HS1(i)); \
4910 if (index >= vlmax) { \
4911 *((TS2 *)vd + HS2(i)) = 0; \
4913 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
4919 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4920 GEN_VEXT_VRGATHER_VV(vrgather_vv_b
, uint8_t, uint8_t, H1
, H1
)
4921 GEN_VEXT_VRGATHER_VV(vrgather_vv_h
, uint16_t, uint16_t, H2
, H2
)
4922 GEN_VEXT_VRGATHER_VV(vrgather_vv_w
, uint32_t, uint32_t, H4
, H4
)
4923 GEN_VEXT_VRGATHER_VV(vrgather_vv_d
, uint64_t, uint64_t, H8
, H8
)
4925 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b
, uint16_t, uint8_t, H2
, H1
)
4926 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h
, uint16_t, uint16_t, H2
, H2
)
4927 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w
, uint16_t, uint32_t, H2
, H4
)
4928 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d
, uint16_t, uint64_t, H2
, H8
)
4930 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
4931 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4932 CPURISCVState *env, uint32_t desc) \
4934 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
4935 uint32_t vm = vext_vm(desc); \
4936 uint32_t vl = env->vl; \
4937 uint64_t index = s1; \
4940 for (i = env->vstart; i < vl; i++) { \
4941 if (!vm && !vext_elem_mask(v0, i)) { \
4944 if (index >= vlmax) { \
4945 *((ETYPE *)vd + H(i)) = 0; \
4947 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
4953 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
4954 GEN_VEXT_VRGATHER_VX(vrgather_vx_b
, uint8_t, H1
)
4955 GEN_VEXT_VRGATHER_VX(vrgather_vx_h
, uint16_t, H2
)
4956 GEN_VEXT_VRGATHER_VX(vrgather_vx_w
, uint32_t, H4
)
4957 GEN_VEXT_VRGATHER_VX(vrgather_vx_d
, uint64_t, H8
)
4959 /* Vector Compress Instruction */
4960 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
4961 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4962 CPURISCVState *env, uint32_t desc) \
4964 uint32_t vl = env->vl; \
4965 uint32_t num = 0, i; \
4967 for (i = env->vstart; i < vl; i++) { \
4968 if (!vext_elem_mask(vs1, i)) { \
4971 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
4977 /* Compress into vd elements of vs2 where vs1 is enabled */
4978 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b
, uint8_t, H1
)
4979 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h
, uint16_t, H2
)
4980 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w
, uint32_t, H4
)
4981 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d
, uint64_t, H8
)
4983 /* Vector Whole Register Move */
4984 void HELPER(vmvr_v
)(void *vd
, void *vs2
, CPURISCVState
*env
, uint32_t desc
)
4987 uint32_t maxsz
= simd_maxsz(desc
);
4988 uint32_t sewb
= 1 << FIELD_EX64(env
->vtype
, VTYPE
, VSEW
);
4989 uint32_t startb
= env
->vstart
* sewb
;
4990 uint32_t i
= startb
;
4992 memcpy((uint8_t *)vd
+ H1(i
),
4993 (uint8_t *)vs2
+ H1(i
),
4999 /* Vector Integer Extension */
5000 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5001 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5002 CPURISCVState *env, uint32_t desc) \
5004 uint32_t vl = env->vl; \
5005 uint32_t vm = vext_vm(desc); \
5008 for (i = env->vstart; i < vl; i++) { \
5009 if (!vm && !vext_elem_mask(v0, i)) { \
5012 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5017 GEN_VEXT_INT_EXT(vzext_vf2_h
, uint16_t, uint8_t, H2
, H1
)
5018 GEN_VEXT_INT_EXT(vzext_vf2_w
, uint32_t, uint16_t, H4
, H2
)
5019 GEN_VEXT_INT_EXT(vzext_vf2_d
, uint64_t, uint32_t, H8
, H4
)
5020 GEN_VEXT_INT_EXT(vzext_vf4_w
, uint32_t, uint8_t, H4
, H1
)
5021 GEN_VEXT_INT_EXT(vzext_vf4_d
, uint64_t, uint16_t, H8
, H2
)
5022 GEN_VEXT_INT_EXT(vzext_vf8_d
, uint64_t, uint8_t, H8
, H1
)
5024 GEN_VEXT_INT_EXT(vsext_vf2_h
, int16_t, int8_t, H2
, H1
)
5025 GEN_VEXT_INT_EXT(vsext_vf2_w
, int32_t, int16_t, H4
, H2
)
5026 GEN_VEXT_INT_EXT(vsext_vf2_d
, int64_t, int32_t, H8
, H4
)
5027 GEN_VEXT_INT_EXT(vsext_vf4_w
, int32_t, int8_t, H4
, H1
)
5028 GEN_VEXT_INT_EXT(vsext_vf4_d
, int64_t, int16_t, H8
, H2
)
5029 GEN_VEXT_INT_EXT(vsext_vf8_d
, int64_t, int8_t, H8
, H1
)