X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=target%2Friscv%2Fvector_helper.c;h=7505f9470a64cda009199347b1b64c770f2aa149;hb=7b945bdc0b10eda16dbeafe6a4dc1480176db38e;hp=b94f809eb38f2785cf231cb78a21abebf90cfd5a;hpb=fe65642bbae13bc8ea2e2498ea7bd2977ed443e5;p=mirror_qemu.git diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index b94f809eb3..7505f9470a 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -50,10 +50,7 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, } } - if ((sew > cpu->cfg.elen) - || vill - || (ediv != 0) - || (reserved != 0)) { + if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) { /* only set vill bit. */ env->vill = 1; env->vtype = 0; @@ -172,7 +169,7 @@ static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) { - return (addr & env->cur_pmmask) | env->cur_pmbase; + return (addr & ~env->cur_pmmask) | env->cur_pmbase; } /* @@ -211,7 +208,7 @@ static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, return; } if (tot - cnt == 0) { - return ; + return; } memset(base + cnt, -1, tot - cnt); } @@ -267,8 +264,35 @@ GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) +static void vext_set_tail_elems_1s(CPURISCVState *env, target_ulong vl, + void *vd, uint32_t desc, uint32_t nf, + uint32_t esz, uint32_t max_elems) +{ + uint32_t total_elems, vlenb, registers_used; + uint32_t vta = vext_vta(desc); + int k; + + if (vta == 0) { + return; + } + + total_elems = vext_get_total_elems(env, desc, esz); + vlenb = riscv_cpu_cfg(env)->vlen >> 3; + + for (k = 0; k < nf; ++k) { + vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz, + (k * max_elems + max_elems) * esz); + } + + if (nf * max_elems % total_elems != 0) { + registers_used = ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; + vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, + registers_used * vlenb); + } +} + /* - *** stride: access vector element from strided memory + * stride: access vector element from strided memory */ static void vext_ldst_stride(void *vd, void *v0, target_ulong base, @@ -281,8 +305,6 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, uint32_t nf = vext_nf(desc); uint32_t max_elems = vext_max_elems(desc, log2_esz); uint32_t esz = 1 << log2_esz; - uint32_t total_elems = vext_get_total_elems(env, desc, esz); - uint32_t vta = vext_vta(desc); uint32_t vma = vext_vma(desc); for (i = env->vstart; i < env->vl; i++, env->vstart++) { @@ -301,18 +323,8 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, } } env->vstart = 0; - /* set tail elements to 1s */ - for (k = 0; k < nf; ++k) { - vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, - (k * max_elems + max_elems) * esz); - } - if (nf * max_elems % total_elems != 0) { - uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; - uint32_t registers_used = - ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; - vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, - registers_used * vlenb); - } + + vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems); } #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ @@ -346,10 +358,10 @@ GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) /* - *** unit-stride: access elements stored contiguously in memory + * unit-stride: access elements stored contiguously in memory */ -/* unmasked unit-stride load and store operation*/ +/* unmasked unit-stride load and store operation */ static void vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, @@ -359,8 +371,6 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, uint32_t nf = vext_nf(desc); uint32_t max_elems = vext_max_elems(desc, log2_esz); uint32_t esz = 1 << log2_esz; - uint32_t total_elems = vext_get_total_elems(env, desc, esz); - uint32_t vta = vext_vta(desc); /* load bytes from guest memory */ for (i = env->vstart; i < evl; i++, env->vstart++) { @@ -372,23 +382,13 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, } } env->vstart = 0; - /* set tail elements to 1s */ - for (k = 0; k < nf; ++k) { - vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz, - (k * max_elems + max_elems) * esz); - } - if (nf * max_elems % total_elems != 0) { - uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; - uint32_t registers_used = - ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; - vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, - registers_used * vlenb); - } + + vext_set_tail_elems_1s(env, evl, vd, desc, nf, esz, max_elems); } /* - * masked unit-stride load and store operation will be a special case of stride, - * stride = NF * sizeof (MTYPE) + * masked unit-stride load and store operation will be a special case of + * stride, stride = NF * sizeof (MTYPE) */ #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ @@ -434,7 +434,7 @@ GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) /* - *** unit stride mask load and store, EEW = 1 + * unit stride mask load and store, EEW = 1 */ void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, CPURISCVState *env, uint32_t desc) @@ -455,7 +455,7 @@ void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, } /* - *** index: access vector element from indexed memory + * index: access vector element from indexed memory */ typedef target_ulong vext_get_index_addr(target_ulong base, uint32_t idx, void *vs2); @@ -484,8 +484,6 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, uint32_t vm = vext_vm(desc); uint32_t max_elems = vext_max_elems(desc, log2_esz); uint32_t esz = 1 << log2_esz; - uint32_t total_elems = vext_get_total_elems(env, desc, esz); - uint32_t vta = vext_vta(desc); uint32_t vma = vext_vma(desc); /* load bytes from guest memory */ @@ -505,18 +503,8 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, } } env->vstart = 0; - /* set tail elements to 1s */ - for (k = 0; k < nf; ++k) { - vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, - (k * max_elems + max_elems) * esz); - } - if (nf * max_elems % total_elems != 0) { - uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; - uint32_t registers_used = - ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; - vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, - registers_used * vlenb); - } + + vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems); } #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ @@ -571,7 +559,7 @@ GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w) GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d) /* - *** unit-stride fault-only-fisrt load instructions + * unit-stride fault-only-fisrt load instructions */ static inline void vext_ldff(void *vd, void *v0, target_ulong base, @@ -585,12 +573,10 @@ vext_ldff(void *vd, void *v0, target_ulong base, uint32_t vm = vext_vm(desc); uint32_t max_elems = vext_max_elems(desc, log2_esz); uint32_t esz = 1 << log2_esz; - uint32_t total_elems = vext_get_total_elems(env, desc, esz); - uint32_t vta = vext_vta(desc); uint32_t vma = vext_vma(desc); target_ulong addr, offset, remain; - /* probe every access*/ + /* probe every access */ for (i = env->vstart; i < env->vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { continue; @@ -647,18 +633,8 @@ ProbeSuccess: } } env->vstart = 0; - /* set tail elements to 1s */ - for (k = 0; k < nf; ++k) { - vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, - (k * max_elems + max_elems) * esz); - } - if (nf * max_elems % total_elems != 0) { - uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; - uint32_t registers_used = - ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; - vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, - registers_used * vlenb); - } + + vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems); } #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ @@ -689,7 +665,7 @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) /* - *** load and store whole register instructions + * load and store whole register instructions */ static void vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, @@ -697,7 +673,7 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, { uint32_t i, k, off, pos; uint32_t nf = vext_nf(desc); - uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; + uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3; uint32_t max_elems = vlenb >> log2_esz; k = env->vstart / max_elems; @@ -707,7 +683,8 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, /* load/store rest of elements of current segment pointed by vstart */ for (pos = off; pos < max_elems; pos++, env->vstart++) { target_ulong addr = base + ((pos + k * max_elems) << log2_esz); - ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra); + ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, + ra); } k++; } @@ -762,7 +739,7 @@ GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) /* - *** Vector Integer Arithmetic Instructions + * Vector Integer Arithmetic Instructions */ /* expand macro args before macro */ @@ -1142,7 +1119,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ \ *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ } \ - env->vstart = 0; \ + env->vstart = 0; \ /* set tail elements to 1s */ \ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } @@ -1167,7 +1144,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ { \ uint32_t vl = env->vl; \ uint32_t vm = vext_vm(desc); \ - uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t i; \ \ @@ -1178,8 +1155,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail-agnostic */ \ - /* set tail elements to 1s */ \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -1203,7 +1182,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ { \ uint32_t vl = env->vl; \ uint32_t vm = vext_vm(desc); \ - uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t i; \ \ @@ -1214,8 +1193,10 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail-agnostic */ \ - /* set tail elements to 1s */ \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -1331,10 +1312,13 @@ GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) -/* generate the helpers for shift instructions with one vector and one scalar */ +/* + * generate the helpers for shift instructions with one vector and one scalar + */ #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ - void *vs2, CPURISCVState *env, uint32_t desc) \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ @@ -1402,7 +1386,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ - uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ @@ -1420,8 +1404,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail-agnostic */ \ - /* set tail elements to 1s */ \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -1465,7 +1451,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ - uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ @@ -1483,8 +1469,10 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ DO_OP(s2, (ETYPE)(target_long)s1)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail-agnostic */ \ - /* set tail elements to 1s */ \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -1761,9 +1749,9 @@ GEN_VEXT_VX(vmulhsu_vx_d, 8) /* Vector Integer Divide Instructions */ #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) -#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ +#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) -#define DO_REM(N, M) (unlikely(M == 0) ? N :\ +#define DO_REM(N, M) (unlikely(M == 0) ? N : \ unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) @@ -1872,7 +1860,7 @@ GEN_VEXT_VX(vwmulsu_vx_h, 4) GEN_VEXT_VX(vwmulsu_vx_w, 8) /* Vector Single-Width Integer Multiply-Add Instructions */ -#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ { \ TX1 s1 = *((T1 *)vs1 + HS1(i)); \ @@ -2103,7 +2091,7 @@ GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) /* - *** Vector Fixed-Point Arithmetic Instructions + * Vector Fixed-Point Arithmetic Instructions */ /* Vector Single-Width Saturating Add and Subtract */ @@ -2185,7 +2173,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ do_##NAME, ESZ); \ } -static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, + uint8_t b) { uint8_t res = a + b; if (res < a) { @@ -2303,7 +2292,8 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, /* generate helpers for fixed point instructions with OPIVX format */ #define GEN_VEXT_VX_RM(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ - void *vs2, CPURISCVState *env, uint32_t desc) \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ { \ vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ do_##NAME, ESZ); \ @@ -2328,7 +2318,8 @@ static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) return res; } -static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, + int16_t b) { int16_t res = a + b; if ((res ^ a) & (res ^ b) & INT16_MIN) { @@ -2338,7 +2329,8 @@ static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) return res; } -static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, + int32_t b) { int32_t res = a + b; if ((res ^ a) & (res ^ b) & INT32_MIN) { @@ -2348,7 +2340,8 @@ static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) return res; } -static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, + int64_t b) { int64_t res = a + b; if ((res ^ a) & (res ^ b) & INT64_MIN) { @@ -2376,7 +2369,8 @@ GEN_VEXT_VX_RM(vsadd_vx_h, 2) GEN_VEXT_VX_RM(vsadd_vx_w, 4) GEN_VEXT_VX_RM(vsadd_vx_d, 8) -static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, + uint8_t b) { uint8_t res = a - b; if (res > a) { @@ -2447,7 +2441,8 @@ static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) return res; } -static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, + int16_t b) { int16_t res = a - b; if ((res ^ a) & (a ^ b) & INT16_MIN) { @@ -2457,7 +2452,8 @@ static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) return res; } -static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, + int32_t b) { int32_t res = a - b; if ((res ^ a) & (a ^ b) & INT32_MIN) { @@ -2467,7 +2463,8 @@ static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) return res; } -static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, + int64_t b) { int64_t res = a - b; if ((res ^ a) & (a ^ b) & INT64_MIN) { @@ -2523,7 +2520,8 @@ static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) return 0; /* round-down (truncate) */ } -static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, + int32_t b) { int64_t res = (int64_t)a + b; uint8_t round = get_round(vxrm, res, 1); @@ -2531,7 +2529,8 @@ static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) return (res >> 1) + round; } -static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, + int64_t b) { int64_t res = a + b; uint8_t round = get_round(vxrm, res, 1); @@ -2596,7 +2595,8 @@ GEN_VEXT_VX_RM(vaaddu_vx_h, 2) GEN_VEXT_VX_RM(vaaddu_vx_w, 4) GEN_VEXT_VX_RM(vaaddu_vx_d, 8) -static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, + int32_t b) { int64_t res = (int64_t)a - b; uint8_t round = get_round(vxrm, res, 1); @@ -2604,7 +2604,8 @@ static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) return (res >> 1) + round; } -static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, + int64_t b) { int64_t res = (int64_t)a - b; uint8_t round = get_round(vxrm, res, 1); @@ -2677,7 +2678,7 @@ static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) res = (int16_t)a * (int16_t)b; round = get_round(vxrm, res, 7); - res = (res >> 7) + round; + res = (res >> 7) + round; if (res > INT8_MAX) { env->vxsat = 0x1; @@ -2697,7 +2698,7 @@ static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) res = (int32_t)a * (int32_t)b; round = get_round(vxrm, res, 15); - res = (res >> 15) + round; + res = (res >> 15) + round; if (res > INT16_MAX) { env->vxsat = 0x1; @@ -2717,7 +2718,7 @@ static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) res = (int64_t)a * (int64_t)b; round = get_round(vxrm, res, 31); - res = (res >> 31) + round; + res = (res >> 31) + round; if (res > INT32_MAX) { env->vxsat = 0x1; @@ -2784,38 +2785,32 @@ vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) uint8_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; return res; } static inline uint16_t vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) { uint8_t round, shift = b & 0xf; - uint16_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; - return res; + return (a >> shift) + round; } static inline uint32_t vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) { uint8_t round, shift = b & 0x1f; - uint32_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; - return res; + return (a >> shift) + round; } static inline uint64_t vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) { uint8_t round, shift = b & 0x3f; - uint64_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; - return res; + return (a >> shift) + round; } RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) @@ -2839,41 +2834,33 @@ static inline int8_t vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) { uint8_t round, shift = b & 0x7; - int8_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; - return res; + return (a >> shift) + round; } static inline int16_t vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) { uint8_t round, shift = b & 0xf; - int16_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; - return res; + return (a >> shift) + round; } static inline int32_t vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) { uint8_t round, shift = b & 0x1f; - int32_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; - return res; + return (a >> shift) + round; } static inline int64_t vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) { uint8_t round, shift = b & 0x3f; - int64_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; - return res; + return (a >> shift) + round; } RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) @@ -2902,7 +2889,7 @@ vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) int16_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; if (res > INT8_MAX) { env->vxsat = 0x1; return INT8_MAX; @@ -2921,7 +2908,7 @@ vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) int32_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; if (res > INT16_MAX) { env->vxsat = 0x1; return INT16_MAX; @@ -2940,7 +2927,7 @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) int64_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; if (res > INT32_MAX) { env->vxsat = 0x1; return INT32_MAX; @@ -2973,7 +2960,7 @@ vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) uint16_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; if (res > UINT8_MAX) { env->vxsat = 0x1; return UINT8_MAX; @@ -2989,7 +2976,7 @@ vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) uint32_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; if (res > UINT16_MAX) { env->vxsat = 0x1; return UINT16_MAX; @@ -3005,7 +2992,7 @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) uint64_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; if (res > UINT32_MAX) { env->vxsat = 0x1; return UINT32_MAX; @@ -3029,7 +3016,7 @@ GEN_VEXT_VX_RM(vnclipu_wx_h, 2) GEN_VEXT_VX_RM(vnclipu_wx_w, 4) /* - *** Vector Float Point Arithmetic Instructions + * Vector Float Point Arithmetic Instructions */ /* Vector Single-Width Floating-Point Add/Subtract Instructions */ #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -3092,7 +3079,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ uint32_t total_elems = \ - vext_get_total_elems(env, desc, ESZ); \ + vext_get_total_elems(env, desc, ESZ); \ uint32_t vta = vext_vta(desc); \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ @@ -3158,13 +3145,13 @@ GEN_VEXT_VF(vfrsub_vf_d, 8) static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) { return float32_add(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), s); + float16_to_float32(b, true, s), s); } static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) { return float64_add(float32_to_float64(a, s), - float32_to_float64(b, s), s); + float32_to_float64(b, s), s); } @@ -3180,13 +3167,13 @@ GEN_VEXT_VF(vfwadd_vf_w, 8) static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) { return float32_sub(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), s); + float16_to_float32(b, true, s), s); } static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) { return float64_sub(float32_to_float64(a, s), - float32_to_float64(b, s), s); + float32_to_float64(b, s), s); } @@ -3290,13 +3277,13 @@ GEN_VEXT_VF(vfrdiv_vf_d, 8) static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) { return float32_mul(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), s); + float16_to_float32(b, true, s), s); } static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) { return float64_mul(float32_to_float64(a, s), - float32_to_float64(b, s), s); + float32_to_float64(b, s), s); } RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) @@ -3311,7 +3298,7 @@ GEN_VEXT_VF(vfwmul_vf_w, 8) /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ - CPURISCVState *env) \ + CPURISCVState *env) \ { \ TX1 s1 = *((T1 *)vs1 + HS1(i)); \ TX2 s2 = *((T2 *)vs2 + HS2(i)); \ @@ -3343,7 +3330,7 @@ GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ - CPURISCVState *env) \ + CPURISCVState *env) \ { \ TX2 s2 = *((T2 *)vs2 + HS2(i)); \ TD d = *((TD *)vd + HD(i)); \ @@ -3359,20 +3346,20 @@ GEN_VEXT_VF(vfmacc_vf_d, 8) static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { - return float16_muladd(a, b, d, - float_muladd_negate_c | float_muladd_negate_product, s); + return float16_muladd(a, b, d, float_muladd_negate_c | + float_muladd_negate_product, s); } static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) { - return float32_muladd(a, b, d, - float_muladd_negate_c | float_muladd_negate_product, s); + return float32_muladd(a, b, d, float_muladd_negate_c | + float_muladd_negate_product, s); } static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) { - return float64_muladd(a, b, d, - float_muladd_negate_c | float_muladd_negate_product, s); + return float64_muladd(a, b, d, float_muladd_negate_c | + float_muladd_negate_product, s); } RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) @@ -3474,20 +3461,20 @@ GEN_VEXT_VF(vfmadd_vf_d, 8) static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { - return float16_muladd(d, b, a, - float_muladd_negate_c | float_muladd_negate_product, s); + return float16_muladd(d, b, a, float_muladd_negate_c | + float_muladd_negate_product, s); } static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) { - return float32_muladd(d, b, a, - float_muladd_negate_c | float_muladd_negate_product, s); + return float32_muladd(d, b, a, float_muladd_negate_c | + float_muladd_negate_product, s); } static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) { - return float64_muladd(d, b, a, - float_muladd_negate_c | float_muladd_negate_product, s); + return float64_muladd(d, b, a, float_muladd_negate_c | + float_muladd_negate_product, s); } RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) @@ -3563,13 +3550,13 @@ GEN_VEXT_VF(vfnmsub_vf_d, 8) static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), d, 0, s); + float16_to_float32(b, true, s), d, 0, s); } static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) { return float64_muladd(float32_to_float64(a, s), - float32_to_float64(b, s), d, 0, s); + float32_to_float64(b, s), d, 0, s); } RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) @@ -3584,15 +3571,16 @@ GEN_VEXT_VF(vfwmacc_vf_w, 8) static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), d, - float_muladd_negate_c | float_muladd_negate_product, s); + float16_to_float32(b, true, s), d, + float_muladd_negate_c | float_muladd_negate_product, + s); } static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) { - return float64_muladd(float32_to_float64(a, s), - float32_to_float64(b, s), d, - float_muladd_negate_c | float_muladd_negate_product, s); + return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), + d, float_muladd_negate_c | + float_muladd_negate_product, s); } RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) @@ -3607,15 +3595,15 @@ GEN_VEXT_VF(vfwnmacc_vf_w, 8) static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), d, - float_muladd_negate_c, s); + float16_to_float32(b, true, s), d, + float_muladd_negate_c, s); } static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) { return float64_muladd(float32_to_float64(a, s), - float32_to_float64(b, s), d, - float_muladd_negate_c, s); + float32_to_float64(b, s), d, + float_muladd_negate_c, s); } RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) @@ -3630,15 +3618,15 @@ GEN_VEXT_VF(vfwmsac_vf_w, 8) static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), d, - float_muladd_negate_product, s); + float16_to_float32(b, true, s), d, + float_muladd_negate_product, s); } static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) { return float64_muladd(float32_to_float64(a, s), - float32_to_float64(b, s), d, - float_muladd_negate_product, s); + float32_to_float64(b, s), d, + float_muladd_negate_product, s); } RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) @@ -3656,9 +3644,9 @@ GEN_VEXT_VF(vfwnmsac_vf_w, 8) #define OP_UU_W uint32_t, uint32_t, uint32_t #define OP_UU_D uint64_t, uint64_t, uint64_t -#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ +#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, void *vs2, int i, \ - CPURISCVState *env) \ + CPURISCVState *env) \ { \ TX2 s2 = *((T2 *)vs2 + HS2(i)); \ *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ @@ -3666,7 +3654,7 @@ static void do_##NAME(void *vd, void *vs2, int i, \ #define GEN_VEXT_V_ENV(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ - CPURISCVState *env, uint32_t desc) \ + CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ @@ -3743,9 +3731,9 @@ static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) } int idx = ((exp & 1) << (precision - 1)) | - (frac >> (frac_size - precision + 1)); + (frac >> (frac_size - precision + 1)); uint64_t out_frac = (uint64_t)(lookup_table[idx]) << - (frac_size - precision); + (frac_size - precision); uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; uint64_t val = 0; @@ -3767,9 +3755,9 @@ static float16 frsqrt7_h(float16 f, float_status *s) * frsqrt7(-subnormal) = canonical NaN */ if (float16_is_signaling_nan(f, s) || - (float16_is_infinity(f) && sign) || - (float16_is_normal(f) && sign) || - (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { + (float16_is_infinity(f) && sign) || + (float16_is_normal(f) && sign) || + (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { s->float_exception_flags |= float_flag_invalid; return float16_default_nan(s); } @@ -3807,9 +3795,9 @@ static float32 frsqrt7_s(float32 f, float_status *s) * frsqrt7(-subnormal) = canonical NaN */ if (float32_is_signaling_nan(f, s) || - (float32_is_infinity(f) && sign) || - (float32_is_normal(f) && sign) || - (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { + (float32_is_infinity(f) && sign) || + (float32_is_normal(f) && sign) || + (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { s->float_exception_flags |= float_flag_invalid; return float32_default_nan(s); } @@ -3847,9 +3835,9 @@ static float64 frsqrt7_d(float64 f, float_status *s) * frsqrt7(-subnormal) = canonical NaN */ if (float64_is_signaling_nan(f, s) || - (float64_is_infinity(f) && sign) || - (float64_is_normal(f) && sign) || - (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { + (float64_is_infinity(f) && sign) || + (float64_is_normal(f) && sign) || + (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { s->float_exception_flags |= float_flag_invalid; return float64_default_nan(s); } @@ -3937,18 +3925,18 @@ static uint64_t frec7(uint64_t f, int exp_size, int frac_size, ((s->float_rounding_mode == float_round_up) && sign)) { /* Return greatest/negative finite value. */ return (sign << (exp_size + frac_size)) | - (MAKE_64BIT_MASK(frac_size, exp_size) - 1); + (MAKE_64BIT_MASK(frac_size, exp_size) - 1); } else { /* Return +-inf. */ return (sign << (exp_size + frac_size)) | - MAKE_64BIT_MASK(frac_size, exp_size); + MAKE_64BIT_MASK(frac_size, exp_size); } } } int idx = frac >> (frac_size - precision); uint64_t out_frac = (uint64_t)(lookup_table[idx]) << - (frac_size - precision); + (frac_size - precision); uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; if (out_exp == 0 || out_exp == UINT64_MAX) { @@ -4192,7 +4180,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ - uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ @@ -4211,8 +4199,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ DO_OP(s2, s1, &env->fp_status)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail-agnostic */ \ - /* set tail elements to 1s */ \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -4230,7 +4220,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ - uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ @@ -4248,8 +4238,10 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail-agnostic */ \ - /* set tail elements to 1s */ \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -4462,8 +4454,8 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ - *((ETYPE *)vd + H(i)) \ - = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ + *((ETYPE *)vd + H(i)) = \ + (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ } \ env->vstart = 0; \ /* set tail elements to 1s */ \ @@ -4512,7 +4504,9 @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) #define WOP_UU_B uint16_t, uint8_t, uint8_t #define WOP_UU_H uint32_t, uint16_t, uint16_t #define WOP_UU_W uint64_t, uint32_t, uint32_t -/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ +/* + * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. + */ RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) @@ -4524,7 +4518,9 @@ RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) -/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ +/* + * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. + */ RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) @@ -4541,8 +4537,7 @@ GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) /* - * vfwcvt.f.f.v vd, vs2, vm - * Convert single-width float to double-width float. + * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. */ static uint32_t vfwcvtffv16(uint16_t a, float_status *s) { @@ -4575,7 +4570,9 @@ GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) -/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ +/* + * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. + */ RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) @@ -4599,12 +4596,13 @@ GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) /* - *** Vector Reduction Operations + * Vector Reduction Operations */ /* Vector Single-Width Integer Reduction Instructions */ #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ - void *vs2, CPURISCVState *env, uint32_t desc) \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ @@ -4724,14 +4722,20 @@ GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) /* Maximum value */ -GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) -GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) -GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) +GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, + float16_maximum_number) +GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, + float32_maximum_number) +GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, + float64_maximum_number) /* Minimum value */ -GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) -GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) -GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) +GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, + float16_minimum_number) +GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, + float32_minimum_number) +GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, + float64_minimum_number) /* Vector Widening Floating-Point Add Instructions */ static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) @@ -4752,7 +4756,7 @@ GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) /* - *** Vector Mask Operations + * Vector Mask Operations */ /* Vector Mask-Register Logical Instructions */ #define GEN_VEXT_MASK_VV(NAME, OP) \ @@ -4761,7 +4765,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ uint32_t desc) \ { \ uint32_t vl = env->vl; \ - uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t i; \ int a, b; \ @@ -4772,10 +4776,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ vext_set_elem_mask(vd, i, OP(b, a)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail- \ - * agnostic \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s */ \ - /* set tail elements to 1s */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -4818,7 +4822,7 @@ target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, return cnt; } -/* vfirst find-first-set mask bit*/ +/* vfirst find-first-set mask bit */ target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, uint32_t desc) { @@ -4848,7 +4852,7 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; - uint32_t total_elems = env_archcpu(env)->cfg.vlen; + uint32_t total_elems = riscv_cpu_cfg(env)->vlen; uint32_t vta_all_1s = vext_vta_all_1s(desc); uint32_t vma = vext_vma(desc); int i; @@ -4883,8 +4887,10 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, } } env->vstart = 0; - /* mask destination register are always tail-agnostic */ - /* set tail elements to 1s */ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ if (vta_all_1s) { for (; i < total_elems; i++) { vext_set_elem_mask(vd, i, 1); @@ -4976,7 +4982,7 @@ GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) /* - *** Vector Permutation Instructions + * Vector Permutation Instructions */ /* Vector Slide Instructions */ @@ -5052,8 +5058,9 @@ GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ -static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ - void *vs2, CPURISCVState *env, uint32_t desc) \ +static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ { \ typedef uint##BITWIDTH##_t ETYPE; \ uint32_t vm = vext_vm(desc); \ @@ -5100,8 +5107,9 @@ GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ -static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ - void *vs2, CPURISCVState *env, uint32_t desc) \ +static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ { \ typedef uint##BITWIDTH##_t ETYPE; \ uint32_t vm = vext_vm(desc); \