]> git.proxmox.com Git - mirror_qemu.git/blob - target/riscv/vector_helper.c
Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into staging
[mirror_qemu.git] / target / riscv / vector_helper.c
1 /*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "fpu/softfloat.h"
27 #include "tcg/tcg-gvec-desc.h"
28 #include "internals.h"
29 #include <math.h>
30
31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32 target_ulong s2)
33 {
34 int vlmax, vl;
35 RISCVCPU *cpu = env_archcpu(env);
36 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39 int xlen = riscv_cpu_xlen(env);
40 bool vill = (s2 >> (xlen - 1)) & 0x1;
41 target_ulong reserved = s2 &
42 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
43 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
44
45 if (lmul & 4) {
46 /* Fractional LMUL. */
47 if (lmul == 4 ||
48 cpu->cfg.elen >> (8 - lmul) < sew) {
49 vill = true;
50 }
51 }
52
53 if ((sew > cpu->cfg.elen)
54 || vill
55 || (ediv != 0)
56 || (reserved != 0)) {
57 /* only set vill bit. */
58 env->vill = 1;
59 env->vtype = 0;
60 env->vl = 0;
61 env->vstart = 0;
62 return 0;
63 }
64
65 vlmax = vext_get_vlmax(cpu, s2);
66 if (s1 <= vlmax) {
67 vl = s1;
68 } else {
69 vl = vlmax;
70 }
71 env->vl = vl;
72 env->vtype = s2;
73 env->vstart = 0;
74 env->vill = 0;
75 return vl;
76 }
77
/*
 * Vector data is stored in host-endian 64-bit chunks.  The Hn() macros map
 * the index of an element narrower than 64 bits to its position in that
 * layout: the identity on little-endian hosts, an XOR fixup on big-endian
 * hosts.
 */
#if !HOST_BIG_ENDIAN
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#else
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#endif
97
98 static inline uint32_t vext_nf(uint32_t desc)
99 {
100 return FIELD_EX32(simd_data(desc), VDATA, NF);
101 }
102
103 static inline uint32_t vext_vm(uint32_t desc)
104 {
105 return FIELD_EX32(simd_data(desc), VDATA, VM);
106 }
107
108 /*
109 * Encode LMUL to lmul as following:
110 * LMUL vlmul lmul
111 * 1 000 0
112 * 2 001 1
113 * 4 010 2
114 * 8 011 3
115 * - 100 -
116 * 1/8 101 -3
117 * 1/4 110 -2
118 * 1/2 111 -1
119 */
120 static inline int32_t vext_lmul(uint32_t desc)
121 {
122 return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
123 }
124
/*
 * Return the maximum number of elements an instruction can operate on
 * (VLMAX).
 *
 * esz: log2 of the element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * simd_desc supports at most 2048 bytes and the maximum vlen is
     * 1024 bits, so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);
    /* VLMAX = vlenb * LMUL / element size, all as powers of two. */
    int scale = vext_lmul(desc) - esz;

    if (scale < 0) {
        return vlenb >> -scale;
    }
    return vlenb << scale;
}
142
143 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
144 {
145 return (addr & env->cur_pmmask) | env->cur_pmbase;
146 }
147
148 /*
149 * This function checks watchpoint before real load operation.
150 *
151 * In softmmu mode, the TLB API probe_access is enough for watchpoint check.
152 * In user mode, there is no watchpoint support now.
153 *
154 * It will trigger an exception if there is no mapping in TLB
155 * and page table walk can't fill the TLB entry. Then the guest
156 * software can return here after process the exception or never return.
157 */
158 static void probe_pages(CPURISCVState *env, target_ulong addr,
159 target_ulong len, uintptr_t ra,
160 MMUAccessType access_type)
161 {
162 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
163 target_ulong curlen = MIN(pagelen, len);
164
165 probe_access(env, adjust_addr(env, addr), curlen, access_type,
166 cpu_mmu_index(env, false), ra);
167 if (len > curlen) {
168 addr += curlen;
169 curlen = len - curlen;
170 probe_access(env, adjust_addr(env, addr), curlen, access_type,
171 cpu_mmu_index(env, false), ra);
172 }
173 }
174
/*
 * Set the mask bit of element `index` in the mask register at v0 to bit 0
 * of `value`.  The mask is accessed as an array of 64-bit words.
 */
static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    uint64_t *word = (uint64_t *)v0 + index / 64;

    *word = deposit64(*word, index % 64, 1, value);
}
183
/*
 * Return the mask bit (0 or 1) of element `index` in the mask register
 * at v0.
 *
 * Earlier designs (pre-0.9) had a varying number of bits per mask value
 * (MLEN).  In the 0.9 design, MLEN=1 (Section 4.5), so element i is simply
 * bit i of the register.
 */
static inline int vext_elem_mask(void *v0, int index)
{
    const uint64_t *words = v0;
    int word = index / 64;
    int bit = index % 64;

    return (words[word] >> bit) & 1;
}
195
196 /* elements operations for load and store */
197 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
198 uint32_t idx, void *vd, uintptr_t retaddr);
199
200 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
201 static void NAME(CPURISCVState *env, abi_ptr addr, \
202 uint32_t idx, void *vd, uintptr_t retaddr)\
203 { \
204 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
205 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
206 } \
207
208 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
209 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
210 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
211 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
212
213 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
214 static void NAME(CPURISCVState *env, abi_ptr addr, \
215 uint32_t idx, void *vd, uintptr_t retaddr)\
216 { \
217 ETYPE data = *((ETYPE *)vd + H(idx)); \
218 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
219 }
220
221 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
222 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
223 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
224 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
225
226 /*
227 *** stride: access vector element from strided memory
228 */
229 static void
230 vext_ldst_stride(void *vd, void *v0, target_ulong base,
231 target_ulong stride, CPURISCVState *env,
232 uint32_t desc, uint32_t vm,
233 vext_ldst_elem_fn *ldst_elem,
234 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
235 {
236 uint32_t i, k;
237 uint32_t nf = vext_nf(desc);
238 uint32_t max_elems = vext_max_elems(desc, esz);
239
240 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
241 if (!vm && !vext_elem_mask(v0, i)) {
242 continue;
243 }
244
245 k = 0;
246 while (k < nf) {
247 target_ulong addr = base + stride * i + (k << esz);
248 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
249 k++;
250 }
251 }
252 env->vstart = 0;
253 }
254
255 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
256 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
257 target_ulong stride, CPURISCVState *env, \
258 uint32_t desc) \
259 { \
260 uint32_t vm = vext_vm(desc); \
261 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
262 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
263 }
264
265 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
266 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
267 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
268 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
269
270 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
271 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
272 target_ulong stride, CPURISCVState *env, \
273 uint32_t desc) \
274 { \
275 uint32_t vm = vext_vm(desc); \
276 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
277 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
278 }
279
280 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
281 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
282 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
283 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
284
285 /*
286 *** unit-stride: access elements stored contiguously in memory
287 */
288
289 /* unmasked unit-stride load and store operation*/
290 static void
291 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
292 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
293 uintptr_t ra, MMUAccessType access_type)
294 {
295 uint32_t i, k;
296 uint32_t nf = vext_nf(desc);
297 uint32_t max_elems = vext_max_elems(desc, esz);
298
299 /* load bytes from guest memory */
300 for (i = env->vstart; i < evl; i++, env->vstart++) {
301 k = 0;
302 while (k < nf) {
303 target_ulong addr = base + ((i * nf + k) << esz);
304 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
305 k++;
306 }
307 }
308 env->vstart = 0;
309 }
310
311 /*
312 * masked unit-stride load and store operation will be a special case of stride,
313 * stride = NF * sizeof (MTYPE)
314 */
315
316 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
317 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
318 CPURISCVState *env, uint32_t desc) \
319 { \
320 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
321 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
322 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
323 } \
324 \
325 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
326 CPURISCVState *env, uint32_t desc) \
327 { \
328 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
329 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
330 }
331
332 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
333 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
334 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
335 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
336
337 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
338 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
339 CPURISCVState *env, uint32_t desc) \
340 { \
341 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
342 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
343 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
344 } \
345 \
346 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
347 CPURISCVState *env, uint32_t desc) \
348 { \
349 vext_ldst_us(vd, base, env, desc, STORE_FN, \
350 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
351 }
352
353 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
354 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
355 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
356 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
357
358 /*
359 *** unit stride mask load and store, EEW = 1
360 */
361 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
362 CPURISCVState *env, uint32_t desc)
363 {
364 /* evl = ceil(vl/8) */
365 uint8_t evl = (env->vl + 7) >> 3;
366 vext_ldst_us(vd, base, env, desc, lde_b,
367 0, evl, GETPC(), MMU_DATA_LOAD);
368 }
369
370 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
371 CPURISCVState *env, uint32_t desc)
372 {
373 /* evl = ceil(vl/8) */
374 uint8_t evl = (env->vl + 7) >> 3;
375 vext_ldst_us(vd, base, env, desc, ste_b,
376 0, evl, GETPC(), MMU_DATA_STORE);
377 }
378
379 /*
380 *** index: access vector element from indexed memory
381 */
382 typedef target_ulong vext_get_index_addr(target_ulong base,
383 uint32_t idx, void *vs2);
384
385 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
386 static target_ulong NAME(target_ulong base, \
387 uint32_t idx, void *vs2) \
388 { \
389 return (base + *((ETYPE *)vs2 + H(idx))); \
390 }
391
392 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
393 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
394 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
395 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
396
397 static inline void
398 vext_ldst_index(void *vd, void *v0, target_ulong base,
399 void *vs2, CPURISCVState *env, uint32_t desc,
400 vext_get_index_addr get_index_addr,
401 vext_ldst_elem_fn *ldst_elem,
402 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
403 {
404 uint32_t i, k;
405 uint32_t nf = vext_nf(desc);
406 uint32_t vm = vext_vm(desc);
407 uint32_t max_elems = vext_max_elems(desc, esz);
408
409 /* load bytes from guest memory */
410 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
411 if (!vm && !vext_elem_mask(v0, i)) {
412 continue;
413 }
414
415 k = 0;
416 while (k < nf) {
417 abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
418 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
419 k++;
420 }
421 }
422 env->vstart = 0;
423 }
424
425 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
426 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
427 void *vs2, CPURISCVState *env, uint32_t desc) \
428 { \
429 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
430 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
431 }
432
433 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
434 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
435 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
436 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
437 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
438 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
439 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
440 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
441 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
442 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
443 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
444 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
445 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
446 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
447 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
448 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
449
450 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
451 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
452 void *vs2, CPURISCVState *env, uint32_t desc) \
453 { \
454 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
455 STORE_FN, ctzl(sizeof(ETYPE)), \
456 GETPC(), MMU_DATA_STORE); \
457 }
458
459 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
460 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
461 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
462 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
463 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
464 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
465 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
466 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
467 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
468 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
469 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
470 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
471 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
472 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
473 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
474 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
475
476 /*
477 *** unit-stride fault-only-fisrt load instructions
478 */
479 static inline void
480 vext_ldff(void *vd, void *v0, target_ulong base,
481 CPURISCVState *env, uint32_t desc,
482 vext_ldst_elem_fn *ldst_elem,
483 uint32_t esz, uintptr_t ra)
484 {
485 void *host;
486 uint32_t i, k, vl = 0;
487 uint32_t nf = vext_nf(desc);
488 uint32_t vm = vext_vm(desc);
489 uint32_t max_elems = vext_max_elems(desc, esz);
490 target_ulong addr, offset, remain;
491
492 /* probe every access*/
493 for (i = env->vstart; i < env->vl; i++) {
494 if (!vm && !vext_elem_mask(v0, i)) {
495 continue;
496 }
497 addr = adjust_addr(env, base + i * (nf << esz));
498 if (i == 0) {
499 probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
500 } else {
501 /* if it triggers an exception, no need to check watchpoint */
502 remain = nf << esz;
503 while (remain > 0) {
504 offset = -(addr | TARGET_PAGE_MASK);
505 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
506 cpu_mmu_index(env, false));
507 if (host) {
508 #ifdef CONFIG_USER_ONLY
509 if (page_check_range(addr, offset, PAGE_READ) < 0) {
510 vl = i;
511 goto ProbeSuccess;
512 }
513 #else
514 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
515 #endif
516 } else {
517 vl = i;
518 goto ProbeSuccess;
519 }
520 if (remain <= offset) {
521 break;
522 }
523 remain -= offset;
524 addr = adjust_addr(env, addr + offset);
525 }
526 }
527 }
528 ProbeSuccess:
529 /* load bytes from guest memory */
530 if (vl != 0) {
531 env->vl = vl;
532 }
533 for (i = env->vstart; i < env->vl; i++) {
534 k = 0;
535 if (!vm && !vext_elem_mask(v0, i)) {
536 continue;
537 }
538 while (k < nf) {
539 target_ulong addr = base + ((i * nf + k) << esz);
540 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
541 k++;
542 }
543 }
544 env->vstart = 0;
545 }
546
547 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
548 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
549 CPURISCVState *env, uint32_t desc) \
550 { \
551 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
552 ctzl(sizeof(ETYPE)), GETPC()); \
553 }
554
555 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
556 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
557 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
558 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
559
/*
 * Element-wise binary operators.  Every argument is parenthesized in the
 * expansion so that compound expressions passed as N or M keep their
 * intended grouping.
 */
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  ((N) & (M))
#define DO_XOR(N, M)  ((N) ^ (M))
#define DO_OR(N, M)   ((N) | (M))
#define DO_ADD(N, M)  ((N) + (M))

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/*
 * Unsigned min/max: both operands are cast to UMTYPE first.  UMTYPE is not
 * defined here; it must be in scope wherever these macros are expanded.
 */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)(N), (UMTYPE)(M))
#define DO_MINU(N, M) DO_MIN((UMTYPE)(N), (UMTYPE)(M))
573
574 /*
575 *** load and store whole register instructions
576 */
577 static void
578 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
579 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
580 MMUAccessType access_type)
581 {
582 uint32_t i, k, off, pos;
583 uint32_t nf = vext_nf(desc);
584 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
585 uint32_t max_elems = vlenb >> esz;
586
587 k = env->vstart / max_elems;
588 off = env->vstart % max_elems;
589
590 if (off) {
591 /* load/store rest of elements of current segment pointed by vstart */
592 for (pos = off; pos < max_elems; pos++, env->vstart++) {
593 target_ulong addr = base + ((pos + k * max_elems) << esz);
594 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
595 }
596 k++;
597 }
598
599 /* load/store elements for rest of segments */
600 for (; k < nf; k++) {
601 for (i = 0; i < max_elems; i++, env->vstart++) {
602 target_ulong addr = base + ((i + k * max_elems) << esz);
603 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
604 }
605 }
606
607 env->vstart = 0;
608 }
609
610 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
611 void HELPER(NAME)(void *vd, target_ulong base, \
612 CPURISCVState *env, uint32_t desc) \
613 { \
614 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
615 ctzl(sizeof(ETYPE)), GETPC(), \
616 MMU_DATA_LOAD); \
617 }
618
619 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
620 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
621 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
622 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
623 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
624 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
625 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
626 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
627 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
628 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
629 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
630 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
631 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
632 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
633 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
634 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
635
636 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
637 void HELPER(NAME)(void *vd, target_ulong base, \
638 CPURISCVState *env, uint32_t desc) \
639 { \
640 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
641 ctzl(sizeof(ETYPE)), GETPC(), \
642 MMU_DATA_STORE); \
643 }
644
645 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
646 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
647 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
648 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
649
/*
 *** Vector Integer Arithmetic Instructions
 */

/*
 * Invoke `macro` with the remaining arguments, after they have been
 * macro-expanded.  This lets one of the type lists below supply several
 * parameters of the invoked macro at once.
 */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/*
 * Operand type lists, in the order (TD, T1, T2, TX1, TX2).
 * The suffix gives the width of the narrowest operand:
 * B = 8, H = 16, W = 32, D = 64 bits.
 */

/* Same width for every operand: signed, unsigned, mixed. */
#define OP_SSS_B  int8_t,   int8_t,   int8_t,   int8_t,   int8_t
#define OP_SSS_H  int16_t,  int16_t,  int16_t,  int16_t,  int16_t
#define OP_SSS_W  int32_t,  int32_t,  int32_t,  int32_t,  int32_t
#define OP_SSS_D  int64_t,  int64_t,  int64_t,  int64_t,  int64_t
#define OP_UUU_B  uint8_t,  uint8_t,  uint8_t,  uint8_t,  uint8_t
#define OP_UUU_H  uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W  uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D  uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B  int8_t,   uint8_t,  int8_t,   uint8_t,  int8_t
#define OP_SUS_H  int16_t,  uint16_t, int16_t,  uint16_t, int16_t
#define OP_SUS_W  int32_t,  uint32_t, int32_t,  uint32_t, int32_t
#define OP_SUS_D  int64_t,  uint64_t, int64_t,  uint64_t, int64_t

/* Widening: the destination is twice as wide as both sources. */
#define WOP_UUU_B uint16_t, uint8_t,  uint8_t,  uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t,  int8_t,   int8_t,   int16_t,  int16_t
#define WOP_SSS_H int32_t,  int16_t,  int16_t,  int32_t,  int32_t
#define WOP_SSS_W int64_t,  int32_t,  int32_t,  int64_t,  int64_t
#define WOP_SUS_B int16_t,  uint8_t,  int8_t,   uint16_t, int16_t
#define WOP_SUS_H int32_t,  uint16_t, int16_t,  uint32_t, int32_t
#define WOP_SUS_W int64_t,  uint32_t, int32_t,  uint64_t, int64_t
#define WOP_SSU_B int16_t,  int8_t,   uint8_t,  int16_t,  uint16_t
#define WOP_SSU_H int32_t,  int16_t,  uint16_t, int32_t,  uint32_t
#define WOP_SSU_W int64_t,  int32_t,  uint32_t, int64_t,  uint64_t

/* Narrowing: the second source is twice as wide as the destination. */
#define NOP_SSS_B int8_t,   int8_t,   int16_t,  int8_t,   int16_t
#define NOP_SSS_H int16_t,  int16_t,  int32_t,  int16_t,  int32_t
#define NOP_SSS_W int32_t,  int32_t,  int64_t,  int32_t,  int64_t
#define NOP_UUU_B uint8_t,  uint8_t,  uint16_t, uint8_t,  uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
688
/* Operation on one pair of vector elements: vd[i] = op(vs2[i], vs1[i]). */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

/*
 * Define do_NAME(), the per-element function of a vector-vector operation.
 * T1/T2 are the storage types of the vs1/vs2 elements, TX1/TX2 the types
 * they are converted to before OP is applied, and TD the destination
 * element type.  HD/HS1/HS2 are the index fixups of the three registers.
 */
#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = ((T1 *)vs1)[HS1(i)];                               \
    TX2 s2 = ((T2 *)vs2)[HS2(i)];                               \
                                                                \
    ((TD *)vd)[HD(i)] = OP(s2, s1);                             \
}
/*
 * Subtraction and reversed subtraction.  Arguments are parenthesized so
 * that a compound expression passed as the subtrahend is subtracted as a
 * whole.
 */
#define DO_SUB(N, M)  ((N) - (M))
#define DO_RSUB(N, M) ((M) - (N))
701
702 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
703 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
704 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
705 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
706 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
707 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
708 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
709 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
710
711 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
712 CPURISCVState *env, uint32_t desc,
713 uint32_t esz, uint32_t dsz,
714 opivv2_fn *fn)
715 {
716 uint32_t vm = vext_vm(desc);
717 uint32_t vl = env->vl;
718 uint32_t i;
719
720 for (i = env->vstart; i < vl; i++) {
721 if (!vm && !vext_elem_mask(v0, i)) {
722 continue;
723 }
724 fn(vd, vs1, vs2, i);
725 }
726 env->vstart = 0;
727 }
728
729 /* generate the helpers for OPIVV */
730 #define GEN_VEXT_VV(NAME, ESZ, DSZ) \
731 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
732 void *vs2, CPURISCVState *env, \
733 uint32_t desc) \
734 { \
735 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
736 do_##NAME); \
737 }
738
739 GEN_VEXT_VV(vadd_vv_b, 1, 1)
740 GEN_VEXT_VV(vadd_vv_h, 2, 2)
741 GEN_VEXT_VV(vadd_vv_w, 4, 4)
742 GEN_VEXT_VV(vadd_vv_d, 8, 8)
743 GEN_VEXT_VV(vsub_vv_b, 1, 1)
744 GEN_VEXT_VV(vsub_vv_h, 2, 2)
745 GEN_VEXT_VV(vsub_vv_w, 4, 4)
746 GEN_VEXT_VV(vsub_vv_d, 8, 8)
747
748 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
749
750 /*
751 * (T1)s1 gives the real operator type.
752 * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
753 */
754 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
755 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
756 { \
757 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
758 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
759 }
760
761 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
762 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
763 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
764 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
765 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
766 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
767 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
768 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
769 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
770 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
771 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
772 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
773
774 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
775 CPURISCVState *env, uint32_t desc,
776 uint32_t esz, uint32_t dsz,
777 opivx2_fn fn)
778 {
779 uint32_t vm = vext_vm(desc);
780 uint32_t vl = env->vl;
781 uint32_t i;
782
783 for (i = env->vstart; i < vl; i++) {
784 if (!vm && !vext_elem_mask(v0, i)) {
785 continue;
786 }
787 fn(vd, s1, vs2, i);
788 }
789 env->vstart = 0;
790 }
791
792 /* generate the helpers for OPIVX */
793 #define GEN_VEXT_VX(NAME, ESZ, DSZ) \
794 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
795 void *vs2, CPURISCVState *env, \
796 uint32_t desc) \
797 { \
798 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
799 do_##NAME); \
800 }
801
802 GEN_VEXT_VX(vadd_vx_b, 1, 1)
803 GEN_VEXT_VX(vadd_vx_h, 2, 2)
804 GEN_VEXT_VX(vadd_vx_w, 4, 4)
805 GEN_VEXT_VX(vadd_vx_d, 8, 8)
806 GEN_VEXT_VX(vsub_vx_b, 1, 1)
807 GEN_VEXT_VX(vsub_vx_h, 2, 2)
808 GEN_VEXT_VX(vsub_vx_w, 4, 4)
809 GEN_VEXT_VX(vsub_vx_d, 8, 8)
810 GEN_VEXT_VX(vrsub_vx_b, 1, 1)
811 GEN_VEXT_VX(vrsub_vx_h, 2, 2)
812 GEN_VEXT_VX(vrsub_vx_w, 4, 4)
813 GEN_VEXT_VX(vrsub_vx_d, 8, 8)
814
815 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
816 {
817 intptr_t oprsz = simd_oprsz(desc);
818 intptr_t i;
819
820 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
821 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
822 }
823 }
824
825 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
826 {
827 intptr_t oprsz = simd_oprsz(desc);
828 intptr_t i;
829
830 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
831 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
832 }
833 }
834
835 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
836 {
837 intptr_t oprsz = simd_oprsz(desc);
838 intptr_t i;
839
840 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
841 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
842 }
843 }
844
845 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
846 {
847 intptr_t oprsz = simd_oprsz(desc);
848 intptr_t i;
849
850 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
851 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
852 }
853 }
854
/* Vector Widening Integer Add/Subtract */

/*
 * Operand type lists (TD, T1, T2, TX1, TX2) for the forms whose second
 * source (T2) is already as wide as the destination.
 *
 * WOP_UUU_* and WOP_SSS_* are not repeated here: they are defined once,
 * together with the other operand type lists earlier in this file, so the
 * two copies cannot drift apart.
 */
#define WOP_WUUU_B  uint16_t, uint8_t,  uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B  int16_t,  int8_t,   int16_t,  int16_t,  int16_t
#define WOP_WSSS_H  int32_t,  int16_t,  int32_t,  int32_t,  int32_t
#define WOP_WSSS_W  int64_t,  int32_t,  int64_t,  int64_t,  int64_t
868 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
869 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
870 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
871 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
872 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
873 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
874 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
875 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
876 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
877 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
878 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
879 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
880 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
881 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
882 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
883 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
884 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
885 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
886 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
887 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
888 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
889 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
890 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
891 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
892 GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
893 GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
894 GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
895 GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
896 GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
897 GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
898 GEN_VEXT_VV(vwadd_vv_b, 1, 2)
899 GEN_VEXT_VV(vwadd_vv_h, 2, 4)
900 GEN_VEXT_VV(vwadd_vv_w, 4, 8)
901 GEN_VEXT_VV(vwsub_vv_b, 1, 2)
902 GEN_VEXT_VV(vwsub_vv_h, 2, 4)
903 GEN_VEXT_VV(vwsub_vv_w, 4, 8)
904 GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
905 GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
906 GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
907 GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
908 GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
909 GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
910 GEN_VEXT_VV(vwadd_wv_b, 1, 2)
911 GEN_VEXT_VV(vwadd_wv_h, 2, 4)
912 GEN_VEXT_VV(vwadd_wv_w, 4, 8)
913 GEN_VEXT_VV(vwsub_wv_b, 1, 2)
914 GEN_VEXT_VV(vwsub_wv_h, 2, 4)
915 GEN_VEXT_VV(vwsub_wv_w, 4, 8)
916
917 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
918 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
919 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
920 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
921 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
922 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
923 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
924 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
925 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
926 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
927 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
928 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
929 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
930 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
931 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
932 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
933 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
934 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
935 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
936 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
937 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
938 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
939 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
940 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
941 GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
942 GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
943 GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
944 GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
945 GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
946 GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
947 GEN_VEXT_VX(vwadd_vx_b, 1, 2)
948 GEN_VEXT_VX(vwadd_vx_h, 2, 4)
949 GEN_VEXT_VX(vwadd_vx_w, 4, 8)
950 GEN_VEXT_VX(vwsub_vx_b, 1, 2)
951 GEN_VEXT_VX(vwsub_vx_h, 2, 4)
952 GEN_VEXT_VX(vwsub_vx_w, 4, 8)
953 GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
954 GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
955 GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
956 GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
957 GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
958 GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
959 GEN_VEXT_VX(vwadd_wx_b, 1, 2)
960 GEN_VEXT_VX(vwadd_wx_h, 2, 4)
961 GEN_VEXT_VX(vwadd_wx_w, 4, 8)
962 GEN_VEXT_VX(vwsub_wx_b, 1, 2)
963 GEN_VEXT_VX(vwsub_wx_h, 2, 4)
964 GEN_VEXT_VX(vwsub_wx_w, 4, 8)
965
966 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
/*
 * Per-element add-with-carry and subtract-with-borrow.
 * N and M are the two operands and C is the incoming carry/borrow value.
 * Every argument is parenthesized so that compound expressions passed by a
 * caller keep their own precedence after expansion.
 */
#define DO_VADC(N, M, C) ((N) + (M) + (C))
#define DO_VSBC(N, M, C) ((N) - (M) - (C))
969
/*
 * Generate helper NAME for a vector-vector operation with carry-in.
 *
 * ETYPE is the element type, H(i) turns an element index into an offset
 * inside the register buffer and DO_OP(s2, s1, carry) is the per-element
 * operation.  Elements env->vstart .. env->vl - 1 are processed; the carry
 * of element i is the value vext_elem_mask(v0, i) returns, and no element is
 * skipped.  env->vstart is reset to 0 afterwards.  desc is not used.
 */
#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,           \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    const uint32_t vl = env->vl;                                      \
    uint32_t idx = env->vstart;                                       \
                                                                      \
    while (idx < vl) {                                                \
        ETYPE op1 = *((ETYPE *)vs1 + H(idx));                         \
        ETYPE op2 = *((ETYPE *)vs2 + H(idx));                         \
        ETYPE cin = vext_elem_mask(v0, idx);                          \
                                                                      \
        *((ETYPE *)vd + H(idx)) = DO_OP(op2, op1, cin);               \
        idx++;                                                        \
    }                                                                 \
    env->vstart = 0;                                                  \
}
986
987 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
988 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
989 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
990 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
991
992 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
993 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
994 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
995 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
996
/*
 * Generate helper NAME for a vector-scalar operation with carry-in.
 *
 * The scalar s1 is converted to ETYPE (through target_long) and combined
 * with every vs2 element from env->vstart up to env->vl - 1 as
 * DO_OP(vs2[i], scalar, carry), where carry is vext_elem_mask(v0, i).
 * env->vstart is reset to 0 afterwards.  desc is not used.
 */
#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    const uint32_t vl = env->vl;                                      \
    const ETYPE scalar = (ETYPE)(target_long)s1;                      \
    uint32_t idx;                                                     \
                                                                      \
    for (idx = env->vstart; idx < vl; idx++) {                        \
        ETYPE elem = *((ETYPE *)vs2 + H(idx));                        \
        ETYPE cin = vext_elem_mask(v0, idx);                          \
                                                                      \
        *((ETYPE *)vd + H(idx)) = DO_OP(elem, scalar, cin);           \
    }                                                                 \
    env->vstart = 0;                                                  \
}
1012
1013 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
1014 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1015 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1016 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1017
1018 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
1019 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1020 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1021 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1022
/*
 * Carry-out of N + M + C and borrow-out of N - M - C.
 *
 * DO_MADC truncates the sum to the type of N and reports a carry when the
 * truncated sum wrapped around (it is smaller than N, or not larger than N
 * when a carry-in was added).  DO_MSBC reports a borrow when N < M, or
 * N <= M with a borrow-in.  Both are meant for unsigned element types.
 * Arguments are parenthesized so compound expressions expand safely; note
 * that N and M are evaluated more than once.
 */
#define DO_MADC(N, M, C) ((C) ? (__typeof(N))((N) + (M) + 1) <= (N) : \
                                (__typeof(N))((N) + (M)) < (N))
#define DO_MSBC(N, M, C) ((C) ? (N) <= (M) : (N) < (M))
1026
/*
 * Generate helper NAME that writes the carry/borrow-out of a vector-vector
 * operation into the mask register vd.
 *
 * For each element i in env->vstart .. env->vl - 1 the result bit is
 * DO_OP(vs2[i], vs1[i], carry).  The carry-in is taken from v0 only when
 * vext_vm(desc) is zero; otherwise it is 0.  env->vstart is reset to 0
 * afterwards.
 */
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,           \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    const uint32_t vl = env->vl;                                      \
    const uint32_t vm = vext_vm(desc);                                \
    uint32_t idx;                                                     \
                                                                      \
    for (idx = env->vstart; idx < vl; idx++) {                        \
        ETYPE op1 = *((ETYPE *)vs1 + H(idx));                         \
        ETYPE op2 = *((ETYPE *)vs2 + H(idx));                         \
        ETYPE cin = vm ? 0 : !!vext_elem_mask(v0, idx);               \
                                                                      \
        vext_set_elem_mask(vd, idx, DO_OP(op2, op1, cin));            \
    }                                                                 \
    env->vstart = 0;                                                  \
}
1043
1044 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1045 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1046 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1047 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1048
1049 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1050 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1051 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1052 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1053
/*
 * Generate helper NAME that writes the carry/borrow-out of a vector-scalar
 * operation into the mask register vd.
 *
 * The scalar s1 is converted to ETYPE (through target_long).  For each
 * element i in env->vstart .. env->vl - 1 the result bit is
 * DO_OP(vs2[i], scalar, carry); the carry-in comes from v0 only when
 * vext_vm(desc) is zero and is 0 otherwise.  env->vstart is reset to 0
 * afterwards.
 */
#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)                     \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,                \
                  void *vs2, CPURISCVState *env, uint32_t desc)       \
{                                                                     \
    const uint32_t vl = env->vl;                                      \
    const uint32_t vm = vext_vm(desc);                                \
    const ETYPE scalar = (ETYPE)(target_long)s1;                      \
    uint32_t idx;                                                     \
                                                                      \
    for (idx = env->vstart; idx < vl; idx++) {                        \
        ETYPE elem = *((ETYPE *)vs2 + H(idx));                        \
        ETYPE cin = vm ? 0 : !!vext_elem_mask(v0, idx);               \
                                                                      \
        vext_set_elem_mask(vd, idx, DO_OP(elem, scalar, cin));        \
    }                                                                 \
    env->vstart = 0;                                                  \
}
1070
1071 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1072 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1073 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1074 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1075
1076 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1077 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1078 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1079 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1080
1081 /* Vector Bitwise Logical Instructions */
1082 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1083 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1084 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1085 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1086 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1087 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1088 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1089 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1090 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1091 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1092 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1093 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1094 GEN_VEXT_VV(vand_vv_b, 1, 1)
1095 GEN_VEXT_VV(vand_vv_h, 2, 2)
1096 GEN_VEXT_VV(vand_vv_w, 4, 4)
1097 GEN_VEXT_VV(vand_vv_d, 8, 8)
1098 GEN_VEXT_VV(vor_vv_b, 1, 1)
1099 GEN_VEXT_VV(vor_vv_h, 2, 2)
1100 GEN_VEXT_VV(vor_vv_w, 4, 4)
1101 GEN_VEXT_VV(vor_vv_d, 8, 8)
1102 GEN_VEXT_VV(vxor_vv_b, 1, 1)
1103 GEN_VEXT_VV(vxor_vv_h, 2, 2)
1104 GEN_VEXT_VV(vxor_vv_w, 4, 4)
1105 GEN_VEXT_VV(vxor_vv_d, 8, 8)
1106
1107 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1108 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1109 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1110 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1111 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1112 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1113 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1114 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1115 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1116 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1117 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1118 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1119 GEN_VEXT_VX(vand_vx_b, 1, 1)
1120 GEN_VEXT_VX(vand_vx_h, 2, 2)
1121 GEN_VEXT_VX(vand_vx_w, 4, 4)
1122 GEN_VEXT_VX(vand_vx_d, 8, 8)
1123 GEN_VEXT_VX(vor_vx_b, 1, 1)
1124 GEN_VEXT_VX(vor_vx_h, 2, 2)
1125 GEN_VEXT_VX(vor_vx_w, 4, 4)
1126 GEN_VEXT_VX(vor_vx_d, 8, 8)
1127 GEN_VEXT_VX(vxor_vx_b, 1, 1)
1128 GEN_VEXT_VX(vxor_vx_h, 2, 2)
1129 GEN_VEXT_VX(vxor_vx_w, 4, 4)
1130 GEN_VEXT_VX(vxor_vx_d, 8, 8)
1131
1132 /* Vector Single-Width Bit Shift Instructions */
/*
 * Shift N left/right by M bits.  Both arguments are parenthesized so that
 * compound expressions keep their own precedence.
 * DO_SRL is also used for the arithmetic-shift helpers: there N has a signed
 * type and the result relies on the compiler's behaviour for >> on negative
 * signed values.
 */
#define DO_SLL(N, M) ((N) << (M))
#define DO_SRL(N, M) ((N) >> (M))
1135
/*
 * Generate the helper for a shift instruction with two vector operands.
 *
 * TS1 is the element type of both the shift amounts (vs1) and the
 * destination (vd); TS2 is the element type of the value being shifted
 * (vs2).  HS1/HS2 translate the element index for the respective type.
 * Only the bits of the shift amount selected by MASK are used.
 * When vext_vm(desc) is zero, elements whose v0 mask bit is clear are left
 * untouched.  env->vstart is reset to 0 afterwards.
 */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)         \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                      \
                  void *vs2, CPURISCVState *env, uint32_t desc)       \
{                                                                     \
    const uint32_t vm = vext_vm(desc);                                \
    const uint32_t vl = env->vl;                                      \
    uint32_t idx;                                                     \
                                                                      \
    for (idx = env->vstart; idx < vl; idx++) {                        \
        if (vm || vext_elem_mask(v0, idx)) {                          \
            TS1 amount = *((TS1 *)vs1 + HS1(idx));                    \
            TS2 value = *((TS2 *)vs2 + HS2(idx));                     \
                                                                      \
            *((TS1 *)vd + HS1(idx)) = OP(value, amount & MASK);       \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
}
1155
1156 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1157 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1158 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1159 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1160
1161 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1162 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1163 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1164 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1165
1166 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1167 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1168 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1169 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1170
/*
 * Generate the helper for a shift instruction with one vector operand and
 * one scalar shift amount.
 *
 * TD is the destination element type and TS2 the element type of the value
 * being shifted (vs2); HD/HS2 translate the element index for each.  The
 * scalar s1 is reduced with MASK before it is used as the shift amount.
 * When vext_vm(desc) is zero, elements whose v0 mask bit is clear are left
 * untouched.  env->vstart is reset to 0 afterwards.
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK)           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,                \
                  void *vs2, CPURISCVState *env, uint32_t desc)       \
{                                                                     \
    const uint32_t vm = vext_vm(desc);                                \
    const uint32_t vl = env->vl;                                      \
    uint32_t idx;                                                     \
                                                                      \
    for (idx = env->vstart; idx < vl; idx++) {                        \
        if (vm || vext_elem_mask(v0, idx)) {                          \
            TS2 value = *((TS2 *)vs2 + HS2(idx));                     \
                                                                      \
            *((TD *)vd + HD(idx)) = OP(value, s1 & MASK);             \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
}
1189
1190 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1191 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1192 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1193 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1194
1195 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1196 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1197 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1198 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1199
1200 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1201 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1202 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1203 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1204
1205 /* Vector Narrowing Integer Right Shift Instructions */
1206 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1207 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1208 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1209 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1210 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1211 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1212 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1213 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1214 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1215 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1216 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1217 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1218
1219 /* Vector Integer Comparison Instructions */
/*
 * Element comparison predicates; each yields 1 when the relation holds and
 * 0 otherwise.  Signedness of the comparison follows the operand types.
 * Arguments are parenthesized so compound expressions expand safely.
 */
#define DO_MSEQ(N, M) ((N) == (M))
#define DO_MSNE(N, M) ((N) != (M))
#define DO_MSLT(N, M) ((N) < (M))
#define DO_MSLE(N, M) ((N) <= (M))
#define DO_MSGT(N, M) ((N) > (M))
1225
/*
 * Generate helper NAME for a vector-vector integer comparison.
 *
 * For each element i in env->vstart .. env->vl - 1 the mask bit i of vd is
 * set to DO_OP(vs2[i], vs1[i]), with both elements read as ETYPE.  When
 * vext_vm(desc) is zero, elements whose v0 mask bit is clear keep their
 * previous vd bit.  env->vstart is reset to 0 afterwards.
 */
#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,           \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    const uint32_t vm = vext_vm(desc);                                \
    const uint32_t vl = env->vl;                                      \
    uint32_t idx;                                                     \
                                                                      \
    for (idx = env->vstart; idx < vl; idx++) {                        \
        ETYPE rhs = *((ETYPE *)vs1 + H(idx));                         \
        ETYPE lhs = *((ETYPE *)vs2 + H(idx));                         \
                                                                      \
        if (vm || vext_elem_mask(v0, idx)) {                          \
            vext_set_elem_mask(vd, idx, DO_OP(lhs, rhs));             \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
}
1244
1245 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1246 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1247 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1248 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1249
1250 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1251 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1252 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1253 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1254
1255 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1256 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1257 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1258 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1259
1260 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1261 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1262 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1263 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1264
1265 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1266 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1267 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1268 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1269
1270 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1271 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1272 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1273 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1274
/*
 * Generate helper NAME for a vector-scalar integer comparison.
 *
 * The scalar s1 is converted to ETYPE (through target_long).  For each
 * element i in env->vstart .. env->vl - 1 the mask bit i of vd is set to
 * DO_OP(vs2[i], scalar).  When vext_vm(desc) is zero, elements whose v0
 * mask bit is clear keep their previous vd bit.  env->vstart is reset to 0
 * afterwards.
 */
#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    const uint32_t vm = vext_vm(desc);                                \
    const uint32_t vl = env->vl;                                      \
    const ETYPE scalar = (ETYPE)(target_long)s1;                      \
    uint32_t idx;                                                     \
                                                                      \
    for (idx = env->vstart; idx < vl; idx++) {                        \
        ETYPE lhs = *((ETYPE *)vs2 + H(idx));                         \
                                                                      \
        if (vm || vext_elem_mask(v0, idx)) {                          \
            vext_set_elem_mask(vd, idx, DO_OP(lhs, scalar));          \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
}
1293
1294 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1295 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1296 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1297 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1298
1299 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1300 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1301 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1302 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1303
1304 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1305 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1306 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1307 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1308
1309 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1310 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1311 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1312 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1313
1314 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1315 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1316 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1317 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1318
1319 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1320 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1321 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1322 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1323
1324 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1325 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1326 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1327 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1328
1329 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1330 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1331 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1332 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1333
1334 /* Vector Integer Min/Max Instructions */
1335 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1336 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1337 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1338 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1339 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1340 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1341 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1342 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1343 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1344 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1345 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1346 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1347 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1348 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1349 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1350 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1351 GEN_VEXT_VV(vminu_vv_b, 1, 1)
1352 GEN_VEXT_VV(vminu_vv_h, 2, 2)
1353 GEN_VEXT_VV(vminu_vv_w, 4, 4)
1354 GEN_VEXT_VV(vminu_vv_d, 8, 8)
1355 GEN_VEXT_VV(vmin_vv_b, 1, 1)
1356 GEN_VEXT_VV(vmin_vv_h, 2, 2)
1357 GEN_VEXT_VV(vmin_vv_w, 4, 4)
1358 GEN_VEXT_VV(vmin_vv_d, 8, 8)
1359 GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
1360 GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
1361 GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
1362 GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
1363 GEN_VEXT_VV(vmax_vv_b, 1, 1)
1364 GEN_VEXT_VV(vmax_vv_h, 2, 2)
1365 GEN_VEXT_VV(vmax_vv_w, 4, 4)
1366 GEN_VEXT_VV(vmax_vv_d, 8, 8)
1367
1368 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1369 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1370 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1371 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1372 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1373 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1374 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1375 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1376 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1377 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1378 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1379 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1380 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1381 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1382 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1383 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1384 GEN_VEXT_VX(vminu_vx_b, 1, 1)
1385 GEN_VEXT_VX(vminu_vx_h, 2, 2)
1386 GEN_VEXT_VX(vminu_vx_w, 4, 4)
1387 GEN_VEXT_VX(vminu_vx_d, 8, 8)
1388 GEN_VEXT_VX(vmin_vx_b, 1, 1)
1389 GEN_VEXT_VX(vmin_vx_h, 2, 2)
1390 GEN_VEXT_VX(vmin_vx_w, 4, 4)
1391 GEN_VEXT_VX(vmin_vx_d, 8, 8)
1392 GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
1393 GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
1394 GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
1395 GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
1396 GEN_VEXT_VX(vmax_vx_b, 1, 1)
1397 GEN_VEXT_VX(vmax_vx_h, 2, 2)
1398 GEN_VEXT_VX(vmax_vx_w, 4, 4)
1399 GEN_VEXT_VX(vmax_vx_d, 8, 8)
1400
1401 /* Vector Single-Width Integer Multiply Instructions */
/* Product of N and M; arguments parenthesized for safe expansion. */
#define DO_MUL(N, M) ((N) * (M))
1403 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1404 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1405 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1406 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1407 GEN_VEXT_VV(vmul_vv_b, 1, 1)
1408 GEN_VEXT_VV(vmul_vv_h, 2, 2)
1409 GEN_VEXT_VV(vmul_vv_w, 4, 4)
1410 GEN_VEXT_VV(vmul_vv_d, 8, 8)
1411
/* Upper 8 bits of the 16-bit signed product s2 * s1. */
static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    const int16_t wide2 = s2;
    const int16_t wide1 = s1;
    const int product = wide2 * wide1;

    return product >> 8;
}
1416
/* Upper 16 bits of the 32-bit signed product s2 * s1. */
static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    const int32_t wide2 = s2;
    const int32_t wide1 = s1;
    const int32_t product = wide2 * wide1;

    return product >> 16;
}
1421
/* Upper 32 bits of the 64-bit signed product s2 * s1. */
static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    const int64_t wide2 = s2;
    const int64_t wide1 = s1;
    const int64_t product = wide2 * wide1;

    return product >> 32;
}
1426
/*
 * High half of the signed product s2 * s1.
 * muls64() is given the addresses of the low and high result words, in that
 * order; only the high word is returned.
 */
static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t low;
    uint64_t high;

    muls64(&low, &high, s1, s2);
    return high;
}
1434
/* Upper 8 bits of the 16-bit unsigned product s2 * s1. */
static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    const uint16_t wide2 = s2;
    const uint16_t wide1 = s1;
    const int product = wide2 * wide1;

    return product >> 8;
}
1439
/* Upper 16 bits of the 32-bit unsigned product s2 * s1. */
static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    const uint32_t wide2 = s2;
    const uint32_t wide1 = s1;
    const uint32_t product = wide2 * wide1;

    return product >> 16;
}
1444
/* Upper 32 bits of the 64-bit unsigned product s2 * s1. */
static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    const uint64_t wide2 = s2;
    const uint64_t wide1 = s1;
    const uint64_t product = wide2 * wide1;

    return product >> 32;
}
1449
/*
 * High half of the unsigned product s2 * s1.
 * mulu64() is given the addresses of the low and high result words, in that
 * order; only the high word is returned.
 */
static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t low;
    uint64_t high;

    mulu64(&low, &high, s2, s1);
    return high;
}
1457
/* Upper 8 bits of the 16-bit product of signed s2 and unsigned s1. */
static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    const int16_t wide2 = s2;
    const uint16_t wide1 = s1;
    /* Both factors promote to int, so the product keeps its sign. */
    const int product = wide2 * wide1;

    return product >> 8;
}
1462
/* Upper 16 bits of the 32-bit product of signed s2 and unsigned s1. */
static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    const int32_t wide2 = s2;
    const uint32_t wide1 = s1;
    /*
     * The multiplication is carried out in uint32_t (modulo 2 ** 32); bits
     * 16..31 of that value are the bits returned after narrowing.
     */
    const uint32_t product = wide2 * wide1;

    return product >> 16;
}
1467
/* Upper 32 bits of the 64-bit product of signed s2 and unsigned s1. */
static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    const int64_t wide2 = s2;
    const uint64_t wide1 = s1;
    /*
     * The multiplication is carried out in uint64_t (modulo 2 ** 64); bits
     * 32..63 of that value are the bits returned after narrowing.
     */
    const uint64_t product = wide2 * wide1;

    return product >> 32;
}
1472
/*
 * High 64 bits of the product of signed s2 and unsigned s1.
 *
 * mulu64() multiplies the raw bit patterns as unsigned numbers.  When s2 is
 * negative, its bit pattern read as unsigned equals s2 + 2 ** 64, so the
 * unsigned product is
 *
 *     P = (s2 + 2 ** 64) * s1
 *       = s2 * s1 + 2 ** 64 * s1
 *
 * and the wanted signed product is P - 2 ** 64 * s1, i.e. s1 has to be
 * subtracted from the high half.  When s2 is not negative the unsigned
 * product already is the signed product.
 */

static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t low;
    uint64_t high;

    mulu64(&low, &high, s2, s1);

    if (s2 < 0) {
        high -= s1;
    }
    return high;
}
1501
1502 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1503 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1504 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1505 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1506 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1507 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1508 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1509 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1510 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1511 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1512 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1513 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1514 GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1515 GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1516 GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1517 GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1518 GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1519 GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1520 GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1521 GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1522 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1523 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1524 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1525 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1526
1527 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1528 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1529 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1530 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1531 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1532 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1533 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1534 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1535 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1536 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1537 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1538 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1539 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1540 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1541 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1542 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1543 GEN_VEXT_VX(vmul_vx_b, 1, 1)
1544 GEN_VEXT_VX(vmul_vx_h, 2, 2)
1545 GEN_VEXT_VX(vmul_vx_w, 4, 4)
1546 GEN_VEXT_VX(vmul_vx_d, 8, 8)
1547 GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1548 GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1549 GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1550 GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1551 GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1552 GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1553 GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1554 GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1555 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1556 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1557 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1558 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1559
1560 /* Vector Integer Divide Instructions */
/*
 * Integer divide/remainder with the exceptional cases handled explicitly:
 *   - divisor M == 0: the quotient is all ones, the remainder is N;
 *   - signed overflow (N is the most negative value of its type and
 *     M == -1): the quotient is N, the remainder is 0.
 *
 * The overflow case is detected by comparing N with the minimum value of its
 * own type instead of negating N, so the check does not depend on signed
 * overflow wrapping.  All arguments are parenthesized; N and M are evaluated
 * more than once.
 */
#define DO_SIGNED_MIN(N) ((__typeof(N))(1ULL << (sizeof(N) * 8 - 1)))
#define DO_DIV_OVERFLOW(N, M) \
    (((M) == (__typeof(N))(-1)) && ((N) == DO_SIGNED_MIN(N)))
#define DO_DIVU(N, M) (unlikely((M) == 0) ? (__typeof(N))(-1) : (N) / (M))
#define DO_REMU(N, M) (unlikely((M) == 0) ? (N) : (N) % (M))
#define DO_DIV(N, M)  (unlikely((M) == 0) ? (__typeof(N))(-1) : \
        unlikely(DO_DIV_OVERFLOW(N, M)) ? (N) : (N) / (M))
#define DO_REM(N, M)  (unlikely((M) == 0) ? (N) : \
        unlikely(DO_DIV_OVERFLOW(N, M)) ? 0 : (N) % (M))
1567
1568 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1569 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1570 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1571 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1572 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1573 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1574 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1575 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1576 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1577 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1578 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1579 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1580 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1581 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1582 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1583 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1584 GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1585 GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1586 GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1587 GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1588 GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1589 GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1590 GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1591 GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1592 GEN_VEXT_VV(vremu_vv_b, 1, 1)
1593 GEN_VEXT_VV(vremu_vv_h, 2, 2)
1594 GEN_VEXT_VV(vremu_vv_w, 4, 4)
1595 GEN_VEXT_VV(vremu_vv_d, 8, 8)
1596 GEN_VEXT_VV(vrem_vv_b, 1, 1)
1597 GEN_VEXT_VV(vrem_vv_h, 2, 2)
1598 GEN_VEXT_VV(vrem_vv_w, 4, 4)
1599 GEN_VEXT_VV(vrem_vv_d, 8, 8)
1600
1601 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1602 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1603 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1604 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1605 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1606 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1607 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1608 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1609 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1610 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1611 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1612 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1613 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1614 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1615 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1616 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1617 GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1618 GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1619 GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1620 GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1621 GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1622 GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1623 GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1624 GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1625 GEN_VEXT_VX(vremu_vx_b, 1, 1)
1626 GEN_VEXT_VX(vremu_vx_h, 2, 2)
1627 GEN_VEXT_VX(vremu_vx_w, 4, 4)
1628 GEN_VEXT_VX(vremu_vx_d, 8, 8)
1629 GEN_VEXT_VX(vrem_vx_b, 1, 1)
1630 GEN_VEXT_VX(vrem_vx_h, 2, 2)
1631 GEN_VEXT_VX(vrem_vx_w, 4, 4)
1632 GEN_VEXT_VX(vrem_vx_d, 8, 8)
1633
1634 /* Vector Widening Integer Multiply Instructions */
1635 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1636 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1637 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1638 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1639 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1640 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1641 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1642 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1643 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1644 GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1645 GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1646 GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1647 GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1648 GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1649 GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1650 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1651 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1652 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1653
1654 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1655 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1656 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1657 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1658 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1659 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1660 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1661 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1662 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1663 GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1664 GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1665 GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1666 GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1667 GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1668 GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1669 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1670 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1671 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1672
1673 /* Vector Single-Width Integer Multiply-Add Instructions */
/*
 * Define do_NAME(), the per-element worker of a three-operand vector-vector
 * operation.
 *
 * TD, T1 and T2 are the element types used to read vd, vs1 and vs2; TX1 and
 * TX2 are the types the vs1/vs2 elements are converted to before OP is
 * applied.  HD/HS1/HS2 translate the element index i for each register.
 * The vd element is both an input and the output:
 *     vd[i] = OP(vs2[i], vs1[i], vd[i])
 */
#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)          \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)          \
{                                                                     \
    TD *dest = (TD *)vd + HD(i);                                      \
    TX1 src1 = *((T1 *)vs1 + HS1(i));                                 \
    TX2 src2 = *((T2 *)vs2 + HS2(i));                                 \
    TD acc = *dest;                                                   \
                                                                      \
    *dest = OP(src2, src1, acc);                                      \
}
1682
/*
 * Multiply-add forms.  As invoked by OPIVV3/OPIVX3, N is the vs2 element,
 * M is the vs1 element (or the scalar) and D is the current vd element:
 *   DO_MACC:   M * N + D        DO_NMSAC: -(M * N) + D
 *   DO_MADD:   M * D + N        DO_NMSUB: -(M * D) + N
 * Arguments are parenthesized so compound expressions expand safely.
 */
#define DO_MACC(N, M, D)  ((M) * (N) + (D))
#define DO_NMSAC(N, M, D) (-((M) * (N)) + (D))
#define DO_MADD(N, M, D)  ((M) * (D) + (N))
#define DO_NMSUB(N, M, D) (-((M) * (D)) + (N))
/*
 * Vector-vector instantiations of the single-width multiply-add family
 * (vmacc, vnmsac, vmadd, vnmsub) for the b/h/w/d element widths.
 * Each name appears once in an RVVCALL(OPIVV3, ...) line, which binds it
 * to a DO_* kernel, and once in a GEN_VEXT_VV line.  RVVCALL, OP_SSS_*,
 * H1..H8 and GEN_VEXT_VV are defined outside this section.
 */
1687 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1688 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1689 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1690 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1691 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1692 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1693 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1694 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1695 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1696 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1697 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1698 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1699 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1700 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1701 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1702 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1703 GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1704 GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1705 GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1706 GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1707 GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1708 GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1709 GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1710 GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1711 GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1712 GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1713 GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1714 GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1715 GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1716 GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1717 GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1718 GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1719
/*
 * Emit do_<NAME>(): the per-element worker for three-operand
 * vector-scalar integer operations.
 *
 * The scalar s1 is narrowed to T1 and then converted to TX1; element i
 * of vs2 is read as T2 and converted to TX2; the current destination
 * element is read as TD.  OP(vs2 element, scalar, old destination) is
 * stored back into the destination element.  HD / HS2 translate the
 * element index into a storage index.
 */
#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)  \
{                                                                   \
    TD *dst = (TD *)vd + HD(i);                                     \
    TX1 scalar = (TX1)(T1)s1;                                       \
    TX2 elem = ((T2 *)vs2)[HS2(i)];                                 \
    TD acc = *dst;                                                  \
                                                                    \
    *dst = OP(elem, scalar, acc);                                   \
}
1727
/*
 * Vector-scalar instantiations of the single-width multiply-add family
 * (vmacc, vnmsac, vmadd, vnmsub) for the b/h/w/d element widths.
 * Each name appears once in an RVVCALL(OPIVX3, ...) line, which binds it
 * to a DO_* kernel, and once in a GEN_VEXT_VX line.  RVVCALL, OP_SSS_*,
 * H1..H8 and GEN_VEXT_VX are defined outside this section.
 */
1728 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1729 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1730 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1731 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1732 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1733 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1734 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1735 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1736 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1737 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1738 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1739 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1740 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1741 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1742 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1743 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1744 GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1745 GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1746 GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1747 GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1748 GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1749 GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1750 GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1751 GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1752 GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1753 GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1754 GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1755 GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1756 GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1757 GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1758 GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1759 GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1760
1761 /* Vector Widening Integer Multiply-Add Instructions */
/*
 * Widening multiply-accumulate instantiations.  All of them use the
 * DO_MACC kernel; the WOP_* type bundles (defined outside this section)
 * select the operand signedness, and the destination index macro is one
 * step wider than the source index macros (H2/H1, H4/H2, H8/H4).
 * The vector-vector group covers vwmaccu, vwmacc and vwmaccsu; the
 * vector-scalar group additionally covers vwmaccus.
 */
1762 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1763 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1764 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1765 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1766 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1767 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1768 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1769 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1770 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1771 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1772 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1773 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1774 GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1775 GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1776 GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1777 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1778 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1779 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1780
1781 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1782 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1783 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1784 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1785 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1786 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1787 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1788 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1789 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1790 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1791 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1792 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1793 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1794 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1795 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1796 GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1797 GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1798 GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1799 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1800 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1801 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1802 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1803 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1804 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1805
1806 /* Vector Integer Merge and Move Instructions */
/*
 * Emit HELPER(NAME): copy elements [env->vstart, env->vl) of vs1 into vd,
 * treating both as arrays of ETYPE indexed through H().  The copy is
 * unconditional (no mask is consulted) and env->vstart is reset to 0
 * afterwards.  desc is not used by the body.
 */
#define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
                  uint32_t desc)                                     \
{                                                                    \
    ETYPE *dst = (ETYPE *)vd;                                        \
    ETYPE *src = (ETYPE *)vs1;                                       \
    uint32_t vl = env->vl;                                           \
    uint32_t idx = env->vstart;                                      \
                                                                     \
    for (; idx < vl; idx++) {                                        \
        dst[H(idx)] = src[H(idx)];                                   \
    }                                                                \
    env->vstart = 0;                                                 \
}
1820
/* vmv.v.v helpers, one per element width (8/16/32/64-bit). */
1821 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1822 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1823 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1824 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1825
/*
 * Emit HELPER(NAME): store the scalar s1, converted to ETYPE, into
 * elements [env->vstart, env->vl) of vd (indexed through H()).  No mask
 * is consulted, and env->vstart is reset to 0 afterwards.  desc is not
 * used by the body.
 */
#define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
                  uint32_t desc)                                     \
{                                                                    \
    ETYPE *dst = (ETYPE *)vd;                                        \
    ETYPE value = (ETYPE)s1;                                         \
    uint32_t vl = env->vl;                                           \
    uint32_t idx = env->vstart;                                      \
                                                                     \
    for (; idx < vl; idx++) {                                        \
        dst[H(idx)] = value;                                         \
    }                                                                \
    env->vstart = 0;                                                 \
}
1838
/* vmv.v.x helpers, one per element width (8/16/32/64-bit). */
1839 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
1840 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1841 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1842 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1843
/*
 * Emit HELPER(NAME): element-wise merge of two vectors under mask v0.
 * For each i in [env->vstart, env->vl), vd[i] takes vs1[i] when
 * vext_elem_mask(v0, i) is non-zero and vs2[i] otherwise.  Elements are
 * ETYPE, indexed through H().  env->vstart is reset to 0 afterwards;
 * desc is not used by the body.
 */
#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
                  CPURISCVState *env, uint32_t desc)                 \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t idx;                                                    \
                                                                     \
    for (idx = env->vstart; idx < vl; idx++) {                       \
        ETYPE *src = vext_elem_mask(v0, idx) ? vs1 : vs2;            \
        ((ETYPE *)vd)[H(idx)] = src[H(idx)];                         \
    }                                                                \
    env->vstart = 0;                                                 \
}
1857
/* vmerge.vvm helpers, one per element width (8/16/32/64-bit). */
1858 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
1859 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1860 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1861 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1862
/*
 * Emit HELPER(NAME): element-wise merge of a scalar and a vector under
 * mask v0.  For each i in [env->vstart, env->vl), vd[i] takes the scalar
 * (s1 reinterpreted as target_long, then converted to ETYPE) when
 * vext_elem_mask(v0, i) is non-zero and vs2[i] otherwise.  Elements are
 * ETYPE, indexed through H().  env->vstart is reset to 0 afterwards;
 * desc is not used by the body.
 */
#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
                  void *vs2, CPURISCVState *env, uint32_t desc)      \
{                                                                    \
    ETYPE scalar = (ETYPE)(target_long)s1;                           \
    uint32_t vl = env->vl;                                           \
    uint32_t idx;                                                    \
                                                                     \
    for (idx = env->vstart; idx < vl; idx++) {                       \
        ETYPE elem = ((ETYPE *)vs2)[H(idx)];                         \
        ((ETYPE *)vd)[H(idx)] = vext_elem_mask(v0, idx) ? scalar     \
                                                        : elem;      \
    }                                                                \
    env->vstart = 0;                                                 \
}
1878
/* vmerge.vxm helpers, one per element width (8/16/32/64-bit). */
1879 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
1880 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1881 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1882 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1883
1884 /*
1885 *** Vector Fixed-Point Arithmetic Instructions
1886 */
1887
1888 /* Vector Single-Width Saturating Add and Subtract */
1889
1890 /*
1891 * As fixed point instructions probably have round mode and saturation,
1892 * define common macros for fixed point here.
1893 */
1894 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1895 CPURISCVState *env, int vxrm);
1896
1897 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1898 static inline void \
1899 do_##NAME(void *vd, void *vs1, void *vs2, int i, \
1900 CPURISCVState *env, int vxrm) \
1901 { \
1902 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1903 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1904 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
1905 }
1906
1907 static inline void
1908 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1909 CPURISCVState *env,
1910 uint32_t vl, uint32_t vm, int vxrm,
1911 opivv2_rm_fn *fn)
1912 {
1913 for (uint32_t i = env->vstart; i < vl; i++) {
1914 if (!vm && !vext_elem_mask(v0, i)) {
1915 continue;
1916 }
1917 fn(vd, vs1, vs2, i, env, vxrm);
1918 }
1919 env->vstart = 0;
1920 }
1921
1922 static inline void
1923 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1924 CPURISCVState *env,
1925 uint32_t desc, uint32_t esz, uint32_t dsz,
1926 opivv2_rm_fn *fn)
1927 {
1928 uint32_t vm = vext_vm(desc);
1929 uint32_t vl = env->vl;
1930
1931 switch (env->vxrm) {
1932 case 0: /* rnu */
1933 vext_vv_rm_1(vd, v0, vs1, vs2,
1934 env, vl, vm, 0, fn);
1935 break;
1936 case 1: /* rne */
1937 vext_vv_rm_1(vd, v0, vs1, vs2,
1938 env, vl, vm, 1, fn);
1939 break;
1940 case 2: /* rdn */
1941 vext_vv_rm_1(vd, v0, vs1, vs2,
1942 env, vl, vm, 2, fn);
1943 break;
1944 default: /* rod */
1945 vext_vv_rm_1(vd, v0, vs1, vs2,
1946 env, vl, vm, 3, fn);
1947 break;
1948 }
1949 }
1950
/*
 * Generate the HELPER(NAME) entry point for a fixed-point instruction in
 * OPIVV format: it forwards all operands, together with ESZ/DSZ and the
 * per-element worker do_<NAME>, to vext_vv_rm_2.
 */
#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                               \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
                  CPURISCVState *env, uint32_t desc)                 \
{                                                                    \
    vext_vv_rm_2(vd, v0, vs1, vs2,                                   \
                 env, desc,                                          \
                 ESZ, DSZ,                                           \
                 do_##NAME);                                         \
}
1959
1960 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1961 {
1962 uint8_t res = a + b;
1963 if (res < a) {
1964 res = UINT8_MAX;
1965 env->vxsat = 0x1;
1966 }
1967 return res;
1968 }
1969
1970 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1971 uint16_t b)
1972 {
1973 uint16_t res = a + b;
1974 if (res < a) {
1975 res = UINT16_MAX;
1976 env->vxsat = 0x1;
1977 }
1978 return res;
1979 }
1980
1981 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1982 uint32_t b)
1983 {
1984 uint32_t res = a + b;
1985 if (res < a) {
1986 res = UINT32_MAX;
1987 env->vxsat = 0x1;
1988 }
1989 return res;
1990 }
1991
1992 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1993 uint64_t b)
1994 {
1995 uint64_t res = a + b;
1996 if (res < a) {
1997 res = UINT64_MAX;
1998 env->vxsat = 0x1;
1999 }
2000 return res;
2001 }
2002
/*
 * vsaddu.vv: unsigned saturating add, vector-vector, bound to
 * saddu8/16/32/64 for the b/h/w/d element widths.
 */
2003 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2004 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2005 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2006 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2007 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
2008 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
2009 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
2010 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
2011
2012 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2013 CPURISCVState *env, int vxrm);
2014
2015 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2016 static inline void \
2017 do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2018 CPURISCVState *env, int vxrm) \
2019 { \
2020 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2021 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2022 }
2023
2024 static inline void
2025 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2026 CPURISCVState *env,
2027 uint32_t vl, uint32_t vm, int vxrm,
2028 opivx2_rm_fn *fn)
2029 {
2030 for (uint32_t i = env->vstart; i < vl; i++) {
2031 if (!vm && !vext_elem_mask(v0, i)) {
2032 continue;
2033 }
2034 fn(vd, s1, vs2, i, env, vxrm);
2035 }
2036 env->vstart = 0;
2037 }
2038
2039 static inline void
2040 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2041 CPURISCVState *env,
2042 uint32_t desc, uint32_t esz, uint32_t dsz,
2043 opivx2_rm_fn *fn)
2044 {
2045 uint32_t vm = vext_vm(desc);
2046 uint32_t vl = env->vl;
2047
2048 switch (env->vxrm) {
2049 case 0: /* rnu */
2050 vext_vx_rm_1(vd, v0, s1, vs2,
2051 env, vl, vm, 0, fn);
2052 break;
2053 case 1: /* rne */
2054 vext_vx_rm_1(vd, v0, s1, vs2,
2055 env, vl, vm, 1, fn);
2056 break;
2057 case 2: /* rdn */
2058 vext_vx_rm_1(vd, v0, s1, vs2,
2059 env, vl, vm, 2, fn);
2060 break;
2061 default: /* rod */
2062 vext_vx_rm_1(vd, v0, s1, vs2,
2063 env, vl, vm, 3, fn);
2064 break;
2065 }
2066 }
2067
/*
 * Generate the HELPER(NAME) entry point for a fixed-point instruction in
 * OPIVX format: it forwards all operands, together with ESZ/DSZ and the
 * per-element worker do_<NAME>, to vext_vx_rm_2.  The target_ulong scalar
 * is converted to target_long at that call.
 */
#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
                  void *vs2, CPURISCVState *env, uint32_t desc)      \
{                                                                    \
    vext_vx_rm_2(vd, v0, s1, vs2,                                    \
                 env, desc,                                          \
                 ESZ, DSZ,                                           \
                 do_##NAME);                                         \
}
2076
/*
 * vsaddu.vx: unsigned saturating add, vector-scalar, bound to
 * saddu8/16/32/64 for the b/h/w/d element widths.
 */
2077 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2078 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2079 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2080 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2081 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
2082 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
2083 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
2084 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
2085
2086 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2087 {
2088 int8_t res = a + b;
2089 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2090 res = a > 0 ? INT8_MAX : INT8_MIN;
2091 env->vxsat = 0x1;
2092 }
2093 return res;
2094 }
2095
2096 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2097 {
2098 int16_t res = a + b;
2099 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2100 res = a > 0 ? INT16_MAX : INT16_MIN;
2101 env->vxsat = 0x1;
2102 }
2103 return res;
2104 }
2105
2106 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2107 {
2108 int32_t res = a + b;
2109 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2110 res = a > 0 ? INT32_MAX : INT32_MIN;
2111 env->vxsat = 0x1;
2112 }
2113 return res;
2114 }
2115
2116 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2117 {
2118 int64_t res = a + b;
2119 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2120 res = a > 0 ? INT64_MAX : INT64_MIN;
2121 env->vxsat = 0x1;
2122 }
2123 return res;
2124 }
2125
/*
 * vsadd.vv / vsadd.vx: signed saturating add, bound to sadd8/16/32/64
 * for the b/h/w/d element widths.
 */
2126 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2127 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2128 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2129 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2130 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2131 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2132 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2133 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2134
2135 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2136 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2137 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2138 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2139 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2140 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2141 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2142 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2143
2144 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2145 {
2146 uint8_t res = a - b;
2147 if (res > a) {
2148 res = 0;
2149 env->vxsat = 0x1;
2150 }
2151 return res;
2152 }
2153
2154 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2155 uint16_t b)
2156 {
2157 uint16_t res = a - b;
2158 if (res > a) {
2159 res = 0;
2160 env->vxsat = 0x1;
2161 }
2162 return res;
2163 }
2164
2165 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2166 uint32_t b)
2167 {
2168 uint32_t res = a - b;
2169 if (res > a) {
2170 res = 0;
2171 env->vxsat = 0x1;
2172 }
2173 return res;
2174 }
2175
2176 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2177 uint64_t b)
2178 {
2179 uint64_t res = a - b;
2180 if (res > a) {
2181 res = 0;
2182 env->vxsat = 0x1;
2183 }
2184 return res;
2185 }
2186
/*
 * vssubu.vv / vssubu.vx: unsigned saturating subtract, bound to
 * ssubu8/16/32/64 for the b/h/w/d element widths.
 */
2187 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2188 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2189 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2190 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2191 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2192 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2193 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2194 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2195
2196 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2197 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2198 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2199 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2200 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2201 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2202 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2203 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2204
2205 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2206 {
2207 int8_t res = a - b;
2208 if ((res ^ a) & (a ^ b) & INT8_MIN) {
2209 res = a >= 0 ? INT8_MAX : INT8_MIN;
2210 env->vxsat = 0x1;
2211 }
2212 return res;
2213 }
2214
2215 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2216 {
2217 int16_t res = a - b;
2218 if ((res ^ a) & (a ^ b) & INT16_MIN) {
2219 res = a >= 0 ? INT16_MAX : INT16_MIN;
2220 env->vxsat = 0x1;
2221 }
2222 return res;
2223 }
2224
2225 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2226 {
2227 int32_t res = a - b;
2228 if ((res ^ a) & (a ^ b) & INT32_MIN) {
2229 res = a >= 0 ? INT32_MAX : INT32_MIN;
2230 env->vxsat = 0x1;
2231 }
2232 return res;
2233 }
2234
2235 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2236 {
2237 int64_t res = a - b;
2238 if ((res ^ a) & (a ^ b) & INT64_MIN) {
2239 res = a >= 0 ? INT64_MAX : INT64_MIN;
2240 env->vxsat = 0x1;
2241 }
2242 return res;
2243 }
2244
/*
 * vssub.vv / vssub.vx: signed saturating subtract, bound to
 * ssub8/16/32/64 for the b/h/w/d element widths.
 */
2245 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2246 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2247 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2248 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2249 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
2250 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
2251 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
2252 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
2253
2254 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2255 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2256 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2257 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2258 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
2259 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
2260 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
2261 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2262
2263 /* Vector Single-Width Averaging Add and Subtract */
/*
 * Compute the rounding increment (0 or 1) to add to (v >> shift).
 *
 * @vxrm:  rounding mode: 0 = round-to-nearest-up, 1 = round-to-nearest-even,
 *         3 = round-to-odd; any other value truncates (round-down).
 * @v:     the value about to be shifted right, as a 64-bit pattern.
 * @shift: the shift amount.  0 and anything above 64 yield 0.
 *
 * The shift amount is validated before any bit of v is inspected, and
 * for shift == 64 the least-significant result bit (bit 'shift' of v),
 * which does not exist in a 64-bit value, is taken as 0.  Plain shifts
 * and masks are used so that no bit-field access is ever made outside
 * the 64-bit value.
 */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d, d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    /* d: LSB of the shifted result; d1: most significant discarded bit. */
    d = shift < 64 ? (v >> shift) & 1 : 0;
    d1 = (v >> (shift - 1)) & 1;
    /* D1: all discarded bits, v[shift-1:0]. */
    D1 = v & (UINT64_MAX >> (64 - shift));

    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            /* D2: discarded bits below d1, v[shift-2:0]. */
            D2 = v & (UINT64_MAX >> (65 - shift));
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}
2290
2291 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2292 {
2293 int64_t res = (int64_t)a + b;
2294 uint8_t round = get_round(vxrm, res, 1);
2295
2296 return (res >> 1) + round;
2297 }
2298
2299 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2300 {
2301 int64_t res = a + b;
2302 uint8_t round = get_round(vxrm, res, 1);
2303 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2304
2305 /* With signed overflow, bit 64 is inverse of bit 63. */
2306 return ((res >> 1) ^ over) + round;
2307 }
2308
/*
 * vaadd.vv / vaadd.vx: signed averaging add.  The b/h/w widths all use
 * aadd32; the d width uses aadd64.
 */
2309 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2310 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2311 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2312 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2313 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
2314 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
2315 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
2316 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
2317
2318 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2319 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2320 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2321 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2322 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
2323 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
2324 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
2325 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2326
2327 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2328 uint32_t a, uint32_t b)
2329 {
2330 uint64_t res = (uint64_t)a + b;
2331 uint8_t round = get_round(vxrm, res, 1);
2332
2333 return (res >> 1) + round;
2334 }
2335
2336 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2337 uint64_t a, uint64_t b)
2338 {
2339 uint64_t res = a + b;
2340 uint8_t round = get_round(vxrm, res, 1);
2341 uint64_t over = (uint64_t)(res < a) << 63;
2342
2343 return ((res >> 1) | over) + round;
2344 }
2345
/*
 * vaaddu.vv / vaaddu.vx: unsigned averaging add.  The b/h/w widths all
 * use aaddu32; the d width uses aaddu64.
 */
2346 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2347 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2348 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2349 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2350 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
2351 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
2352 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
2353 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
2354
2355 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2356 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2357 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2358 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2359 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
2360 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
2361 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
2362 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
2363
2364 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2365 {
2366 int64_t res = (int64_t)a - b;
2367 uint8_t round = get_round(vxrm, res, 1);
2368
2369 return (res >> 1) + round;
2370 }
2371
2372 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2373 {
2374 int64_t res = (int64_t)a - b;
2375 uint8_t round = get_round(vxrm, res, 1);
2376 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2377
2378 /* With signed overflow, bit 64 is inverse of bit 63. */
2379 return ((res >> 1) ^ over) + round;
2380 }
2381
/*
 * vasub.vv / vasub.vx: signed averaging subtract.  The b/h/w widths all
 * use asub32; the d width uses asub64.
 */
2382 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2383 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2384 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2385 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2386 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
2387 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
2388 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
2389 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
2390
2391 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2392 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2393 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2394 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2395 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
2396 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
2397 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
2398 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2399
2400 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2401 uint32_t a, uint32_t b)
2402 {
2403 int64_t res = (int64_t)a - b;
2404 uint8_t round = get_round(vxrm, res, 1);
2405
2406 return (res >> 1) + round;
2407 }
2408
2409 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2410 uint64_t a, uint64_t b)
2411 {
2412 uint64_t res = (uint64_t)a - b;
2413 uint8_t round = get_round(vxrm, res, 1);
2414 uint64_t over = (uint64_t)(res > a) << 63;
2415
2416 return ((res >> 1) | over) + round;
2417 }
2418
/*
 * vasubu.vv / vasubu.vx: unsigned averaging subtract.  The b/h/w widths
 * all use asubu32; the d width uses asubu64.
 */
2419 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2420 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2421 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2422 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2423 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
2424 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
2425 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
2426 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
2427
2428 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2429 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2430 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2431 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2432 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
2433 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
2434 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
2435 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
2436
2437 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2438 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2439 {
2440 uint8_t round;
2441 int16_t res;
2442
2443 res = (int16_t)a * (int16_t)b;
2444 round = get_round(vxrm, res, 7);
2445 res = (res >> 7) + round;
2446
2447 if (res > INT8_MAX) {
2448 env->vxsat = 0x1;
2449 return INT8_MAX;
2450 } else if (res < INT8_MIN) {
2451 env->vxsat = 0x1;
2452 return INT8_MIN;
2453 } else {
2454 return res;
2455 }
2456 }
2457
2458 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2459 {
2460 uint8_t round;
2461 int32_t res;
2462
2463 res = (int32_t)a * (int32_t)b;
2464 round = get_round(vxrm, res, 15);
2465 res = (res >> 15) + round;
2466
2467 if (res > INT16_MAX) {
2468 env->vxsat = 0x1;
2469 return INT16_MAX;
2470 } else if (res < INT16_MIN) {
2471 env->vxsat = 0x1;
2472 return INT16_MIN;
2473 } else {
2474 return res;
2475 }
2476 }
2477
2478 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2479 {
2480 uint8_t round;
2481 int64_t res;
2482
2483 res = (int64_t)a * (int64_t)b;
2484 round = get_round(vxrm, res, 31);
2485 res = (res >> 31) + round;
2486
2487 if (res > INT32_MAX) {
2488 env->vxsat = 0x1;
2489 return INT32_MAX;
2490 } else if (res < INT32_MIN) {
2491 env->vxsat = 0x1;
2492 return INT32_MIN;
2493 } else {
2494 return res;
2495 }
2496 }
2497
2498 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2499 {
2500 uint8_t round;
2501 uint64_t hi_64, lo_64;
2502 int64_t res;
2503
2504 if (a == INT64_MIN && b == INT64_MIN) {
2505 env->vxsat = 1;
2506 return INT64_MAX;
2507 }
2508
2509 muls64(&lo_64, &hi_64, a, b);
2510 round = get_round(vxrm, lo_64, 63);
2511 /*
2512 * Cannot overflow, as there are always
2513 * 2 sign bits after multiply.
2514 */
2515 res = (hi_64 << 1) | (lo_64 >> 63);
2516 if (round) {
2517 if (res == INT64_MAX) {
2518 env->vxsat = 1;
2519 } else {
2520 res += 1;
2521 }
2522 }
2523 return res;
2524 }
2525
/*
 * vsmul.vv / vsmul.vx: signed fractional multiply with rounding and
 * saturation, bound to vsmul8/16/32/64 for the b/h/w/d element widths.
 */
2526 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2527 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2528 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2529 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2530 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
2531 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
2532 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
2533 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
2534
2535 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2536 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2537 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2538 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2539 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
2540 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
2541 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
2542 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2543
2544 /* Vector Single-Width Scaling Shift Instructions */
2545 static inline uint8_t
2546 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2547 {
2548 uint8_t round, shift = b & 0x7;
2549 uint8_t res;
2550
2551 round = get_round(vxrm, a, shift);
2552 res = (a >> shift) + round;
2553 return res;
2554 }
2555 static inline uint16_t
2556 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2557 {
2558 uint8_t round, shift = b & 0xf;
2559 uint16_t res;
2560
2561 round = get_round(vxrm, a, shift);
2562 res = (a >> shift) + round;
2563 return res;
2564 }
2565 static inline uint32_t
2566 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2567 {
2568 uint8_t round, shift = b & 0x1f;
2569 uint32_t res;
2570
2571 round = get_round(vxrm, a, shift);
2572 res = (a >> shift) + round;
2573 return res;
2574 }
2575 static inline uint64_t
2576 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2577 {
2578 uint8_t round, shift = b & 0x3f;
2579 uint64_t res;
2580
2581 round = get_round(vxrm, a, shift);
2582 res = (a >> shift) + round;
2583 return res;
2584 }
/*
 * vssrl.vv / vssrl.vx: scaling logical shift right, bound to
 * vssrl8/16/32/64 for the b/h/w/d element widths.
 */
2585 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2586 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2587 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2588 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2589 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
2590 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
2591 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
2592 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
2593
2594 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2595 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2596 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2597 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2598 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
2599 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
2600 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
2601 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2602
2603 static inline int8_t
2604 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2605 {
2606 uint8_t round, shift = b & 0x7;
2607 int8_t res;
2608
2609 round = get_round(vxrm, a, shift);
2610 res = (a >> shift) + round;
2611 return res;
2612 }
2613 static inline int16_t
2614 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2615 {
2616 uint8_t round, shift = b & 0xf;
2617 int16_t res;
2618
2619 round = get_round(vxrm, a, shift);
2620 res = (a >> shift) + round;
2621 return res;
2622 }
2623 static inline int32_t
2624 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2625 {
2626 uint8_t round, shift = b & 0x1f;
2627 int32_t res;
2628
2629 round = get_round(vxrm, a, shift);
2630 res = (a >> shift) + round;
2631 return res;
2632 }
2633 static inline int64_t
2634 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2635 {
2636 uint8_t round, shift = b & 0x3f;
2637 int64_t res;
2638
2639 round = get_round(vxrm, a, shift);
2640 res = (a >> shift) + round;
2641 return res;
2642 }
2643
/*
 * vssra.vv / vssra.vx: scaling arithmetic shift right, bound to
 * vssra8/16/32/64 for the b/h/w/d element widths.
 */
2644 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2645 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2646 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2647 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2648 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
2649 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
2650 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
2651 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
2652
2653 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2654 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2655 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2656 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2657 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
2658 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
2659 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
2660 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2661
2662 /* Vector Narrowing Fixed-Point Clip Instructions */
2663 static inline int8_t
2664 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2665 {
2666 uint8_t round, shift = b & 0xf;
2667 int16_t res;
2668
2669 round = get_round(vxrm, a, shift);
2670 res = (a >> shift) + round;
2671 if (res > INT8_MAX) {
2672 env->vxsat = 0x1;
2673 return INT8_MAX;
2674 } else if (res < INT8_MIN) {
2675 env->vxsat = 0x1;
2676 return INT8_MIN;
2677 } else {
2678 return res;
2679 }
2680 }
2681
2682 static inline int16_t
2683 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2684 {
2685 uint8_t round, shift = b & 0x1f;
2686 int32_t res;
2687
2688 round = get_round(vxrm, a, shift);
2689 res = (a >> shift) + round;
2690 if (res > INT16_MAX) {
2691 env->vxsat = 0x1;
2692 return INT16_MAX;
2693 } else if (res < INT16_MIN) {
2694 env->vxsat = 0x1;
2695 return INT16_MIN;
2696 } else {
2697 return res;
2698 }
2699 }
2700
2701 static inline int32_t
2702 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2703 {
2704 uint8_t round, shift = b & 0x3f;
2705 int64_t res;
2706
2707 round = get_round(vxrm, a, shift);
2708 res = (a >> shift) + round;
2709 if (res > INT32_MAX) {
2710 env->vxsat = 0x1;
2711 return INT32_MAX;
2712 } else if (res < INT32_MIN) {
2713 env->vxsat = 0x1;
2714 return INT32_MIN;
2715 } else {
2716 return res;
2717 }
2718 }
2719
2720 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2721 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2722 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2723 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
2724 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
2725 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
2726
2727 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2728 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2729 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2730 GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
2731 GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
2732 GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
2733
2734 static inline uint8_t
2735 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2736 {
2737 uint8_t round, shift = b & 0xf;
2738 uint16_t res;
2739
2740 round = get_round(vxrm, a, shift);
2741 res = (a >> shift) + round;
2742 if (res > UINT8_MAX) {
2743 env->vxsat = 0x1;
2744 return UINT8_MAX;
2745 } else {
2746 return res;
2747 }
2748 }
2749
2750 static inline uint16_t
2751 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2752 {
2753 uint8_t round, shift = b & 0x1f;
2754 uint32_t res;
2755
2756 round = get_round(vxrm, a, shift);
2757 res = (a >> shift) + round;
2758 if (res > UINT16_MAX) {
2759 env->vxsat = 0x1;
2760 return UINT16_MAX;
2761 } else {
2762 return res;
2763 }
2764 }
2765
2766 static inline uint32_t
2767 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2768 {
2769 uint8_t round, shift = b & 0x3f;
2770 uint64_t res;
2771
2772 round = get_round(vxrm, a, shift);
2773 res = (a >> shift) + round;
2774 if (res > UINT32_MAX) {
2775 env->vxsat = 0x1;
2776 return UINT32_MAX;
2777 } else {
2778 return res;
2779 }
2780 }
2781
2782 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2783 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2784 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2785 GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
2786 GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
2787 GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
2788
2789 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2790 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2791 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2792 GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
2793 GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
2794 GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
2795
/*
 *** Vector Floating-Point Arithmetic Instructions
 */
2799 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
/*
 * Define do_<NAME>(), the per-element body of a two-operand
 * floating-point vector-vector operation.  Element i of vs1 and vs2 is
 * loaded (indices adjusted through HS1/HS2), OP is called as
 * OP(vs2 element, vs1 element, &env->fp_status) and its result is stored
 * to element HD(i) of vd.
 */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = ((T1 *)vs1)[HS1(i)];                              \
    TX2 s2 = ((T2 *)vs2)[HS2(i)];                              \
    TD *dst = (TD *)vd + HD(i);                                \
                                                               \
    *dst = OP(s2, s1, &env->fp_status);                        \
}
2808
/*
 * Define the helper entry point for a vector-vector operation whose
 * element routine do_<NAME>() takes env.  Elements from env->vstart up
 * to (not including) env->vl are processed; an element is skipped when
 * the operation is masked (vext_vm(desc) == 0) and its bit in v0 is
 * clear.  env->vstart is reset to 0 afterwards.  ESZ and DSZ are
 * accepted but not used by this macro.
 */
#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    const uint32_t unmasked = vext_vm(desc);              \
    const uint32_t count = env->vl;                       \
    uint32_t idx = env->vstart;                           \
                                                          \
    while (idx < count) {                                 \
        if (unmasked || vext_elem_mask(v0, idx)) {        \
            do_##NAME(vd, vs1, vs2, idx, env);            \
        }                                                 \
        idx++;                                            \
    }                                                     \
    env->vstart = 0;                                      \
}
2826
2827 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2828 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2829 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2830 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
2831 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
2832 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
2833
/*
 * Define do_<NAME>(), the per-element body of a two-operand
 * floating-point vector-scalar operation.  The scalar s1 arrives as a
 * uint64_t and is narrowed through T1 then TX1; OP is called as
 * OP(vs2 element, scalar, &env->fp_status) and its result is stored to
 * element HD(i) of vd.
 */
#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = ((T2 *)vs2)[HS2(i)];                              \
    TD *dst = (TD *)vd + HD(i);                                \
                                                               \
    *dst = OP(s2, (TX1)(T1)s1, &env->fp_status);               \
}
2841
/*
 * Define the helper entry point for a vector-scalar floating-point
 * operation whose element routine is do_<NAME>().  Elements from
 * env->vstart up to (not including) env->vl are processed; an element
 * is skipped when the operation is masked (vext_vm(desc) == 0) and its
 * bit in v0 is clear.  env->vstart is reset to 0 afterwards.  ESZ and
 * DSZ are accepted but not used by this macro.
 */
#define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    const uint32_t unmasked = vext_vm(desc);              \
    const uint32_t count = env->vl;                       \
    uint32_t idx = env->vstart;                           \
                                                          \
    while (idx < count) {                                 \
        if (unmasked || vext_elem_mask(v0, idx)) {        \
            do_##NAME(vd, s1, vs2, idx, env);             \
        }                                                 \
        idx++;                                            \
    }                                                     \
    env->vstart = 0;                                      \
}
2859
2860 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2861 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2862 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2863 GEN_VEXT_VF(vfadd_vf_h, 2, 2)
2864 GEN_VEXT_VF(vfadd_vf_w, 4, 4)
2865 GEN_VEXT_VF(vfadd_vf_d, 8, 8)
2866
2867 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2868 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2869 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2870 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
2871 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
2872 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
2873 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2874 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2875 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2876 GEN_VEXT_VF(vfsub_vf_h, 2, 2)
2877 GEN_VEXT_VF(vfsub_vf_w, 4, 4)
2878 GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2879
2880 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2881 {
2882 return float16_sub(b, a, s);
2883 }
2884
2885 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2886 {
2887 return float32_sub(b, a, s);
2888 }
2889
2890 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2891 {
2892 return float64_sub(b, a, s);
2893 }
2894
2895 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2896 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2897 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2898 GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
2899 GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
2900 GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
2901
2902 /* Vector Widening Floating-Point Add/Subtract Instructions */
2903 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2904 {
2905 return float32_add(float16_to_float32(a, true, s),
2906 float16_to_float32(b, true, s), s);
2907 }
2908
2909 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2910 {
2911 return float64_add(float32_to_float64(a, s),
2912 float32_to_float64(b, s), s);
2913
2914 }
2915
2916 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2917 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
2918 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
2919 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
2920 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2921 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
2922 GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
2923 GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
2924
2925 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2926 {
2927 return float32_sub(float16_to_float32(a, true, s),
2928 float16_to_float32(b, true, s), s);
2929 }
2930
2931 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2932 {
2933 return float64_sub(float32_to_float64(a, s),
2934 float32_to_float64(b, s), s);
2935
2936 }
2937
2938 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2939 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
2940 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
2941 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
2942 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2943 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
2944 GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
2945 GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
2946
2947 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2948 {
2949 return float32_add(a, float16_to_float32(b, true, s), s);
2950 }
2951
2952 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2953 {
2954 return float64_add(a, float32_to_float64(b, s), s);
2955 }
2956
2957 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2958 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
2959 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
2960 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
2961 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2962 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
2963 GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
2964 GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
2965
2966 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2967 {
2968 return float32_sub(a, float16_to_float32(b, true, s), s);
2969 }
2970
2971 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2972 {
2973 return float64_sub(a, float32_to_float64(b, s), s);
2974 }
2975
2976 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
2977 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
2978 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
2979 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
2980 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
2981 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
2982 GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
2983 GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
2984
2985 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
2986 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
2987 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
2988 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
2989 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
2990 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
2991 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
2992 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
2993 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
2994 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
2995 GEN_VEXT_VF(vfmul_vf_h, 2, 2)
2996 GEN_VEXT_VF(vfmul_vf_w, 4, 4)
2997 GEN_VEXT_VF(vfmul_vf_d, 8, 8)
2998
2999 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3000 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3001 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3002 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
3003 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
3004 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
3005 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3006 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3007 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3008 GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
3009 GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
3010 GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
3011
3012 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3013 {
3014 return float16_div(b, a, s);
3015 }
3016
3017 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3018 {
3019 return float32_div(b, a, s);
3020 }
3021
3022 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3023 {
3024 return float64_div(b, a, s);
3025 }
3026
3027 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3028 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3029 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3030 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
3031 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
3032 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
3033
3034 /* Vector Widening Floating-Point Multiply */
3035 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3036 {
3037 return float32_mul(float16_to_float32(a, true, s),
3038 float16_to_float32(b, true, s), s);
3039 }
3040
3041 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3042 {
3043 return float64_mul(float32_to_float64(a, s),
3044 float32_to_float64(b, s), s);
3045
3046 }
3047 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3048 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3049 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
3050 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
3051 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3052 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3053 GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
3054 GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3055
3056 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
/*
 * Define do_<NAME>(), the per-element body of a three-operand
 * floating-point vector-vector operation.  The current destination
 * element is read as the third input: OP is called as
 * OP(vs2 element, vs1 element, vd element, &env->fp_status) and the
 * result overwrites element HD(i) of vd.
 */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TD *dst = (TD *)vd + HD(i);                                \
    TX1 s1 = ((T1 *)vs1)[HS1(i)];                              \
    TX2 s2 = ((T2 *)vs2)[HS2(i)];                              \
    TD d = *dst;                                               \
                                                               \
    *dst = OP(s2, s1, d, &env->fp_status);                     \
}
3066
3067 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3068 {
3069 return float16_muladd(a, b, d, 0, s);
3070 }
3071
3072 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3073 {
3074 return float32_muladd(a, b, d, 0, s);
3075 }
3076
3077 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3078 {
3079 return float64_muladd(a, b, d, 0, s);
3080 }
3081
3082 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3083 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3084 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3085 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
3086 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
3087 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3088
/*
 * Define do_<NAME>(), the per-element body of a three-operand
 * floating-point vector-scalar operation.  The scalar s1 is narrowed
 * through T1 then TX1, the current destination element is read as the
 * third input, and OP(vs2 element, scalar, vd element, &env->fp_status)
 * overwrites element HD(i) of vd.
 */
#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
                      CPURISCVState *env)                         \
{                                                                 \
    TD *dst = (TD *)vd + HD(i);                                   \
    TX2 s2 = ((T2 *)vs2)[HS2(i)];                                 \
    TD d = *dst;                                                  \
                                                                  \
    *dst = OP(s2, (TX1)(T1)s1, d, &env->fp_status);               \
}
3097
3098 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3099 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3100 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3101 GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
3102 GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
3103 GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3104
3105 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3106 {
3107 return float16_muladd(a, b, d,
3108 float_muladd_negate_c | float_muladd_negate_product, s);
3109 }
3110
3111 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3112 {
3113 return float32_muladd(a, b, d,
3114 float_muladd_negate_c | float_muladd_negate_product, s);
3115 }
3116
3117 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3118 {
3119 return float64_muladd(a, b, d,
3120 float_muladd_negate_c | float_muladd_negate_product, s);
3121 }
3122
3123 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3124 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3125 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3126 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
3127 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
3128 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
3129 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3130 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3131 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3132 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
3133 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
3134 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3135
3136 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3137 {
3138 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3139 }
3140
3141 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3142 {
3143 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3144 }
3145
3146 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3147 {
3148 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3149 }
3150
3151 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3152 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3153 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3154 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
3155 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
3156 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
3157 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3158 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3159 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3160 GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
3161 GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
3162 GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3163
3164 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3165 {
3166 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3167 }
3168
3169 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3170 {
3171 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3172 }
3173
3174 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3175 {
3176 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3177 }
3178
3179 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3180 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3181 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3182 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
3183 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
3184 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
3185 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3186 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3187 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3188 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
3189 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
3190 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3191
3192 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3193 {
3194 return float16_muladd(d, b, a, 0, s);
3195 }
3196
3197 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3198 {
3199 return float32_muladd(d, b, a, 0, s);
3200 }
3201
3202 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3203 {
3204 return float64_muladd(d, b, a, 0, s);
3205 }
3206
3207 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3208 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3209 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3210 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
3211 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
3212 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
3213 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3214 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3215 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3216 GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
3217 GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
3218 GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3219
3220 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3221 {
3222 return float16_muladd(d, b, a,
3223 float_muladd_negate_c | float_muladd_negate_product, s);
3224 }
3225
3226 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3227 {
3228 return float32_muladd(d, b, a,
3229 float_muladd_negate_c | float_muladd_negate_product, s);
3230 }
3231
3232 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3233 {
3234 return float64_muladd(d, b, a,
3235 float_muladd_negate_c | float_muladd_negate_product, s);
3236 }
3237
3238 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3239 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3240 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3241 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
3242 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
3243 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
3244 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3245 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3246 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3247 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
3248 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
3249 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3250
3251 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3252 {
3253 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3254 }
3255
3256 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3257 {
3258 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3259 }
3260
3261 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3262 {
3263 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3264 }
3265
3266 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3267 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3268 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3269 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
3270 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
3271 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
3272 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3273 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3274 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3275 GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
3276 GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
3277 GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3278
3279 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3280 {
3281 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3282 }
3283
3284 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3285 {
3286 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3287 }
3288
3289 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3290 {
3291 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3292 }
3293
3294 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3295 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3296 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3297 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
3298 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
3299 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
3300 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3301 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3302 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3303 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
3304 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
3305 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
3306
3307 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3308 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3309 {
3310 return float32_muladd(float16_to_float32(a, true, s),
3311 float16_to_float32(b, true, s), d, 0, s);
3312 }
3313
3314 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3315 {
3316 return float64_muladd(float32_to_float64(a, s),
3317 float32_to_float64(b, s), d, 0, s);
3318 }
3319
3320 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3321 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3322 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
3323 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
3324 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3325 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3326 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
3327 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
3328
3329 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3330 {
3331 return float32_muladd(float16_to_float32(a, true, s),
3332 float16_to_float32(b, true, s), d,
3333 float_muladd_negate_c | float_muladd_negate_product, s);
3334 }
3335
3336 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3337 {
3338 return float64_muladd(float32_to_float64(a, s),
3339 float32_to_float64(b, s), d,
3340 float_muladd_negate_c | float_muladd_negate_product, s);
3341 }
3342
3343 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3344 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3345 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
3346 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
3347 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3348 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3349 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
3350 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3351
3352 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3353 {
3354 return float32_muladd(float16_to_float32(a, true, s),
3355 float16_to_float32(b, true, s), d,
3356 float_muladd_negate_c, s);
3357 }
3358
3359 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3360 {
3361 return float64_muladd(float32_to_float64(a, s),
3362 float32_to_float64(b, s), d,
3363 float_muladd_negate_c, s);
3364 }
3365
3366 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3367 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3368 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
3369 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
3370 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3371 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3372 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
3373 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3374
3375 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3376 {
3377 return float32_muladd(float16_to_float32(a, true, s),
3378 float16_to_float32(b, true, s), d,
3379 float_muladd_negate_product, s);
3380 }
3381
3382 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3383 {
3384 return float64_muladd(float32_to_float64(a, s),
3385 float32_to_float64(b, s), d,
3386 float_muladd_negate_product, s);
3387 }
3388
3389 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3390 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3391 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
3392 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
3393 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3394 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3395 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
3396 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
3397
3398 /* Vector Floating-Point Square-Root Instruction */
3399 /* (TD, T2, TX2) */
#define OP_UU_H uint16_t, uint16_t, uint16_t
#define OP_UU_W uint32_t, uint32_t, uint32_t
#define OP_UU_D uint64_t, uint64_t, uint64_t

/*
 * Define do_<NAME>(), the per-element body of a one-operand
 * floating-point vector operation: element HS2(i) of vs2 is passed to
 * OP together with &env->fp_status and the result is stored to element
 * HD(i) of vd.
 */
#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
static void do_##NAME(void *vd, void *vs2, int i,      \
                      CPURISCVState *env)              \
{                                                      \
    TX2 s2 = ((T2 *)vs2)[HS2(i)];                      \
    TD *dst = (TD *)vd + HD(i);                        \
                                                       \
    *dst = OP(s2, &env->fp_status);                    \
}
3411
/*
 * Define the helper entry point for a one-operand floating-point vector
 * operation whose element routine is do_<NAME>().
 *
 * Elements from env->vstart up to (not including) env->vl are processed;
 * an element is skipped when the operation is masked
 * (vext_vm(desc) == 0) and its bit in v0 is clear.  ESZ and DSZ are
 * accepted but not used by this macro.
 *
 * env->vstart is reset to 0 on every path, including vl == 0, which is
 * what GEN_VEXT_VV_ENV and GEN_VEXT_VF already do.  No explicit vl == 0
 * test is needed: the loop condition is false on entry in that case.
 */
#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                 \
void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)   \
{                                                      \
    uint32_t vm = vext_vm(desc);                       \
    uint32_t vl = env->vl;                             \
    uint32_t i;                                        \
                                                       \
    for (i = env->vstart; i < vl; i++) {               \
        if (!vm && !vext_elem_mask(v0, i)) {           \
            continue;                                  \
        }                                              \
        do_##NAME(vd, vs2, i, env);                    \
    }                                                  \
    env->vstart = 0;                                   \
}
3431
3432 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3433 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3434 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3435 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
3436 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
3437 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3438
3439 /*
3440 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3441 *
3442 * Adapted from riscv-v-spec recip.c:
3443 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3444 */
/*
 * Core of the 7-bit reciprocal square-root estimate, independent of the
 * floating-point format.
 *
 * @f:         raw bit pattern of the input value
 * @exp_size:  number of exponent bits in the format
 * @frac_size: number of fraction bits in the format
 *
 * Returns the raw bit pattern of the estimate, laid out in the same
 * format (fraction, exponent, sign from low to high bits).  NaN,
 * infinity, zero and negative inputs are not special-cased here; the
 * per-format callers in this file filter those out before calling.
 */
static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
{
    /*
     * 128-entry table indexed by {exponent LSB, top six fraction bits}.
     * Kept static so it is not rebuilt on the stack on every call.
     */
    static const uint8_t lookup_table[] = {
        52, 51, 50, 48, 47, 46, 44, 43,
        42, 41, 40, 39, 38, 36, 35, 34,
        33, 32, 31, 30, 30, 29, 28, 27,
        26, 25, 24, 23, 23, 22, 21, 20,
        19, 19, 18, 17, 16, 16, 15, 14,
        14, 13, 12, 12, 11, 10, 10, 9,
        9, 8, 7, 7, 6, 6, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0,
        127, 125, 123, 121, 119, 118, 116, 114,
        113, 111, 109, 108, 106, 105, 103, 102,
        100, 99, 97, 96, 95, 93, 92, 91,
        90, 88, 87, 86, 85, 84, 83, 82,
        80, 79, 78, 77, 76, 75, 74, 73,
        72, 71, 70, 70, 69, 68, 67, 66,
        65, 64, 63, 63, 62, 61, 60, 59,
        59, 58, 57, 56, 56, 55, 54, 53
    };
    const int precision = 7;

    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    if (exp == 0 && frac != 0) { /* subnormal */
        /*
         * Normalize the subnormal: shift the fraction left until its top
         * bit is set, decrementing the exponent for each step.  exp is
         * unsigned, so it wraps below zero; the wrapped value is only
         * used modulo 2^64 below.
         */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        /* Drop the now-explicit leading one. */
        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
    }

    /* Index: exponent LSB in bit 6, top (precision - 1) fraction bits below. */
    int idx = ((exp & 1) << (precision - 1)) |
              (frac >> (frac_size - precision + 1));
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                        (frac_size - precision);
    /*
     * ~exp equals -exp - 1 modulo 2^64, so with M = the (exp_size - 1)-bit
     * mask this is (3 * M - 1 - exp) / 2.
     */
    uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;

    uint64_t val = 0;
    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}
3493
3494 static float16 frsqrt7_h(float16 f, float_status *s)
3495 {
3496 int exp_size = 5, frac_size = 10;
3497 bool sign = float16_is_neg(f);
3498
3499 /*
3500 * frsqrt7(sNaN) = canonical NaN
3501 * frsqrt7(-inf) = canonical NaN
3502 * frsqrt7(-normal) = canonical NaN
3503 * frsqrt7(-subnormal) = canonical NaN
3504 */
3505 if (float16_is_signaling_nan(f, s) ||
3506 (float16_is_infinity(f) && sign) ||
3507 (float16_is_normal(f) && sign) ||
3508 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3509 s->float_exception_flags |= float_flag_invalid;
3510 return float16_default_nan(s);
3511 }
3512
3513 /* frsqrt7(qNaN) = canonical NaN */
3514 if (float16_is_quiet_nan(f, s)) {
3515 return float16_default_nan(s);
3516 }
3517
3518 /* frsqrt7(+-0) = +-inf */
3519 if (float16_is_zero(f)) {
3520 s->float_exception_flags |= float_flag_divbyzero;
3521 return float16_set_sign(float16_infinity, sign);
3522 }
3523
3524 /* frsqrt7(+inf) = +0 */
3525 if (float16_is_infinity(f) && !sign) {
3526 return float16_set_sign(float16_zero, sign);
3527 }
3528
3529 /* +normal, +subnormal */
3530 uint64_t val = frsqrt7(f, exp_size, frac_size);
3531 return make_float16(val);
3532 }
3533
3534 static float32 frsqrt7_s(float32 f, float_status *s)
3535 {
3536 int exp_size = 8, frac_size = 23;
3537 bool sign = float32_is_neg(f);
3538
3539 /*
3540 * frsqrt7(sNaN) = canonical NaN
3541 * frsqrt7(-inf) = canonical NaN
3542 * frsqrt7(-normal) = canonical NaN
3543 * frsqrt7(-subnormal) = canonical NaN
3544 */
3545 if (float32_is_signaling_nan(f, s) ||
3546 (float32_is_infinity(f) && sign) ||
3547 (float32_is_normal(f) && sign) ||
3548 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3549 s->float_exception_flags |= float_flag_invalid;
3550 return float32_default_nan(s);
3551 }
3552
3553 /* frsqrt7(qNaN) = canonical NaN */
3554 if (float32_is_quiet_nan(f, s)) {
3555 return float32_default_nan(s);
3556 }
3557
3558 /* frsqrt7(+-0) = +-inf */
3559 if (float32_is_zero(f)) {
3560 s->float_exception_flags |= float_flag_divbyzero;
3561 return float32_set_sign(float32_infinity, sign);
3562 }
3563
3564 /* frsqrt7(+inf) = +0 */
3565 if (float32_is_infinity(f) && !sign) {
3566 return float32_set_sign(float32_zero, sign);
3567 }
3568
3569 /* +normal, +subnormal */
3570 uint64_t val = frsqrt7(f, exp_size, frac_size);
3571 return make_float32(val);
3572 }
3573
3574 static float64 frsqrt7_d(float64 f, float_status *s)
3575 {
3576 int exp_size = 11, frac_size = 52;
3577 bool sign = float64_is_neg(f);
3578
3579 /*
3580 * frsqrt7(sNaN) = canonical NaN
3581 * frsqrt7(-inf) = canonical NaN
3582 * frsqrt7(-normal) = canonical NaN
3583 * frsqrt7(-subnormal) = canonical NaN
3584 */
3585 if (float64_is_signaling_nan(f, s) ||
3586 (float64_is_infinity(f) && sign) ||
3587 (float64_is_normal(f) && sign) ||
3588 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3589 s->float_exception_flags |= float_flag_invalid;
3590 return float64_default_nan(s);
3591 }
3592
3593 /* frsqrt7(qNaN) = canonical NaN */
3594 if (float64_is_quiet_nan(f, s)) {
3595 return float64_default_nan(s);
3596 }
3597
3598 /* frsqrt7(+-0) = +-inf */
3599 if (float64_is_zero(f)) {
3600 s->float_exception_flags |= float_flag_divbyzero;
3601 return float64_set_sign(float64_infinity, sign);
3602 }
3603
3604 /* frsqrt7(+inf) = +0 */
3605 if (float64_is_infinity(f) && !sign) {
3606 return float64_set_sign(float64_zero, sign);
3607 }
3608
3609 /* +normal, +subnormal */
3610 uint64_t val = frsqrt7(f, exp_size, frac_size);
3611 return make_float64(val);
3612 }
3613
/*
 * Instantiate the vfrsqrt7.v helpers for 16-, 32- and 64-bit elements, with
 * frsqrt7_h/_s/_d as the per-element operation.
 * NOTE(review): RVVCALL, OPFVV1 and GEN_VEXT_V_ENV are defined outside this
 * excerpt; the trailing "2, 2" / "4, 4" / "8, 8" arguments presumably are
 * source/destination element sizes in bytes -- confirm at the definitions.
 */
3614 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3615 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3616 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3617 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2)
3618 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4)
3619 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8)
3620
3621 /*
3622 * Vector Floating-Point Reciprocal Estimate Instruction
3623 *
3624 * Adapted from riscv-v-spec recip.c:
3625 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3626 */
3627 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3628 float_status *s)
3629 {
3630 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3631 uint64_t exp = extract64(f, frac_size, exp_size);
3632 uint64_t frac = extract64(f, 0, frac_size);
3633
3634 const uint8_t lookup_table[] = {
3635 127, 125, 123, 121, 119, 117, 116, 114,
3636 112, 110, 109, 107, 105, 104, 102, 100,
3637 99, 97, 96, 94, 93, 91, 90, 88,
3638 87, 85, 84, 83, 81, 80, 79, 77,
3639 76, 75, 74, 72, 71, 70, 69, 68,
3640 66, 65, 64, 63, 62, 61, 60, 59,
3641 58, 57, 56, 55, 54, 53, 52, 51,
3642 50, 49, 48, 47, 46, 45, 44, 43,
3643 42, 41, 40, 40, 39, 38, 37, 36,
3644 35, 35, 34, 33, 32, 31, 31, 30,
3645 29, 28, 28, 27, 26, 25, 25, 24,
3646 23, 23, 22, 21, 21, 20, 19, 19,
3647 18, 17, 17, 16, 15, 15, 14, 14,
3648 13, 12, 12, 11, 11, 10, 9, 9,
3649 8, 8, 7, 7, 6, 5, 5, 4,
3650 4, 3, 3, 2, 2, 1, 1, 0
3651 };
3652 const int precision = 7;
3653
3654 if (exp == 0 && frac != 0) { /* subnormal */
3655 /* Normalize the subnormal. */
3656 while (extract64(frac, frac_size - 1, 1) == 0) {
3657 exp--;
3658 frac <<= 1;
3659 }
3660
3661 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3662
3663 if (exp != 0 && exp != UINT64_MAX) {
3664 /*
3665 * Overflow to inf or max value of same sign,
3666 * depending on sign and rounding mode.
3667 */
3668 s->float_exception_flags |= (float_flag_inexact |
3669 float_flag_overflow);
3670
3671 if ((s->float_rounding_mode == float_round_to_zero) ||
3672 ((s->float_rounding_mode == float_round_down) && !sign) ||
3673 ((s->float_rounding_mode == float_round_up) && sign)) {
3674 /* Return greatest/negative finite value. */
3675 return (sign << (exp_size + frac_size)) |
3676 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3677 } else {
3678 /* Return +-inf. */
3679 return (sign << (exp_size + frac_size)) |
3680 MAKE_64BIT_MASK(frac_size, exp_size);
3681 }
3682 }
3683 }
3684
3685 int idx = frac >> (frac_size - precision);
3686 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3687 (frac_size - precision);
3688 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3689
3690 if (out_exp == 0 || out_exp == UINT64_MAX) {
3691 /*
3692 * The result is subnormal, but don't raise the underflow exception,
3693 * because there's no additional loss of precision.
3694 */
3695 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3696 if (out_exp == UINT64_MAX) {
3697 out_frac >>= 1;
3698 out_exp = 0;
3699 }
3700 }
3701
3702 uint64_t val = 0;
3703 val = deposit64(val, 0, frac_size, out_frac);
3704 val = deposit64(val, frac_size, exp_size, out_exp);
3705 val = deposit64(val, frac_size + exp_size, 1, sign);
3706 return val;
3707 }
3708
3709 static float16 frec7_h(float16 f, float_status *s)
3710 {
3711 int exp_size = 5, frac_size = 10;
3712 bool sign = float16_is_neg(f);
3713
3714 /* frec7(+-inf) = +-0 */
3715 if (float16_is_infinity(f)) {
3716 return float16_set_sign(float16_zero, sign);
3717 }
3718
3719 /* frec7(+-0) = +-inf */
3720 if (float16_is_zero(f)) {
3721 s->float_exception_flags |= float_flag_divbyzero;
3722 return float16_set_sign(float16_infinity, sign);
3723 }
3724
3725 /* frec7(sNaN) = canonical NaN */
3726 if (float16_is_signaling_nan(f, s)) {
3727 s->float_exception_flags |= float_flag_invalid;
3728 return float16_default_nan(s);
3729 }
3730
3731 /* frec7(qNaN) = canonical NaN */
3732 if (float16_is_quiet_nan(f, s)) {
3733 return float16_default_nan(s);
3734 }
3735
3736 /* +-normal, +-subnormal */
3737 uint64_t val = frec7(f, exp_size, frac_size, s);
3738 return make_float16(val);
3739 }
3740
3741 static float32 frec7_s(float32 f, float_status *s)
3742 {
3743 int exp_size = 8, frac_size = 23;
3744 bool sign = float32_is_neg(f);
3745
3746 /* frec7(+-inf) = +-0 */
3747 if (float32_is_infinity(f)) {
3748 return float32_set_sign(float32_zero, sign);
3749 }
3750
3751 /* frec7(+-0) = +-inf */
3752 if (float32_is_zero(f)) {
3753 s->float_exception_flags |= float_flag_divbyzero;
3754 return float32_set_sign(float32_infinity, sign);
3755 }
3756
3757 /* frec7(sNaN) = canonical NaN */
3758 if (float32_is_signaling_nan(f, s)) {
3759 s->float_exception_flags |= float_flag_invalid;
3760 return float32_default_nan(s);
3761 }
3762
3763 /* frec7(qNaN) = canonical NaN */
3764 if (float32_is_quiet_nan(f, s)) {
3765 return float32_default_nan(s);
3766 }
3767
3768 /* +-normal, +-subnormal */
3769 uint64_t val = frec7(f, exp_size, frac_size, s);
3770 return make_float32(val);
3771 }
3772
3773 static float64 frec7_d(float64 f, float_status *s)
3774 {
3775 int exp_size = 11, frac_size = 52;
3776 bool sign = float64_is_neg(f);
3777
3778 /* frec7(+-inf) = +-0 */
3779 if (float64_is_infinity(f)) {
3780 return float64_set_sign(float64_zero, sign);
3781 }
3782
3783 /* frec7(+-0) = +-inf */
3784 if (float64_is_zero(f)) {
3785 s->float_exception_flags |= float_flag_divbyzero;
3786 return float64_set_sign(float64_infinity, sign);
3787 }
3788
3789 /* frec7(sNaN) = canonical NaN */
3790 if (float64_is_signaling_nan(f, s)) {
3791 s->float_exception_flags |= float_flag_invalid;
3792 return float64_default_nan(s);
3793 }
3794
3795 /* frec7(qNaN) = canonical NaN */
3796 if (float64_is_quiet_nan(f, s)) {
3797 return float64_default_nan(s);
3798 }
3799
3800 /* +-normal, +-subnormal */
3801 uint64_t val = frec7(f, exp_size, frac_size, s);
3802 return make_float64(val);
3803 }
3804
/*
 * Instantiate the vfrec7.v helpers for 16-, 32- and 64-bit elements, with
 * frec7_h/_s/_d as the per-element operation.
 * NOTE(review): RVVCALL, OPFVV1 and GEN_VEXT_V_ENV are defined outside this
 * excerpt; the numeric arguments presumably are element sizes in bytes --
 * confirm at the definitions.
 */
3805 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3806 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3807 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
3808 GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2)
3809 GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4)
3810 GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8)
3811
/*
 * vfmin/vfmax helpers for 16-, 32- and 64-bit elements.  The per-element
 * operation is softfloat's floatN_minimum_number / floatN_maximum_number.
 * The _vv variants go through OPFVV2 + GEN_VEXT_VV_ENV, the _vf variants
 * through OPFVF2 + GEN_VEXT_VF.
 * NOTE(review): those macros are defined outside this excerpt; presumably
 * _vv takes two vector operands and _vf a vector and a scalar -- confirm.
 */
3812 /* Vector Floating-Point MIN/MAX Instructions */
3813 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3814 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3815 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
3816 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
3817 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
3818 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
3819 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3820 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3821 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
3822 GEN_VEXT_VF(vfmin_vf_h, 2, 2)
3823 GEN_VEXT_VF(vfmin_vf_w, 4, 4)
3824 GEN_VEXT_VF(vfmin_vf_d, 8, 8)
3825
3826 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3827 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3828 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
3829 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
3830 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
3831 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
3832 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3833 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3834 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
3835 GEN_VEXT_VF(vfmax_vf_h, 2, 2)
3836 GEN_VEXT_VF(vfmax_vf_w, 4, 4)
3837 GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3838
3839 /* Vector Floating-Point Sign-Injection Instructions */
3840 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3841 {
3842 return deposit64(b, 0, 15, a);
3843 }
3844
3845 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3846 {
3847 return deposit64(b, 0, 31, a);
3848 }
3849
3850 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3851 {
3852 return deposit64(b, 0, 63, a);
3853 }
3854
/*
 * vfsgnj helpers (vector-vector and vector-scalar forms) for 16-, 32- and
 * 64-bit elements, using fsgnj16/32/64 as the per-element operation.
 * NOTE(review): RVVCALL, OPFVV2, OPFVF2, GEN_VEXT_VV_ENV and GEN_VEXT_VF are
 * defined outside this excerpt -- confirm operand order at the definitions.
 */
3855 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3856 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3857 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3858 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
3859 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
3860 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
3861 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3862 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3863 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3864 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
3865 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
3866 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3867
3868 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3869 {
3870 return deposit64(~b, 0, 15, a);
3871 }
3872
3873 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3874 {
3875 return deposit64(~b, 0, 31, a);
3876 }
3877
3878 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3879 {
3880 return deposit64(~b, 0, 63, a);
3881 }
3882
/*
 * vfsgnjn helpers (vector-vector and vector-scalar forms) for 16-, 32- and
 * 64-bit elements, using fsgnjn16/32/64 as the per-element operation.
 * NOTE(review): RVVCALL, OPFVV2, OPFVF2, GEN_VEXT_VV_ENV and GEN_VEXT_VF are
 * defined outside this excerpt -- confirm operand order at the definitions.
 */
3883 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3884 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3885 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3886 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
3887 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
3888 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
3889 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3890 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3891 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3892 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
3893 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
3894 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3895
3896 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3897 {
3898 return deposit64(b ^ a, 0, 15, a);
3899 }
3900
3901 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3902 {
3903 return deposit64(b ^ a, 0, 31, a);
3904 }
3905
3906 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3907 {
3908 return deposit64(b ^ a, 0, 63, a);
3909 }
3910
/*
 * vfsgnjx helpers (vector-vector and vector-scalar forms) for 16-, 32- and
 * 64-bit elements, using fsgnjx16/32/64 as the per-element operation.
 * NOTE(review): RVVCALL, OPFVV2, OPFVF2, GEN_VEXT_VV_ENV and GEN_VEXT_VF are
 * defined outside this excerpt -- confirm operand order at the definitions.
 */
3911 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3912 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3913 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3914 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
3915 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
3916 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
3917 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3918 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3919 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3920 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
3921 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
3922 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
3923
3924 /* Vector Floating-Point Compare Instructions */
/*
 * Generate a vector-vector floating-point compare helper.
 *
 * NAME:  helper name handed to HELPER()
 * ETYPE: integer type holding one source element
 * H:     maps element index i to the index used for the memory access
 * DO_OP: predicate, called as DO_OP(vs2[i], vs1[i], &env->fp_status)
 *
 * For every element from env->vstart up to env->vl that is active (vm set,
 * or its v0 mask bit set) the predicate result is stored in mask bit i of
 * vd.  Inactive elements are left untouched; vstart is cleared on exit.
 */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,           \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    const uint32_t unmasked = vext_vm(desc);                          \
    const uint32_t nelem = env->vl;                                   \
    uint32_t idx;                                                     \
                                                                      \
    for (idx = env->vstart; idx < nelem; idx++) {                     \
        if (unmasked || vext_elem_mask(v0, idx)) {                    \
            ETYPE rhs = *((ETYPE *)vs1 + H(idx));                     \
            ETYPE lhs = *((ETYPE *)vs2 + H(idx));                     \
            vext_set_elem_mask(vd, idx,                               \
                               DO_OP(lhs, rhs, &env->fp_status));     \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
}
3944
/* vmfeq.vv: element-wise equality using softfloat's floatN_eq_quiet. */
3945 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3946 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3947 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3948
/*
 * Generate a vector-scalar floating-point compare helper.
 *
 * NAME:  helper name handed to HELPER()
 * ETYPE: integer type holding one element; the scalar s1 is cast to it
 * H:     maps element index i to the index used for the memory access
 * DO_OP: predicate, called as DO_OP(vs2[i], (ETYPE)s1, &env->fp_status)
 *
 * For every element from env->vstart up to env->vl that is active (vm set,
 * or its v0 mask bit set) the predicate result is stored in mask bit i of
 * vd.  Inactive elements are left untouched; vstart is cleared on exit.
 */
#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                        \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    const uint32_t unmasked = vext_vm(desc);                          \
    const uint32_t nelem = env->vl;                                   \
    uint32_t idx;                                                     \
                                                                      \
    for (idx = env->vstart; idx < nelem; idx++) {                     \
        if (unmasked || vext_elem_mask(v0, idx)) {                    \
            ETYPE lhs = *((ETYPE *)vs2 + H(idx));                     \
            vext_set_elem_mask(vd, idx,                               \
                               DO_OP(lhs, (ETYPE)s1,                  \
                                     &env->fp_status));               \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
}
3967
/* vmfeq.vf: compare each element with the scalar using floatN_eq_quiet. */
3968 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3969 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3970 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3971
3972 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3973 {
3974 FloatRelation compare = float16_compare_quiet(a, b, s);
3975 return compare != float_relation_equal;
3976 }
3977
3978 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3979 {
3980 FloatRelation compare = float32_compare_quiet(a, b, s);
3981 return compare != float_relation_equal;
3982 }
3983
3984 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3985 {
3986 FloatRelation compare = float64_compare_quiet(a, b, s);
3987 return compare != float_relation_equal;
3988 }
3989
/*
 * Compare helpers, each in vector-vector (_vv) and vector-scalar (_vf) form
 * for 16-, 32- and 64-bit elements:
 *   vmfne - the vmfne16/32/64 wrappers defined above
 *   vmflt - softfloat floatN_lt
 *   vmfle - softfloat floatN_le
 */
3990 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3991 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3992 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3993 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3994 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3995 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3996
3997 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3998 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3999 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4000 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4001 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4002 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4003
4004 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4005 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4006 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4007 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4008 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4009 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4010
4011 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4012 {
4013 FloatRelation compare = float16_compare(a, b, s);
4014 return compare == float_relation_greater;
4015 }
4016
4017 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4018 {
4019 FloatRelation compare = float32_compare(a, b, s);
4020 return compare == float_relation_greater;
4021 }
4022
4023 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4024 {
4025 FloatRelation compare = float64_compare(a, b, s);
4026 return compare == float_relation_greater;
4027 }
4028
/* vmfgt.vf: vector-scalar form only is instantiated here. */
4029 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4030 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4031 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4032
4033 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4034 {
4035 FloatRelation compare = float16_compare(a, b, s);
4036 return compare == float_relation_greater ||
4037 compare == float_relation_equal;
4038 }
4039
4040 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4041 {
4042 FloatRelation compare = float32_compare(a, b, s);
4043 return compare == float_relation_greater ||
4044 compare == float_relation_equal;
4045 }
4046
4047 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4048 {
4049 FloatRelation compare = float64_compare(a, b, s);
4050 return compare == float_relation_greater ||
4051 compare == float_relation_equal;
4052 }
4053
/* vmfge.vf: vector-scalar form only is instantiated here. */
4054 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4055 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4056 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4057
4058 /* Vector Floating-Point Classify Instruction */
/*
 * Define do_NAME(): apply the one-argument operation OP to element i of vs2
 * and store the result in element i of vd.
 *
 * TD/HD:  destination element type and index mapping
 * T2/HS2: source element type and index mapping
 * TX2:    type the source element is converted to before OP is applied
 */
#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
static void do_##NAME(void *vd, void *vs2, int i)      \
{                                                      \
    TD *dst = (TD *)vd + HD(i);                        \
    TX2 operand = *((T2 *)vs2 + HS2(i));               \
                                                       \
    *dst = OP(operand);                                \
}
4065
/*
 * Generate the helper entry point for a one-operand vector operation that
 * needs no env argument in its per-element routine.
 *
 * Calls do_NAME(vd, vs2, i) for every element from env->vstart up to
 * env->vl that is active (vm set, or its v0 mask bit set), then clears
 * vstart.  ESZ and DSZ are accepted but not used by the body.
 */
#define GEN_VEXT_V(NAME, ESZ, DSZ)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)   \
{                                                      \
    const uint32_t unmasked = vext_vm(desc);           \
    const uint32_t nelem = env->vl;                    \
    uint32_t idx;                                      \
                                                       \
    for (idx = env->vstart; idx < nelem; idx++) {      \
        if (unmasked || vext_elem_mask(v0, idx)) {     \
            do_##NAME(vd, vs2, idx);                   \
        }                                              \
    }                                                  \
    env->vstart = 0;                                   \
}
4082
4083 target_ulong fclass_h(uint64_t frs1)
4084 {
4085 float16 f = frs1;
4086 bool sign = float16_is_neg(f);
4087
4088 if (float16_is_infinity(f)) {
4089 return sign ? 1 << 0 : 1 << 7;
4090 } else if (float16_is_zero(f)) {
4091 return sign ? 1 << 3 : 1 << 4;
4092 } else if (float16_is_zero_or_denormal(f)) {
4093 return sign ? 1 << 2 : 1 << 5;
4094 } else if (float16_is_any_nan(f)) {
4095 float_status s = { }; /* for snan_bit_is_one */
4096 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4097 } else {
4098 return sign ? 1 << 1 : 1 << 6;
4099 }
4100 }
4101
4102 target_ulong fclass_s(uint64_t frs1)
4103 {
4104 float32 f = frs1;
4105 bool sign = float32_is_neg(f);
4106
4107 if (float32_is_infinity(f)) {
4108 return sign ? 1 << 0 : 1 << 7;
4109 } else if (float32_is_zero(f)) {
4110 return sign ? 1 << 3 : 1 << 4;
4111 } else if (float32_is_zero_or_denormal(f)) {
4112 return sign ? 1 << 2 : 1 << 5;
4113 } else if (float32_is_any_nan(f)) {
4114 float_status s = { }; /* for snan_bit_is_one */
4115 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4116 } else {
4117 return sign ? 1 << 1 : 1 << 6;
4118 }
4119 }
4120
4121 target_ulong fclass_d(uint64_t frs1)
4122 {
4123 float64 f = frs1;
4124 bool sign = float64_is_neg(f);
4125
4126 if (float64_is_infinity(f)) {
4127 return sign ? 1 << 0 : 1 << 7;
4128 } else if (float64_is_zero(f)) {
4129 return sign ? 1 << 3 : 1 << 4;
4130 } else if (float64_is_zero_or_denormal(f)) {
4131 return sign ? 1 << 2 : 1 << 5;
4132 } else if (float64_is_any_nan(f)) {
4133 float_status s = { }; /* for snan_bit_is_one */
4134 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4135 } else {
4136 return sign ? 1 << 1 : 1 << 6;
4137 }
4138 }
4139
/*
 * vfclass.v helpers for 16-, 32- and 64-bit elements: OPIVV1 defines
 * do_vfclass_v_*() around fclass_h/_s/_d and GEN_VEXT_V emits the helper
 * entry point that loops over the active elements.
 * NOTE(review): RVVCALL and the OP_UU_* type lists are defined outside this
 * excerpt.
 */
4140 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4141 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4142 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4143 GEN_VEXT_V(vfclass_v_h, 2, 2)
4144 GEN_VEXT_V(vfclass_v_w, 4, 4)
4145 GEN_VEXT_V(vfclass_v_d, 8, 8)
4146
4147 /* Vector Floating-Point Merge Instruction */
/*
 * Generate a vfmerge helper: for every element from env->vstart up to
 * env->vl, vd[i] receives the scalar s1 when the element is active (vm set,
 * or its v0 mask bit set) and vs2[i] otherwise.  vstart is cleared on exit.
 *
 * ETYPE: element type; H: maps element index i to the access index.
 */
#define GEN_VFMERGE_VF(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    const uint32_t unmasked = vext_vm(desc);                      \
    const uint32_t nelem = env->vl;                               \
    uint32_t idx;                                                 \
                                                                  \
    for (idx = env->vstart; idx < nelem; idx++) {                 \
        ETYPE *dst = (ETYPE *)vd + H(idx);                        \
                                                                  \
        if (unmasked || vext_elem_mask(v0, idx)) {                \
            *dst = s1;                                            \
        } else {                                                  \
            *dst = *((ETYPE *)vs2 + H(idx));                      \
        }                                                         \
    }                                                             \
    env->vstart = 0;                                              \
}
4163
/* vfmerge.vfm for 16-, 32- and 64-bit elements (signed element types). */
4164 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4165 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4166 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4167
/*
 * Same-width conversions between floating-point and integer elements, for
 * 16-, 32- and 64-bit elements.  The per-element operation is the softfloat
 * conversion named as the last RVVCALL argument.
 * NOTE(review): RVVCALL, OPFVV1, OP_UU_* and GEN_VEXT_V_ENV are defined
 * outside this excerpt; rounding presumably follows the float_status passed
 * by those macros -- confirm at the definitions.
 */
4168 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
4169 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4170 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4171 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4172 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4173 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
4174 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
4175 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
4176
4177 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4178 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4179 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4180 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4181 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
4182 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
4183 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
4184
4185 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4186 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4187 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4188 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4189 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
4190 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
4191 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
4192
4193 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4194 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4195 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4196 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4197 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
4198 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
4199 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
4200
/*
 * Widening conversions: the destination element is twice as wide as the
 * source element, as the WOP_UU_* type lists below spell out (destination
 * type first, then the source type twice).
 * NOTE(review): RVVCALL, OPFVV1 and GEN_VEXT_V_ENV are defined outside this
 * excerpt; the two numeric GEN_VEXT_V_ENV arguments presumably are source
 * and destination element sizes in bytes -- confirm at the definition.
 */
4201 /* Widening Floating-Point/Integer Type-Convert Instructions */
4202 /* (TD, T2, TX2) */
4203 #define WOP_UU_B uint16_t, uint8_t, uint8_t
4204 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4205 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4206 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
4207 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4208 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4209 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
4210 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
4211
4212 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4213 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4214 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4215 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
4216 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
4217
4218 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
4219 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4220 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4221 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4222 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
4223 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
4224 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
4225
4226 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4227 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4228 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4229 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4230 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
4231 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
4232 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
4233
4234 /*
4235 * vfwcvt.f.f.v vd, vs2, vm
4236 * Convert single-width float to double-width float.
4237 */
4238 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4239 {
4240 return float16_to_float32(a, true, s);
4241 }
4242
/*
 * vfwcvt.f.f.v: 16->32 bit goes through the vfwcvtffv16 wrapper, 32->64 bit
 * calls float32_to_float64 directly.
 */
4243 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4244 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4245 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
4246 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
4247
/*
 * Narrowing conversions: the destination element is half as wide as the
 * source element, as the NOP_UU_* type lists below spell out (destination
 * type first, then source type, then the type the source is converted to).
 * NOTE(review): NOP_UU_B gives uint32_t as its third entry while _H and _W
 * repeat the source type -- confirm this is intentional.
 * NOTE(review): RVVCALL, OPFVV1 and GEN_VEXT_V_ENV are defined outside this
 * excerpt.
 */
4248 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4249 /* (TD, T2, TX2) */
4250 #define NOP_UU_B uint8_t, uint16_t, uint32_t
4251 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4252 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4253 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4254 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4255 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4256 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4257 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
4258 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
4259 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)
4260
4261 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4262 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4263 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4264 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4265 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
4266 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
4267 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)
4268
4269 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
4270 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4271 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4272 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
4273 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)
4274
4275 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4276 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4277 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4278 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
4279 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
4280
4281 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
4282 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4283 {
4284 return float32_to_float16(a, true, s);
4285 }
4286
/*
 * vfncvt.f.f.w: 32->16 bit goes through the vfncvtffv16 wrapper, 64->32 bit
 * calls float64_to_float32 directly.
 */
4287 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4288 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4289 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
4290 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
4291
4292 /*
4293 *** Vector Reduction Operations
4294 */
4295 /* Vector Single-Width Integer Reduction Instructions */
/*
 * Generate an integer reduction helper.
 *
 * TD/HD:   accumulator (destination) type and index mapping
 * TS2/HS2: source element type and index mapping
 * OP:      binary operation, applied as OP(accumulator, (TD)element)
 *
 * The accumulator starts from element 0 of vs1, folds in every element of
 * vs2 from env->vstart up to env->vl that is active (vm set, or its v0 mask
 * bit set) and is written to element 0 of vd.  vstart is cleared on exit.
 */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)                  \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                  \
                  void *vs2, CPURISCVState *env, uint32_t desc)   \
{                                                                 \
    const uint32_t unmasked = vext_vm(desc);                      \
    const uint32_t nelem = env->vl;                               \
    TD acc = *((TD *)vs1 + HD(0));                                \
    uint32_t idx;                                                 \
                                                                  \
    for (idx = env->vstart; idx < nelem; idx++) {                 \
        if (unmasked || vext_elem_mask(v0, idx)) {                \
            TS2 elem = *((TS2 *)vs2 + HS2(idx));                  \
            acc = OP(acc, (TD)elem);                              \
        }                                                         \
    }                                                             \
    *((TD *)vd + HD(0)) = acc;                                    \
    env->vstart = 0;                                              \
}
4315
/*
 * Integer reduction helpers generated with GEN_VEXT_RED.  The element types
 * select signed or unsigned behaviour for min/max; the widening sums at the
 * end use an accumulator twice as wide as the source element.
 * NOTE(review): DO_ADD, DO_MAX, DO_MIN, DO_AND, DO_OR and DO_XOR are defined
 * outside this excerpt.
 */
4316 /* vd[0] = sum(vs1[0], vs2[*]) */
4317 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4318 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4319 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4320 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4321
4322 /* vd[0] = maxu(vs1[0], vs2[*]) */
4323 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4324 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4325 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4326 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4327
4328 /* vd[0] = max(vs1[0], vs2[*]) */
4329 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4330 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4331 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4332 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4333
4334 /* vd[0] = minu(vs1[0], vs2[*]) */
4335 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4336 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4337 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4338 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4339
4340 /* vd[0] = min(vs1[0], vs2[*]) */
4341 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4342 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4343 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4344 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4345
4346 /* vd[0] = and(vs1[0], vs2[*]) */
4347 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4348 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4349 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4350 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4351
4352 /* vd[0] = or(vs1[0], vs2[*]) */
4353 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4354 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4355 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4356 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4357
4358 /* vd[0] = xor(vs1[0], vs2[*]) */
4359 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4360 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4361 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4362 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4363
4364 /* Vector Widening Integer Reduction Instructions */
4365 /* signed sum reduction into double-width accumulator */
4366 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4367 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4368 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4369
4370 /* Unsigned sum reduction into double-width accumulator */
4371 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4372 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4373 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4374
4375 /* Vector Single-Width Floating-Point Reduction Instructions */
/*
 * Generate a floating-point reduction helper.
 *
 * TD/HD:   accumulator (destination) type and index mapping
 * TS2/HS2: source element type and index mapping
 * OP:      applied as OP(accumulator, (TD)element, &env->fp_status)
 *
 * The accumulator starts from element 0 of vs1, folds in every element of
 * vs2 from env->vstart up to env->vl that is active (vm set, or its v0 mask
 * bit set), in increasing index order, and is written to element 0 of vd.
 * vstart is cleared on exit.
 */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)                 \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                  \
                  void *vs2, CPURISCVState *env,                  \
                  uint32_t desc)                                  \
{                                                                 \
    const uint32_t unmasked = vext_vm(desc);                      \
    const uint32_t nelem = env->vl;                               \
    TD acc = *((TD *)vs1 + HD(0));                                \
    uint32_t idx;                                                 \
                                                                  \
    for (idx = env->vstart; idx < nelem; idx++) {                 \
        if (unmasked || vext_elem_mask(v0, idx)) {                \
            TS2 elem = *((TS2 *)vs2 + HS2(idx));                  \
            acc = OP(acc, (TD)elem, &env->fp_status);             \
        }                                                         \
    }                                                             \
    *((TD *)vd + HD(0)) = acc;                                    \
    env->vstart = 0;                                              \
}
4396
/*
 * Floating-point reduction helpers generated with GEN_VEXT_FRED for 16-,
 * 32- and 64-bit elements: sum uses floatN_add, max/min use
 * floatN_maximum_number / floatN_minimum_number.
 */
4397 /* Unordered sum */
4398 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4399 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4400 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4401
4402 /* Maximum value */
4403 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4404 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4405 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
4406
4407 /* Minimum value */
4408 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4409 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4410 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4411
4412 /* Vector Widening Floating-Point Reduction Instructions */
4413 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4414 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4415 void *vs2, CPURISCVState *env, uint32_t desc)
4416 {
4417 uint32_t vm = vext_vm(desc);
4418 uint32_t vl = env->vl;
4419 uint32_t i;
4420 uint32_t s1 = *((uint32_t *)vs1 + H4(0));
4421
4422 for (i = env->vstart; i < vl; i++) {
4423 uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4424 if (!vm && !vext_elem_mask(v0, i)) {
4425 continue;
4426 }
4427 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4428 &env->fp_status);
4429 }
4430 *((uint32_t *)vd + H4(0)) = s1;
4431 env->vstart = 0;
4432 }
4433
4434 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4435 void *vs2, CPURISCVState *env, uint32_t desc)
4436 {
4437 uint32_t vm = vext_vm(desc);
4438 uint32_t vl = env->vl;
4439 uint32_t i;
4440 uint64_t s1 = *((uint64_t *)vs1);
4441
4442 for (i = env->vstart; i < vl; i++) {
4443 uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4444 if (!vm && !vext_elem_mask(v0, i)) {
4445 continue;
4446 }
4447 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4448 &env->fp_status);
4449 }
4450 *((uint64_t *)vd) = s1;
4451 env->vstart = 0;
4452 }
4453
4454 /*
4455 *** Vector Mask Operations
4456 */
/* Vector Mask-Register Logical Instructions */
/*
 * Generate a mask-logical helper NAME. For every element index in
 * [vstart, vl) it reads one mask bit from vs1 and one from vs2, then stores
 * OP(vs2 bit, vs1 bit) as the mask bit of vd. v0 and desc are not used:
 * these operations are never masked. vstart is cleared on completion.
 */
#define GEN_VEXT_MASK_VV(NAME, OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    const uint32_t vl = env->vl; \
    uint32_t idx; \
\
    for (idx = env->vstart; idx < vl; idx++) { \
        int bit1 = vext_elem_mask(vs1, idx); \
        int bit2 = vext_elem_mask(vs2, idx); \
\
        vext_set_elem_mask(vd, idx, OP(bit2, bit1)); \
    } \
    env->vstart = 0; \
}
4474
/*
 * Single-bit logical operators passed to GEN_VEXT_MASK_VV as OP.
 *
 * Every argument is parenthesised so that an expression argument such as
 * "a | b" is evaluated as a unit instead of re-associating with the
 * operator inside the macro. The result is 0 or 1 when both operands are
 * 0 or 1 (presumably what vext_elem_mask() yields -- confirm at its
 * definition). DO_ANDNOT and DO_ORNOT use logical negation on M and so
 * only look at bit 0 of N.
 */
#define DO_NAND(N, M)    (!((N) & (M)))
#define DO_ANDNOT(N, M)  ((N) & !(M))
#define DO_NOR(N, M)     (!((N) | (M)))
#define DO_ORNOT(N, M)   ((N) | !(M))
#define DO_XNOR(N, M)    (!((N) ^ (M)))
4480
4481 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4482 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4483 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4484 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4485 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4486 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4487 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4488 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4489
4490 /* Vector count population in mask vcpop */
4491 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4492 uint32_t desc)
4493 {
4494 target_ulong cnt = 0;
4495 uint32_t vm = vext_vm(desc);
4496 uint32_t vl = env->vl;
4497 int i;
4498
4499 for (i = env->vstart; i < vl; i++) {
4500 if (vm || vext_elem_mask(v0, i)) {
4501 if (vext_elem_mask(vs2, i)) {
4502 cnt++;
4503 }
4504 }
4505 }
4506 env->vstart = 0;
4507 return cnt;
4508 }
4509
4510 /* vfirst find-first-set mask bit*/
4511 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4512 uint32_t desc)
4513 {
4514 uint32_t vm = vext_vm(desc);
4515 uint32_t vl = env->vl;
4516 int i;
4517
4518 for (i = env->vstart; i < vl; i++) {
4519 if (vm || vext_elem_mask(v0, i)) {
4520 if (vext_elem_mask(vs2, i)) {
4521 return i;
4522 }
4523 }
4524 }
4525 env->vstart = 0;
4526 return -1LL;
4527 }
4528
/* Selects which set-mask variant vmsetm() implements. */
enum set_mask_type {
    ONLY_FIRST = 1,     /* used by vmsof.m */
    INCLUDE_FIRST = 2,  /* used by vmsif.m */
    BEFORE_FIRST = 3,   /* used by vmsbf.m */
};
4534
4535 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4536 uint32_t desc, enum set_mask_type type)
4537 {
4538 uint32_t vm = vext_vm(desc);
4539 uint32_t vl = env->vl;
4540 int i;
4541 bool first_mask_bit = false;
4542
4543 for (i = env->vstart; i < vl; i++) {
4544 if (!vm && !vext_elem_mask(v0, i)) {
4545 continue;
4546 }
4547 /* write a zero to all following active elements */
4548 if (first_mask_bit) {
4549 vext_set_elem_mask(vd, i, 0);
4550 continue;
4551 }
4552 if (vext_elem_mask(vs2, i)) {
4553 first_mask_bit = true;
4554 if (type == BEFORE_FIRST) {
4555 vext_set_elem_mask(vd, i, 0);
4556 } else {
4557 vext_set_elem_mask(vd, i, 1);
4558 }
4559 } else {
4560 if (type == ONLY_FIRST) {
4561 vext_set_elem_mask(vd, i, 0);
4562 } else {
4563 vext_set_elem_mask(vd, i, 1);
4564 }
4565 }
4566 }
4567 env->vstart = 0;
4568 }
4569
4570 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4571 uint32_t desc)
4572 {
4573 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4574 }
4575
4576 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4577 uint32_t desc)
4578 {
4579 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4580 }
4581
4582 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4583 uint32_t desc)
4584 {
4585 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4586 }
4587
4588 /* Vector Iota Instruction */
4589 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
4590 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4591 uint32_t desc) \
4592 { \
4593 uint32_t vm = vext_vm(desc); \
4594 uint32_t vl = env->vl; \
4595 uint32_t sum = 0; \
4596 int i; \
4597 \
4598 for (i = env->vstart; i < vl; i++) { \
4599 if (!vm && !vext_elem_mask(v0, i)) { \
4600 continue; \
4601 } \
4602 *((ETYPE *)vd + H(i)) = sum; \
4603 if (vext_elem_mask(vs2, i)) { \
4604 sum++; \
4605 } \
4606 } \
4607 env->vstart = 0; \
4608 }
4609
4610 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4611 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4612 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4613 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4614
4615 /* Vector Element Index Instruction */
4616 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \
4617 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4618 { \
4619 uint32_t vm = vext_vm(desc); \
4620 uint32_t vl = env->vl; \
4621 int i; \
4622 \
4623 for (i = env->vstart; i < vl; i++) { \
4624 if (!vm && !vext_elem_mask(v0, i)) { \
4625 continue; \
4626 } \
4627 *((ETYPE *)vd + H(i)) = i; \
4628 } \
4629 env->vstart = 0; \
4630 }
4631
4632 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4633 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4634 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4635 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4636
4637 /*
4638 *** Vector Permutation Instructions
4639 */
4640
4641 /* Vector Slide Instructions */
4642 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
4643 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4644 CPURISCVState *env, uint32_t desc) \
4645 { \
4646 uint32_t vm = vext_vm(desc); \
4647 uint32_t vl = env->vl; \
4648 target_ulong offset = s1, i_min, i; \
4649 \
4650 i_min = MAX(env->vstart, offset); \
4651 for (i = i_min; i < vl; i++) { \
4652 if (!vm && !vext_elem_mask(v0, i)) { \
4653 continue; \
4654 } \
4655 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
4656 } \
4657 }
4658
4659 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4660 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
4661 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4662 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4663 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4664
4665 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
4666 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4667 CPURISCVState *env, uint32_t desc) \
4668 { \
4669 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
4670 uint32_t vm = vext_vm(desc); \
4671 uint32_t vl = env->vl; \
4672 target_ulong i_max, i; \
4673 \
4674 i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
4675 for (i = env->vstart; i < i_max; ++i) { \
4676 if (vm || vext_elem_mask(v0, i)) { \
4677 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
4678 } \
4679 } \
4680 \
4681 for (i = i_max; i < vl; ++i) { \
4682 if (vm || vext_elem_mask(v0, i)) { \
4683 *((ETYPE *)vd + H(i)) = 0; \
4684 } \
4685 } \
4686 \
4687 env->vstart = 0; \
4688 }
4689
4690 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4691 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
4692 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4693 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4694 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4695
4696 #define GEN_VEXT_VSLIE1UP(ESZ, H) \
4697 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4698 CPURISCVState *env, uint32_t desc) \
4699 { \
4700 typedef uint##ESZ##_t ETYPE; \
4701 uint32_t vm = vext_vm(desc); \
4702 uint32_t vl = env->vl; \
4703 uint32_t i; \
4704 \
4705 for (i = env->vstart; i < vl; i++) { \
4706 if (!vm && !vext_elem_mask(v0, i)) { \
4707 continue; \
4708 } \
4709 if (i == 0) { \
4710 *((ETYPE *)vd + H(i)) = s1; \
4711 } else { \
4712 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
4713 } \
4714 } \
4715 env->vstart = 0; \
4716 }
4717
4718 GEN_VEXT_VSLIE1UP(8, H1)
4719 GEN_VEXT_VSLIE1UP(16, H2)
4720 GEN_VEXT_VSLIE1UP(32, H4)
4721 GEN_VEXT_VSLIE1UP(64, H8)
4722
4723 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \
4724 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4725 CPURISCVState *env, uint32_t desc) \
4726 { \
4727 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
4728 }
4729
4730 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4731 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4732 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4733 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4734 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4735
4736 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \
4737 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4738 CPURISCVState *env, uint32_t desc) \
4739 { \
4740 typedef uint##ESZ##_t ETYPE; \
4741 uint32_t vm = vext_vm(desc); \
4742 uint32_t vl = env->vl; \
4743 uint32_t i; \
4744 \
4745 for (i = env->vstart; i < vl; i++) { \
4746 if (!vm && !vext_elem_mask(v0, i)) { \
4747 continue; \
4748 } \
4749 if (i == vl - 1) { \
4750 *((ETYPE *)vd + H(i)) = s1; \
4751 } else { \
4752 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
4753 } \
4754 } \
4755 env->vstart = 0; \
4756 }
4757
4758 GEN_VEXT_VSLIDE1DOWN(8, H1)
4759 GEN_VEXT_VSLIDE1DOWN(16, H2)
4760 GEN_VEXT_VSLIDE1DOWN(32, H4)
4761 GEN_VEXT_VSLIDE1DOWN(64, H8)
4762
4763 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \
4764 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4765 CPURISCVState *env, uint32_t desc) \
4766 { \
4767 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
4768 }
4769
4770 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4771 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4772 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4773 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4774 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4775
4776 /* Vector Floating-Point Slide Instructions */
4777 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \
4778 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4779 CPURISCVState *env, uint32_t desc) \
4780 { \
4781 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
4782 }
4783
4784 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4785 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4786 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4787 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4788
4789 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \
4790 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4791 CPURISCVState *env, uint32_t desc) \
4792 { \
4793 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
4794 }
4795
4796 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4797 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4798 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4799 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4800
4801 /* Vector Register Gather Instruction */
4802 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
4803 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4804 CPURISCVState *env, uint32_t desc) \
4805 { \
4806 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
4807 uint32_t vm = vext_vm(desc); \
4808 uint32_t vl = env->vl; \
4809 uint64_t index; \
4810 uint32_t i; \
4811 \
4812 for (i = env->vstart; i < vl; i++) { \
4813 if (!vm && !vext_elem_mask(v0, i)) { \
4814 continue; \
4815 } \
4816 index = *((TS1 *)vs1 + HS1(i)); \
4817 if (index >= vlmax) { \
4818 *((TS2 *)vd + HS2(i)) = 0; \
4819 } else { \
4820 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
4821 } \
4822 } \
4823 env->vstart = 0; \
4824 }
4825
4826 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4827 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
4828 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4829 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4830 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4831
4832 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
4833 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4834 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4835 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
4836
4837 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
4838 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4839 CPURISCVState *env, uint32_t desc) \
4840 { \
4841 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
4842 uint32_t vm = vext_vm(desc); \
4843 uint32_t vl = env->vl; \
4844 uint64_t index = s1; \
4845 uint32_t i; \
4846 \
4847 for (i = env->vstart; i < vl; i++) { \
4848 if (!vm && !vext_elem_mask(v0, i)) { \
4849 continue; \
4850 } \
4851 if (index >= vlmax) { \
4852 *((ETYPE *)vd + H(i)) = 0; \
4853 } else { \
4854 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
4855 } \
4856 } \
4857 env->vstart = 0; \
4858 }
4859
4860 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
4861 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
4862 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4863 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4864 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4865
4866 /* Vector Compress Instruction */
4867 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
4868 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4869 CPURISCVState *env, uint32_t desc) \
4870 { \
4871 uint32_t vl = env->vl; \
4872 uint32_t num = 0, i; \
4873 \
4874 for (i = env->vstart; i < vl; i++) { \
4875 if (!vext_elem_mask(vs1, i)) { \
4876 continue; \
4877 } \
4878 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
4879 num++; \
4880 } \
4881 env->vstart = 0; \
4882 }
4883
4884 /* Compress into vd elements of vs2 where vs1 is enabled */
4885 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
4886 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4887 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4888 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
4889
4890 /* Vector Whole Register Move */
4891 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
4892 {
4893 /* EEW = SEW */
4894 uint32_t maxsz = simd_maxsz(desc);
4895 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
4896 uint32_t startb = env->vstart * sewb;
4897 uint32_t i = startb;
4898
4899 memcpy((uint8_t *)vd + H1(i),
4900 (uint8_t *)vs2 + H1(i),
4901 maxsz - startb);
4902
4903 env->vstart = 0;
4904 }
4905
4906 /* Vector Integer Extension */
4907 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
4908 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
4909 CPURISCVState *env, uint32_t desc) \
4910 { \
4911 uint32_t vl = env->vl; \
4912 uint32_t vm = vext_vm(desc); \
4913 uint32_t i; \
4914 \
4915 for (i = env->vstart; i < vl; i++) { \
4916 if (!vm && !vext_elem_mask(v0, i)) { \
4917 continue; \
4918 } \
4919 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
4920 } \
4921 env->vstart = 0; \
4922 }
4923
4924 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
4925 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
4926 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
4927 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
4928 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
4929 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
4930
4931 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
4932 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
4933 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
4934 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
4935 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
4936 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)