1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * LoongArch vector translate functions
4 * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
7 #ifndef CONFIG_USER_ONLY
9 static bool check_vec(DisasContext *ctx, uint32_t oprsz)
11 if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) {
12 generate_exception(ctx, EXCCODE_SXD);
16 if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) {
17 generate_exception(ctx, EXCCODE_ASXD);
26 static bool check_vec(DisasContext *ctx, uint32_t oprsz)
33 static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
34 gen_helper_gvec_4_ptr *fn)
36 tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
37 vec_full_offset(a->vj),
38 vec_full_offset(a->vk),
39 vec_full_offset(a->va),
41 oprsz, ctx->vl / 8, 0, fn);
45 static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
46 gen_helper_gvec_4_ptr *fn)
48 if (!check_vec(ctx, 16)) {
52 return gen_vvvv_ptr_vl(ctx, a, 16, fn);
55 static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
56 gen_helper_gvec_4 *fn)
58 tcg_gen_gvec_4_ool(vec_full_offset(a->vd),
59 vec_full_offset(a->vj),
60 vec_full_offset(a->vk),
61 vec_full_offset(a->va),
62 oprsz, ctx->vl / 8, 0, fn);
66 static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
67 gen_helper_gvec_4 *fn)
69 if (!check_vec(ctx, 16)) {
73 return gen_vvvv_vl(ctx, a, 16, fn);
76 static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
77 gen_helper_gvec_3_ptr *fn)
79 tcg_gen_gvec_3_ptr(vec_full_offset(a->vd),
80 vec_full_offset(a->vj),
81 vec_full_offset(a->vk),
83 oprsz, ctx->vl / 8, 0, fn);
87 static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
88 gen_helper_gvec_3_ptr *fn)
90 if (!check_vec(ctx, 16)) {
94 return gen_vvv_ptr_vl(ctx, a, 16, fn);
97 static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
98 gen_helper_gvec_3 *fn)
100 if (!check_vec(ctx, oprsz)) {
104 tcg_gen_gvec_3_ool(vec_full_offset(a->vd),
105 vec_full_offset(a->vj),
106 vec_full_offset(a->vk),
107 oprsz, ctx->vl / 8, 0, fn);
111 static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
113 return gen_vvv_vl(ctx, a, 16, fn);
116 static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
118 return gen_vvv_vl(ctx, a, 32, fn);
121 static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
122 gen_helper_gvec_2_ptr *fn)
124 tcg_gen_gvec_2_ptr(vec_full_offset(a->vd),
125 vec_full_offset(a->vj),
127 oprsz, ctx->vl / 8, 0, fn);
131 static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
132 gen_helper_gvec_2_ptr *fn)
134 if (!check_vec(ctx, 16)) {
138 return gen_vv_ptr_vl(ctx, a, 16, fn);
141 static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
142 gen_helper_gvec_2 *fn)
144 tcg_gen_gvec_2_ool(vec_full_offset(a->vd),
145 vec_full_offset(a->vj),
146 oprsz, ctx->vl / 8, 0, fn);
150 static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
152 if (!check_vec(ctx, 16)) {
156 return gen_vv_vl(ctx, a, 16, fn);
159 static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
160 gen_helper_gvec_2i *fn)
162 tcg_gen_gvec_2i_ool(vec_full_offset(a->vd),
163 vec_full_offset(a->vj),
164 tcg_constant_i64(a->imm),
165 oprsz, ctx->vl / 8, 0, fn);
169 static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
171 if (!check_vec(ctx, 16)) {
175 return gen_vv_i_vl(ctx, a, 16, fn);
178 static bool gen_cv(DisasContext *ctx, arg_cv *a,
179 void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
181 TCGv_i32 vj = tcg_constant_i32(a->vj);
182 TCGv_i32 cd = tcg_constant_i32(a->cd);
184 if (!check_vec(ctx, 16)) {
188 func(cpu_env, cd, vj);
192 static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
193 uint32_t oprsz, MemOp mop,
194 void (*func)(unsigned, uint32_t, uint32_t,
195 uint32_t, uint32_t, uint32_t))
197 uint32_t vd_ofs = vec_full_offset(a->vd);
198 uint32_t vj_ofs = vec_full_offset(a->vj);
199 uint32_t vk_ofs = vec_full_offset(a->vk);
201 if (!check_vec(ctx, oprsz)) {
205 func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
209 static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
210 void (*func)(unsigned, uint32_t, uint32_t,
211 uint32_t, uint32_t, uint32_t))
213 return gvec_vvv_vl(ctx, a, 16, mop, func);
216 static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
217 void (*func)(unsigned, uint32_t, uint32_t,
218 uint32_t, uint32_t, uint32_t))
220 return gvec_vvv_vl(ctx, a, 32, mop, func);
223 static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
224 uint32_t oprsz, MemOp mop,
225 void (*func)(unsigned, uint32_t, uint32_t,
228 uint32_t vd_ofs = vec_full_offset(a->vd);
229 uint32_t vj_ofs = vec_full_offset(a->vj);
231 if (!check_vec(ctx, oprsz)) {
235 func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8);
240 static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
241 void (*func)(unsigned, uint32_t, uint32_t,
244 return gvec_vv_vl(ctx, a, 16, mop, func);
247 static bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop,
248 void (*func)(unsigned, uint32_t, uint32_t,
251 return gvec_vv_vl(ctx, a, 32, mop, func);
254 static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
255 uint32_t oprsz, MemOp mop,
256 void (*func)(unsigned, uint32_t, uint32_t,
257 int64_t, uint32_t, uint32_t))
259 uint32_t vd_ofs = vec_full_offset(a->vd);
260 uint32_t vj_ofs = vec_full_offset(a->vj);
262 if (!check_vec(ctx, oprsz)) {
266 func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
270 static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
271 void (*func)(unsigned, uint32_t, uint32_t,
272 int64_t, uint32_t, uint32_t))
274 return gvec_vv_i_vl(ctx, a, 16, mop, func);
277 static bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
278 void (*func)(unsigned, uint32_t, uint32_t,
279 int64_t, uint32_t, uint32_t))
281 return gvec_vv_i_vl(ctx,a, 32, mop, func);
284 static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
285 uint32_t oprsz, MemOp mop)
287 uint32_t vd_ofs = vec_full_offset(a->vd);
288 uint32_t vj_ofs = vec_full_offset(a->vj);
290 if (!check_vec(ctx, oprsz)) {
294 tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8);
298 static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
300 return gvec_subi_vl(ctx, a, 16, mop);
303 static bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
305 return gvec_subi_vl(ctx, a, 32, mop);
308 TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
309 TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add)
310 TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add)
311 TRANS(vadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_add)
312 TRANS(xvadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_add)
313 TRANS(xvadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_add)
314 TRANS(xvadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_add)
315 TRANS(xvadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_add)
317 static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
318 void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
319 TCGv_i64, TCGv_i64, TCGv_i64))
322 TCGv_i64 rh, rl, ah, al, bh, bl;
324 if (!check_vec(ctx, oprsz)) {
328 rh = tcg_temp_new_i64();
329 rl = tcg_temp_new_i64();
330 ah = tcg_temp_new_i64();
331 al = tcg_temp_new_i64();
332 bh = tcg_temp_new_i64();
333 bl = tcg_temp_new_i64();
335 for (i = 0; i < oprsz / 16; i++) {
336 get_vreg64(ah, a->vj, 1 + i * 2);
337 get_vreg64(al, a->vj, i * 2);
338 get_vreg64(bh, a->vk, 1 + i * 2);
339 get_vreg64(bl, a->vk, i * 2);
341 func(rl, rh, al, ah, bl, bh);
343 set_vreg64(rh, a->vd, 1 + i * 2);
344 set_vreg64(rl, a->vd, i * 2);
349 static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a,
350 void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
351 TCGv_i64, TCGv_i64, TCGv_i64))
353 return gen_vaddsub_q_vl(ctx, a, 16, func);
356 static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a,
357 void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
358 TCGv_i64, TCGv_i64, TCGv_i64))
360 return gen_vaddsub_q_vl(ctx, a, 32, func);
363 TRANS(vsub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sub)
364 TRANS(vsub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sub)
365 TRANS(vsub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sub)
366 TRANS(vsub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sub)
367 TRANS(xvsub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sub)
368 TRANS(xvsub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sub)
369 TRANS(xvsub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sub)
370 TRANS(xvsub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sub)
372 TRANS(vadd_q, LSX, gen_vaddsub_q, tcg_gen_add2_i64)
373 TRANS(vsub_q, LSX, gen_vaddsub_q, tcg_gen_sub2_i64)
374 TRANS(xvadd_q, LASX, gen_xvaddsub_q, tcg_gen_add2_i64)
375 TRANS(xvsub_q, LASX, gen_xvaddsub_q, tcg_gen_sub2_i64)
377 TRANS(vaddi_bu, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_addi)
378 TRANS(vaddi_hu, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_addi)
379 TRANS(vaddi_wu, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_addi)
380 TRANS(vaddi_du, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_addi)
381 TRANS(vsubi_bu, LSX, gvec_subi, MO_8)
382 TRANS(vsubi_hu, LSX, gvec_subi, MO_16)
383 TRANS(vsubi_wu, LSX, gvec_subi, MO_32)
384 TRANS(vsubi_du, LSX, gvec_subi, MO_64)
385 TRANS(xvaddi_bu, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_addi)
386 TRANS(xvaddi_hu, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_addi)
387 TRANS(xvaddi_wu, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_addi)
388 TRANS(xvaddi_du, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_addi)
389 TRANS(xvsubi_bu, LASX, gvec_xsubi, MO_8)
390 TRANS(xvsubi_hu, LASX, gvec_xsubi, MO_16)
391 TRANS(xvsubi_wu, LASX, gvec_xsubi, MO_32)
392 TRANS(xvsubi_du, LASX, gvec_xsubi, MO_64)
394 TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg)
395 TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg)
396 TRANS(vneg_w, LSX, gvec_vv, MO_32, tcg_gen_gvec_neg)
397 TRANS(vneg_d, LSX, gvec_vv, MO_64, tcg_gen_gvec_neg)
398 TRANS(xvneg_b, LASX, gvec_xx, MO_8, tcg_gen_gvec_neg)
399 TRANS(xvneg_h, LASX, gvec_xx, MO_16, tcg_gen_gvec_neg)
400 TRANS(xvneg_w, LASX, gvec_xx, MO_32, tcg_gen_gvec_neg)
401 TRANS(xvneg_d, LASX, gvec_xx, MO_64, tcg_gen_gvec_neg)
403 TRANS(vsadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ssadd)
404 TRANS(vsadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ssadd)
405 TRANS(vsadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ssadd)
406 TRANS(vsadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ssadd)
407 TRANS(vsadd_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_usadd)
408 TRANS(vsadd_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_usadd)
409 TRANS(vsadd_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_usadd)
410 TRANS(vsadd_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_usadd)
411 TRANS(vssub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sssub)
412 TRANS(vssub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sssub)
413 TRANS(vssub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sssub)
414 TRANS(vssub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sssub)
415 TRANS(vssub_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ussub)
416 TRANS(vssub_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
417 TRANS(vssub_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
418 TRANS(vssub_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ussub)
420 TRANS(xvsadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ssadd)
421 TRANS(xvsadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ssadd)
422 TRANS(xvsadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ssadd)
423 TRANS(xvsadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ssadd)
424 TRANS(xvsadd_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_usadd)
425 TRANS(xvsadd_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_usadd)
426 TRANS(xvsadd_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_usadd)
427 TRANS(xvsadd_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_usadd)
428 TRANS(xvssub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sssub)
429 TRANS(xvssub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sssub)
430 TRANS(xvssub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sssub)
431 TRANS(xvssub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sssub)
432 TRANS(xvssub_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ussub)
433 TRANS(xvssub_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ussub)
434 TRANS(xvssub_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ussub)
435 TRANS(xvssub_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ussub)
437 TRANS(vhaddw_h_b, LSX, gen_vvv, gen_helper_vhaddw_h_b)
438 TRANS(vhaddw_w_h, LSX, gen_vvv, gen_helper_vhaddw_w_h)
439 TRANS(vhaddw_d_w, LSX, gen_vvv, gen_helper_vhaddw_d_w)
440 TRANS(vhaddw_q_d, LSX, gen_vvv, gen_helper_vhaddw_q_d)
441 TRANS(vhaddw_hu_bu, LSX, gen_vvv, gen_helper_vhaddw_hu_bu)
442 TRANS(vhaddw_wu_hu, LSX, gen_vvv, gen_helper_vhaddw_wu_hu)
443 TRANS(vhaddw_du_wu, LSX, gen_vvv, gen_helper_vhaddw_du_wu)
444 TRANS(vhaddw_qu_du, LSX, gen_vvv, gen_helper_vhaddw_qu_du)
445 TRANS(vhsubw_h_b, LSX, gen_vvv, gen_helper_vhsubw_h_b)
446 TRANS(vhsubw_w_h, LSX, gen_vvv, gen_helper_vhsubw_w_h)
447 TRANS(vhsubw_d_w, LSX, gen_vvv, gen_helper_vhsubw_d_w)
448 TRANS(vhsubw_q_d, LSX, gen_vvv, gen_helper_vhsubw_q_d)
449 TRANS(vhsubw_hu_bu, LSX, gen_vvv, gen_helper_vhsubw_hu_bu)
450 TRANS(vhsubw_wu_hu, LSX, gen_vvv, gen_helper_vhsubw_wu_hu)
451 TRANS(vhsubw_du_wu, LSX, gen_vvv, gen_helper_vhsubw_du_wu)
452 TRANS(vhsubw_qu_du, LSX, gen_vvv, gen_helper_vhsubw_qu_du)
454 TRANS(xvhaddw_h_b, LASX, gen_xxx, gen_helper_vhaddw_h_b)
455 TRANS(xvhaddw_w_h, LASX, gen_xxx, gen_helper_vhaddw_w_h)
456 TRANS(xvhaddw_d_w, LASX, gen_xxx, gen_helper_vhaddw_d_w)
457 TRANS(xvhaddw_q_d, LASX, gen_xxx, gen_helper_vhaddw_q_d)
458 TRANS(xvhaddw_hu_bu, LASX, gen_xxx, gen_helper_vhaddw_hu_bu)
459 TRANS(xvhaddw_wu_hu, LASX, gen_xxx, gen_helper_vhaddw_wu_hu)
460 TRANS(xvhaddw_du_wu, LASX, gen_xxx, gen_helper_vhaddw_du_wu)
461 TRANS(xvhaddw_qu_du, LASX, gen_xxx, gen_helper_vhaddw_qu_du)
462 TRANS(xvhsubw_h_b, LASX, gen_xxx, gen_helper_vhsubw_h_b)
463 TRANS(xvhsubw_w_h, LASX, gen_xxx, gen_helper_vhsubw_w_h)
464 TRANS(xvhsubw_d_w, LASX, gen_xxx, gen_helper_vhsubw_d_w)
465 TRANS(xvhsubw_q_d, LASX, gen_xxx, gen_helper_vhsubw_q_d)
466 TRANS(xvhsubw_hu_bu, LASX, gen_xxx, gen_helper_vhsubw_hu_bu)
467 TRANS(xvhsubw_wu_hu, LASX, gen_xxx, gen_helper_vhsubw_wu_hu)
468 TRANS(xvhsubw_du_wu, LASX, gen_xxx, gen_helper_vhsubw_du_wu)
469 TRANS(xvhsubw_qu_du, LASX, gen_xxx, gen_helper_vhsubw_qu_du)
471 static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
475 int halfbits = 4 << vece;
477 t1 = tcg_temp_new_vec_matching(a);
478 t2 = tcg_temp_new_vec_matching(b);
480 /* Sign-extend the even elements from a */
481 tcg_gen_shli_vec(vece, t1, a, halfbits);
482 tcg_gen_sari_vec(vece, t1, t1, halfbits);
484 /* Sign-extend the even elements from b */
485 tcg_gen_shli_vec(vece, t2, b, halfbits);
486 tcg_gen_sari_vec(vece, t2, t2, halfbits);
488 tcg_gen_add_vec(vece, t, t1, t2);
491 static void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
495 t1 = tcg_temp_new_i32();
496 t2 = tcg_temp_new_i32();
497 tcg_gen_ext16s_i32(t1, a);
498 tcg_gen_ext16s_i32(t2, b);
499 tcg_gen_add_i32(t, t1, t2);
502 static void gen_vaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
506 t1 = tcg_temp_new_i64();
507 t2 = tcg_temp_new_i64();
508 tcg_gen_ext32s_i64(t1, a);
509 tcg_gen_ext32s_i64(t2, b);
510 tcg_gen_add_i64(t, t1, t2);
513 static void do_vaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
514 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
516 static const TCGOpcode vecop_list[] = {
517 INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
519 static const GVecGen3 op[4] = {
521 .fniv = gen_vaddwev_s,
522 .fno = gen_helper_vaddwev_h_b,
523 .opt_opc = vecop_list,
527 .fni4 = gen_vaddwev_w_h,
528 .fniv = gen_vaddwev_s,
529 .fno = gen_helper_vaddwev_w_h,
530 .opt_opc = vecop_list,
534 .fni8 = gen_vaddwev_d_w,
535 .fniv = gen_vaddwev_s,
536 .fno = gen_helper_vaddwev_d_w,
537 .opt_opc = vecop_list,
541 .fno = gen_helper_vaddwev_q_d,
546 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
549 TRANS(vaddwev_h_b, LSX, gvec_vvv, MO_8, do_vaddwev_s)
550 TRANS(vaddwev_w_h, LSX, gvec_vvv, MO_16, do_vaddwev_s)
551 TRANS(vaddwev_d_w, LSX, gvec_vvv, MO_32, do_vaddwev_s)
552 TRANS(vaddwev_q_d, LSX, gvec_vvv, MO_64, do_vaddwev_s)
553 TRANS(xvaddwev_h_b, LASX, gvec_xxx, MO_8, do_vaddwev_s)
554 TRANS(xvaddwev_w_h, LASX, gvec_xxx, MO_16, do_vaddwev_s)
555 TRANS(xvaddwev_d_w, LASX, gvec_xxx, MO_32, do_vaddwev_s)
556 TRANS(xvaddwev_q_d, LASX, gvec_xxx, MO_64, do_vaddwev_s)
558 static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
562 t1 = tcg_temp_new_i32();
563 t2 = tcg_temp_new_i32();
564 tcg_gen_sari_i32(t1, a, 16);
565 tcg_gen_sari_i32(t2, b, 16);
566 tcg_gen_add_i32(t, t1, t2);
569 static void gen_vaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
573 t1 = tcg_temp_new_i64();
574 t2 = tcg_temp_new_i64();
575 tcg_gen_sari_i64(t1, a, 32);
576 tcg_gen_sari_i64(t2, b, 32);
577 tcg_gen_add_i64(t, t1, t2);
580 static void gen_vaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
584 int halfbits = 4 << vece;
586 t1 = tcg_temp_new_vec_matching(a);
587 t2 = tcg_temp_new_vec_matching(b);
589 /* Sign-extend the odd elements for vector */
590 tcg_gen_sari_vec(vece, t1, a, halfbits);
591 tcg_gen_sari_vec(vece, t2, b, halfbits);
593 tcg_gen_add_vec(vece, t, t1, t2);
596 static void do_vaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
597 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
599 static const TCGOpcode vecop_list[] = {
600 INDEX_op_sari_vec, INDEX_op_add_vec, 0
602 static const GVecGen3 op[4] = {
604 .fniv = gen_vaddwod_s,
605 .fno = gen_helper_vaddwod_h_b,
606 .opt_opc = vecop_list,
610 .fni4 = gen_vaddwod_w_h,
611 .fniv = gen_vaddwod_s,
612 .fno = gen_helper_vaddwod_w_h,
613 .opt_opc = vecop_list,
617 .fni8 = gen_vaddwod_d_w,
618 .fniv = gen_vaddwod_s,
619 .fno = gen_helper_vaddwod_d_w,
620 .opt_opc = vecop_list,
624 .fno = gen_helper_vaddwod_q_d,
629 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
632 TRANS(vaddwod_h_b, LSX, gvec_vvv, MO_8, do_vaddwod_s)
633 TRANS(vaddwod_w_h, LSX, gvec_vvv, MO_16, do_vaddwod_s)
634 TRANS(vaddwod_d_w, LSX, gvec_vvv, MO_32, do_vaddwod_s)
635 TRANS(vaddwod_q_d, LSX, gvec_vvv, MO_64, do_vaddwod_s)
636 TRANS(xvaddwod_h_b, LASX, gvec_xxx, MO_8, do_vaddwod_s)
637 TRANS(xvaddwod_w_h, LASX, gvec_xxx, MO_16, do_vaddwod_s)
638 TRANS(xvaddwod_d_w, LASX, gvec_xxx, MO_32, do_vaddwod_s)
639 TRANS(xvaddwod_q_d, LASX, gvec_xxx, MO_64, do_vaddwod_s)
642 static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
646 int halfbits = 4 << vece;
648 t1 = tcg_temp_new_vec_matching(a);
649 t2 = tcg_temp_new_vec_matching(b);
651 /* Sign-extend the even elements from a */
652 tcg_gen_shli_vec(vece, t1, a, halfbits);
653 tcg_gen_sari_vec(vece, t1, t1, halfbits);
655 /* Sign-extend the even elements from b */
656 tcg_gen_shli_vec(vece, t2, b, halfbits);
657 tcg_gen_sari_vec(vece, t2, t2, halfbits);
659 tcg_gen_sub_vec(vece, t, t1, t2);
662 static void gen_vsubwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
666 t1 = tcg_temp_new_i32();
667 t2 = tcg_temp_new_i32();
668 tcg_gen_ext16s_i32(t1, a);
669 tcg_gen_ext16s_i32(t2, b);
670 tcg_gen_sub_i32(t, t1, t2);
673 static void gen_vsubwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
677 t1 = tcg_temp_new_i64();
678 t2 = tcg_temp_new_i64();
679 tcg_gen_ext32s_i64(t1, a);
680 tcg_gen_ext32s_i64(t2, b);
681 tcg_gen_sub_i64(t, t1, t2);
684 static void do_vsubwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
685 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
687 static const TCGOpcode vecop_list[] = {
688 INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_sub_vec, 0
690 static const GVecGen3 op[4] = {
692 .fniv = gen_vsubwev_s,
693 .fno = gen_helper_vsubwev_h_b,
694 .opt_opc = vecop_list,
698 .fni4 = gen_vsubwev_w_h,
699 .fniv = gen_vsubwev_s,
700 .fno = gen_helper_vsubwev_w_h,
701 .opt_opc = vecop_list,
705 .fni8 = gen_vsubwev_d_w,
706 .fniv = gen_vsubwev_s,
707 .fno = gen_helper_vsubwev_d_w,
708 .opt_opc = vecop_list,
712 .fno = gen_helper_vsubwev_q_d,
717 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
720 TRANS(vsubwev_h_b, LSX, gvec_vvv, MO_8, do_vsubwev_s)
721 TRANS(vsubwev_w_h, LSX, gvec_vvv, MO_16, do_vsubwev_s)
722 TRANS(vsubwev_d_w, LSX, gvec_vvv, MO_32, do_vsubwev_s)
723 TRANS(vsubwev_q_d, LSX, gvec_vvv, MO_64, do_vsubwev_s)
724 TRANS(xvsubwev_h_b, LASX, gvec_xxx, MO_8, do_vsubwev_s)
725 TRANS(xvsubwev_w_h, LASX, gvec_xxx, MO_16, do_vsubwev_s)
726 TRANS(xvsubwev_d_w, LASX, gvec_xxx, MO_32, do_vsubwev_s)
727 TRANS(xvsubwev_q_d, LASX, gvec_xxx, MO_64, do_vsubwev_s)
729 static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
733 int halfbits = 4 << vece;
735 t1 = tcg_temp_new_vec_matching(a);
736 t2 = tcg_temp_new_vec_matching(b);
738 /* Sign-extend the odd elements for vector */
739 tcg_gen_sari_vec(vece, t1, a, halfbits);
740 tcg_gen_sari_vec(vece, t2, b, halfbits);
742 tcg_gen_sub_vec(vece, t, t1, t2);
745 static void gen_vsubwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
749 t1 = tcg_temp_new_i32();
750 t2 = tcg_temp_new_i32();
751 tcg_gen_sari_i32(t1, a, 16);
752 tcg_gen_sari_i32(t2, b, 16);
753 tcg_gen_sub_i32(t, t1, t2);
756 static void gen_vsubwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
760 t1 = tcg_temp_new_i64();
761 t2 = tcg_temp_new_i64();
762 tcg_gen_sari_i64(t1, a, 32);
763 tcg_gen_sari_i64(t2, b, 32);
764 tcg_gen_sub_i64(t, t1, t2);
767 static void do_vsubwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
768 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
770 static const TCGOpcode vecop_list[] = {
771 INDEX_op_sari_vec, INDEX_op_sub_vec, 0
773 static const GVecGen3 op[4] = {
775 .fniv = gen_vsubwod_s,
776 .fno = gen_helper_vsubwod_h_b,
777 .opt_opc = vecop_list,
781 .fni4 = gen_vsubwod_w_h,
782 .fniv = gen_vsubwod_s,
783 .fno = gen_helper_vsubwod_w_h,
784 .opt_opc = vecop_list,
788 .fni8 = gen_vsubwod_d_w,
789 .fniv = gen_vsubwod_s,
790 .fno = gen_helper_vsubwod_d_w,
791 .opt_opc = vecop_list,
795 .fno = gen_helper_vsubwod_q_d,
800 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
803 TRANS(vsubwod_h_b, LSX, gvec_vvv, MO_8, do_vsubwod_s)
804 TRANS(vsubwod_w_h, LSX, gvec_vvv, MO_16, do_vsubwod_s)
805 TRANS(vsubwod_d_w, LSX, gvec_vvv, MO_32, do_vsubwod_s)
806 TRANS(vsubwod_q_d, LSX, gvec_vvv, MO_64, do_vsubwod_s)
807 TRANS(xvsubwod_h_b, LASX, gvec_xxx, MO_8, do_vsubwod_s)
808 TRANS(xvsubwod_w_h, LASX, gvec_xxx, MO_16, do_vsubwod_s)
809 TRANS(xvsubwod_d_w, LASX, gvec_xxx, MO_32, do_vsubwod_s)
810 TRANS(xvsubwod_q_d, LASX, gvec_xxx, MO_64, do_vsubwod_s)
812 static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
816 t1 = tcg_temp_new_vec_matching(a);
817 t2 = tcg_temp_new_vec_matching(b);
818 t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
819 tcg_gen_and_vec(vece, t1, a, t3);
820 tcg_gen_and_vec(vece, t2, b, t3);
821 tcg_gen_add_vec(vece, t, t1, t2);
824 static void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
828 t1 = tcg_temp_new_i32();
829 t2 = tcg_temp_new_i32();
830 tcg_gen_ext16u_i32(t1, a);
831 tcg_gen_ext16u_i32(t2, b);
832 tcg_gen_add_i32(t, t1, t2);
835 static void gen_vaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
839 t1 = tcg_temp_new_i64();
840 t2 = tcg_temp_new_i64();
841 tcg_gen_ext32u_i64(t1, a);
842 tcg_gen_ext32u_i64(t2, b);
843 tcg_gen_add_i64(t, t1, t2);
846 static void do_vaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
847 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
849 static const TCGOpcode vecop_list[] = {
852 static const GVecGen3 op[4] = {
854 .fniv = gen_vaddwev_u,
855 .fno = gen_helper_vaddwev_h_bu,
856 .opt_opc = vecop_list,
860 .fni4 = gen_vaddwev_w_hu,
861 .fniv = gen_vaddwev_u,
862 .fno = gen_helper_vaddwev_w_hu,
863 .opt_opc = vecop_list,
867 .fni8 = gen_vaddwev_d_wu,
868 .fniv = gen_vaddwev_u,
869 .fno = gen_helper_vaddwev_d_wu,
870 .opt_opc = vecop_list,
874 .fno = gen_helper_vaddwev_q_du,
879 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
882 TRANS(vaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vaddwev_u)
883 TRANS(vaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vaddwev_u)
884 TRANS(vaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vaddwev_u)
885 TRANS(vaddwev_q_du, LSX, gvec_vvv, MO_64, do_vaddwev_u)
886 TRANS(xvaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vaddwev_u)
887 TRANS(xvaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vaddwev_u)
888 TRANS(xvaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vaddwev_u)
889 TRANS(xvaddwev_q_du, LASX, gvec_xxx, MO_64, do_vaddwev_u)
891 static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
895 int halfbits = 4 << vece;
897 t1 = tcg_temp_new_vec_matching(a);
898 t2 = tcg_temp_new_vec_matching(b);
900 /* Zero-extend the odd elements for vector */
901 tcg_gen_shri_vec(vece, t1, a, halfbits);
902 tcg_gen_shri_vec(vece, t2, b, halfbits);
904 tcg_gen_add_vec(vece, t, t1, t2);
907 static void gen_vaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
911 t1 = tcg_temp_new_i32();
912 t2 = tcg_temp_new_i32();
913 tcg_gen_shri_i32(t1, a, 16);
914 tcg_gen_shri_i32(t2, b, 16);
915 tcg_gen_add_i32(t, t1, t2);
918 static void gen_vaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
922 t1 = tcg_temp_new_i64();
923 t2 = tcg_temp_new_i64();
924 tcg_gen_shri_i64(t1, a, 32);
925 tcg_gen_shri_i64(t2, b, 32);
926 tcg_gen_add_i64(t, t1, t2);
929 static void do_vaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
930 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
932 static const TCGOpcode vecop_list[] = {
933 INDEX_op_shri_vec, INDEX_op_add_vec, 0
935 static const GVecGen3 op[4] = {
937 .fniv = gen_vaddwod_u,
938 .fno = gen_helper_vaddwod_h_bu,
939 .opt_opc = vecop_list,
943 .fni4 = gen_vaddwod_w_hu,
944 .fniv = gen_vaddwod_u,
945 .fno = gen_helper_vaddwod_w_hu,
946 .opt_opc = vecop_list,
950 .fni8 = gen_vaddwod_d_wu,
951 .fniv = gen_vaddwod_u,
952 .fno = gen_helper_vaddwod_d_wu,
953 .opt_opc = vecop_list,
957 .fno = gen_helper_vaddwod_q_du,
962 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
965 TRANS(vaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vaddwod_u)
966 TRANS(vaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vaddwod_u)
967 TRANS(vaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vaddwod_u)
968 TRANS(vaddwod_q_du, LSX, gvec_vvv, MO_64, do_vaddwod_u)
969 TRANS(xvaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vaddwod_u)
970 TRANS(xvaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vaddwod_u)
971 TRANS(xvaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vaddwod_u)
972 TRANS(xvaddwod_q_du, LASX, gvec_xxx, MO_64, do_vaddwod_u)
974 static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
978 t1 = tcg_temp_new_vec_matching(a);
979 t2 = tcg_temp_new_vec_matching(b);
980 t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
981 tcg_gen_and_vec(vece, t1, a, t3);
982 tcg_gen_and_vec(vece, t2, b, t3);
983 tcg_gen_sub_vec(vece, t, t1, t2);
986 static void gen_vsubwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
990 t1 = tcg_temp_new_i32();
991 t2 = tcg_temp_new_i32();
992 tcg_gen_ext16u_i32(t1, a);
993 tcg_gen_ext16u_i32(t2, b);
994 tcg_gen_sub_i32(t, t1, t2);
997 static void gen_vsubwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1001 t1 = tcg_temp_new_i64();
1002 t2 = tcg_temp_new_i64();
1003 tcg_gen_ext32u_i64(t1, a);
1004 tcg_gen_ext32u_i64(t2, b);
1005 tcg_gen_sub_i64(t, t1, t2);
1008 static void do_vsubwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1009 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1011 static const TCGOpcode vecop_list[] = {
1014 static const GVecGen3 op[4] = {
1016 .fniv = gen_vsubwev_u,
1017 .fno = gen_helper_vsubwev_h_bu,
1018 .opt_opc = vecop_list,
1022 .fni4 = gen_vsubwev_w_hu,
1023 .fniv = gen_vsubwev_u,
1024 .fno = gen_helper_vsubwev_w_hu,
1025 .opt_opc = vecop_list,
1029 .fni8 = gen_vsubwev_d_wu,
1030 .fniv = gen_vsubwev_u,
1031 .fno = gen_helper_vsubwev_d_wu,
1032 .opt_opc = vecop_list,
1036 .fno = gen_helper_vsubwev_q_du,
1041 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1044 TRANS(vsubwev_h_bu, LSX, gvec_vvv, MO_8, do_vsubwev_u)
1045 TRANS(vsubwev_w_hu, LSX, gvec_vvv, MO_16, do_vsubwev_u)
1046 TRANS(vsubwev_d_wu, LSX, gvec_vvv, MO_32, do_vsubwev_u)
1047 TRANS(vsubwev_q_du, LSX, gvec_vvv, MO_64, do_vsubwev_u)
1048 TRANS(xvsubwev_h_bu, LASX, gvec_xxx, MO_8, do_vsubwev_u)
1049 TRANS(xvsubwev_w_hu, LASX, gvec_xxx, MO_16, do_vsubwev_u)
1050 TRANS(xvsubwev_d_wu, LASX, gvec_xxx, MO_32, do_vsubwev_u)
1051 TRANS(xvsubwev_q_du, LASX, gvec_xxx, MO_64, do_vsubwev_u)
1053 static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1057 int halfbits = 4 << vece;
1059 t1 = tcg_temp_new_vec_matching(a);
1060 t2 = tcg_temp_new_vec_matching(b);
1062 /* Zero-extend the odd elements for vector */
1063 tcg_gen_shri_vec(vece, t1, a, halfbits);
1064 tcg_gen_shri_vec(vece, t2, b, halfbits);
1066 tcg_gen_sub_vec(vece, t, t1, t2);
1069 static void gen_vsubwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1073 t1 = tcg_temp_new_i32();
1074 t2 = tcg_temp_new_i32();
1075 tcg_gen_shri_i32(t1, a, 16);
1076 tcg_gen_shri_i32(t2, b, 16);
1077 tcg_gen_sub_i32(t, t1, t2);
1080 static void gen_vsubwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1084 t1 = tcg_temp_new_i64();
1085 t2 = tcg_temp_new_i64();
1086 tcg_gen_shri_i64(t1, a, 32);
1087 tcg_gen_shri_i64(t2, b, 32);
1088 tcg_gen_sub_i64(t, t1, t2);
1091 static void do_vsubwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1092 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1094 static const TCGOpcode vecop_list[] = {
1095 INDEX_op_shri_vec, INDEX_op_sub_vec, 0
1097 static const GVecGen3 op[4] = {
1099 .fniv = gen_vsubwod_u,
1100 .fno = gen_helper_vsubwod_h_bu,
1101 .opt_opc = vecop_list,
1105 .fni4 = gen_vsubwod_w_hu,
1106 .fniv = gen_vsubwod_u,
1107 .fno = gen_helper_vsubwod_w_hu,
1108 .opt_opc = vecop_list,
1112 .fni8 = gen_vsubwod_d_wu,
1113 .fniv = gen_vsubwod_u,
1114 .fno = gen_helper_vsubwod_d_wu,
1115 .opt_opc = vecop_list,
1119 .fno = gen_helper_vsubwod_q_du,
1124 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1127 TRANS(vsubwod_h_bu, LSX, gvec_vvv, MO_8, do_vsubwod_u)
1128 TRANS(vsubwod_w_hu, LSX, gvec_vvv, MO_16, do_vsubwod_u)
1129 TRANS(vsubwod_d_wu, LSX, gvec_vvv, MO_32, do_vsubwod_u)
1130 TRANS(vsubwod_q_du, LSX, gvec_vvv, MO_64, do_vsubwod_u)
1131 TRANS(xvsubwod_h_bu, LASX, gvec_xxx, MO_8, do_vsubwod_u)
1132 TRANS(xvsubwod_w_hu, LASX, gvec_xxx, MO_16, do_vsubwod_u)
1133 TRANS(xvsubwod_d_wu, LASX, gvec_xxx, MO_32, do_vsubwod_u)
1134 TRANS(xvsubwod_q_du, LASX, gvec_xxx, MO_64, do_vsubwod_u)
1136 static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1138 TCGv_vec t1, t2, t3;
1140 int halfbits = 4 << vece;
1142 t1 = tcg_temp_new_vec_matching(a);
1143 t2 = tcg_temp_new_vec_matching(b);
1144 t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, halfbits));
1146 /* Zero-extend the even elements from a */
1147 tcg_gen_and_vec(vece, t1, a, t3);
1149 /* Sign-extend the even elements from b */
1150 tcg_gen_shli_vec(vece, t2, b, halfbits);
1151 tcg_gen_sari_vec(vece, t2, t2, halfbits);
1153 tcg_gen_add_vec(vece, t, t1, t2);
1156 static void gen_vaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1160 t1 = tcg_temp_new_i32();
1161 t2 = tcg_temp_new_i32();
1162 tcg_gen_ext16u_i32(t1, a);
1163 tcg_gen_ext16s_i32(t2, b);
1164 tcg_gen_add_i32(t, t1, t2);
1167 static void gen_vaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1171 t1 = tcg_temp_new_i64();
1172 t2 = tcg_temp_new_i64();
1173 tcg_gen_ext32u_i64(t1, a);
1174 tcg_gen_ext32s_i64(t2, b);
1175 tcg_gen_add_i64(t, t1, t2);
1178 static void do_vaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1179 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1181 static const TCGOpcode vecop_list[] = {
1182 INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
1184 static const GVecGen3 op[4] = {
1186 .fniv = gen_vaddwev_u_s,
1187 .fno = gen_helper_vaddwev_h_bu_b,
1188 .opt_opc = vecop_list,
1192 .fni4 = gen_vaddwev_w_hu_h,
1193 .fniv = gen_vaddwev_u_s,
1194 .fno = gen_helper_vaddwev_w_hu_h,
1195 .opt_opc = vecop_list,
1199 .fni8 = gen_vaddwev_d_wu_w,
1200 .fniv = gen_vaddwev_u_s,
1201 .fno = gen_helper_vaddwev_d_wu_w,
1202 .opt_opc = vecop_list,
1206 .fno = gen_helper_vaddwev_q_du_d,
1211 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* 128-bit (LSX) and 256-bit (LASX) even-element widening unsigned+signed adds. */
1214 TRANS(vaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwev_u_s)
1215 TRANS(vaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwev_u_s)
1216 TRANS(vaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwev_u_s)
1217 TRANS(vaddwev_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwev_u_s)
1218 TRANS(xvaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwev_u_s)
1219 TRANS(xvaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwev_u_s)
1220 TRANS(xvaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwev_u_s)
1221 TRANS(xvaddwev_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwev_u_s)
/*
 * Per 2N-bit lane: add the odd (high) N-bit elements; the logical shift
 * zero-extends @a's element, the arithmetic shift sign-extends @b's.
 */
1223 static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1227 int halfbits = 4 << vece;
1229 t1 = tcg_temp_new_vec_matching(a);
1230 t2 = tcg_temp_new_vec_matching(b);
1232 /* Zero-extend the odd elements from a */
1233 tcg_gen_shri_vec(vece, t1, a, halfbits);
1234 /* Sign-extend the odd elements from b */
1235 tcg_gen_sari_vec(vece, t2, b, halfbits);
1237 tcg_gen_add_vec(vece, t, t1, t2);
1240 static void gen_vaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1244 t1 = tcg_temp_new_i32();
1245 t2 = tcg_temp_new_i32();
1246 tcg_gen_shri_i32(t1, a, 16);
1247 tcg_gen_sari_i32(t2, b, 16);
1248 tcg_gen_add_i32(t, t1, t2);
1251 static void gen_vaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1255 t1 = tcg_temp_new_i64();
1256 t2 = tcg_temp_new_i64();
1257 tcg_gen_shri_i64(t1, a, 32);
1258 tcg_gen_sari_i64(t2, b, 32);
1259 tcg_gen_add_i64(t, t1, t2);
/*
 * gvec expansion table for vaddwod.{h.bu.b,w.hu.h,d.wu.w,q.du.d};
 * MO_64 visibly uses only the out-of-line helper.
 */
1262 static void do_vaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1263                            uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1265 static const TCGOpcode vecop_list[] = {
1266 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
1268 static const GVecGen3 op[4] = {
1270 .fniv = gen_vaddwod_u_s,
1271 .fno = gen_helper_vaddwod_h_bu_b,
1272 .opt_opc = vecop_list,
1276 .fni4 = gen_vaddwod_w_hu_h,
1277 .fniv = gen_vaddwod_u_s,
1278 .fno = gen_helper_vaddwod_w_hu_h,
1279 .opt_opc = vecop_list,
1283 .fni8 = gen_vaddwod_d_wu_w,
1284 .fniv = gen_vaddwod_u_s,
1285 .fno = gen_helper_vaddwod_d_wu_w,
1286 .opt_opc = vecop_list,
1290 .fno = gen_helper_vaddwod_q_du_d,
1295 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1298 TRANS(vaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwod_u_s)
1299 TRANS(vaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwod_u_s)
1300 TRANS(vaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwod_u_s)
1301 TRANS(vaddwod_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwod_u_s)
1302 TRANS(xvaddwod_h_bu_b, LSX, gvec_xxx, MO_8, do_vaddwod_u_s)
1303 TRANS(xvaddwod_w_hu_h, LSX, gvec_xxx, MO_16, do_vaddwod_u_s)
1304 TRANS(xvaddwod_d_wu_w, LSX, gvec_xxx, MO_32, do_vaddwod_u_s)
1305 TRANS(xvaddwod_q_du_d, LSX, gvec_xxx, MO_64, do_vaddwod_u_s)
/*
 * Common average expansion: t = (a >> 1) + (b >> 1) + round, where
 * gen_shr_vec (sari/shri) selects signed vs unsigned and gen_round_vec
 * (and/or of a,b masked to bit 0) selects truncation carry vs rounding.
 */
1307 static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
1308                     void (*gen_shr_vec)(unsigned, TCGv_vec,
1310                     void (*gen_round_vec)(unsigned, TCGv_vec,
1311                     TCGv_vec, TCGv_vec))
1313 TCGv_vec tmp = tcg_temp_new_vec_matching(t);
1314 gen_round_vec(vece, tmp, a, b);
1315 tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1));
1316 gen_shr_vec(vece, a, a, 1);
1317 gen_shr_vec(vece, b, b, 1);
1318 tcg_gen_add_vec(vece, t, a, b);
1319 tcg_gen_add_vec(vece, t, t, tmp);
/* Thin wrappers: signed/unsigned average (and = carry) and rounded average (or). */
1322 static void gen_vavg_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1324 do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_and_vec);
1327 static void gen_vavg_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1329 do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_and_vec);
1332 static void gen_vavgr_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1334 do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_or_vec);
1337 static void gen_vavgr_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1339 do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_or_vec);
/*
 * Signed average expansion table (vavg.{b,h,w,d}); out-of-line helpers
 * per element size.  NOTE(review): .fniv fields appear elided in this paste.
 */
1342 static void do_vavg_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1343                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1345 static const TCGOpcode vecop_list[] = {
1346 INDEX_op_sari_vec, INDEX_op_add_vec, 0
1348 static const GVecGen3 op[4] = {
1351 .fno = gen_helper_vavg_b,
1352 .opt_opc = vecop_list,
1357 .fno = gen_helper_vavg_h,
1358 .opt_opc = vecop_list,
1363 .fno = gen_helper_vavg_w,
1364 .opt_opc = vecop_list,
1369 .fno = gen_helper_vavg_d,
1370 .opt_opc = vecop_list,
1375 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* Unsigned average expansion table (vavg.{bu,hu,wu,du}). */
1378 static void do_vavg_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1379                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1381 static const TCGOpcode vecop_list[] = {
1382 INDEX_op_shri_vec, INDEX_op_add_vec, 0
1384 static const GVecGen3 op[4] = {
1387 .fno = gen_helper_vavg_bu,
1388 .opt_opc = vecop_list,
1393 .fno = gen_helper_vavg_hu,
1394 .opt_opc = vecop_list,
1399 .fno = gen_helper_vavg_wu,
1400 .opt_opc = vecop_list,
1405 .fno = gen_helper_vavg_du,
1406 .opt_opc = vecop_list,
1411 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* LSX and LASX signed/unsigned averages. */
1414 TRANS(vavg_b, LSX, gvec_vvv, MO_8, do_vavg_s)
1415 TRANS(vavg_h, LSX, gvec_vvv, MO_16, do_vavg_s)
1416 TRANS(vavg_w, LSX, gvec_vvv, MO_32, do_vavg_s)
1417 TRANS(vavg_d, LSX, gvec_vvv, MO_64, do_vavg_s)
1418 TRANS(vavg_bu, LSX, gvec_vvv, MO_8, do_vavg_u)
1419 TRANS(vavg_hu, LSX, gvec_vvv, MO_16, do_vavg_u)
1420 TRANS(vavg_wu, LSX, gvec_vvv, MO_32, do_vavg_u)
1421 TRANS(vavg_du, LSX, gvec_vvv, MO_64, do_vavg_u)
1422 TRANS(xvavg_b, LASX, gvec_xxx, MO_8, do_vavg_s)
1423 TRANS(xvavg_h, LASX, gvec_xxx, MO_16, do_vavg_s)
1424 TRANS(xvavg_w, LASX, gvec_xxx, MO_32, do_vavg_s)
1425 TRANS(xvavg_d, LASX, gvec_xxx, MO_64, do_vavg_s)
1426 TRANS(xvavg_bu, LASX, gvec_xxx, MO_8, do_vavg_u)
1427 TRANS(xvavg_hu, LASX, gvec_xxx, MO_16, do_vavg_u)
1428 TRANS(xvavg_wu, LASX, gvec_xxx, MO_32, do_vavg_u)
1429 TRANS(xvavg_du, LASX, gvec_xxx, MO_64, do_vavg_u)
/* Signed rounded-average expansion table (vavgr.{b,h,w,d}). */
1431 static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1432                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1434 static const TCGOpcode vecop_list[] = {
1435 INDEX_op_sari_vec, INDEX_op_add_vec, 0
1437 static const GVecGen3 op[4] = {
1439 .fniv = gen_vavgr_s,
1440 .fno = gen_helper_vavgr_b,
1441 .opt_opc = vecop_list,
1445 .fniv = gen_vavgr_s,
1446 .fno = gen_helper_vavgr_h,
1447 .opt_opc = vecop_list,
1451 .fniv = gen_vavgr_s,
1452 .fno = gen_helper_vavgr_w,
1453 .opt_opc = vecop_list,
1457 .fniv = gen_vavgr_s,
1458 .fno = gen_helper_vavgr_d,
1459 .opt_opc = vecop_list,
1464 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* Unsigned rounded-average expansion table (vavgr.{bu,hu,wu,du}). */
1467 static void do_vavgr_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1468                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1470 static const TCGOpcode vecop_list[] = {
1471 INDEX_op_shri_vec, INDEX_op_add_vec, 0
1473 static const GVecGen3 op[4] = {
1475 .fniv = gen_vavgr_u,
1476 .fno = gen_helper_vavgr_bu,
1477 .opt_opc = vecop_list,
1481 .fniv = gen_vavgr_u,
1482 .fno = gen_helper_vavgr_hu,
1483 .opt_opc = vecop_list,
1487 .fniv = gen_vavgr_u,
1488 .fno = gen_helper_vavgr_wu,
1489 .opt_opc = vecop_list,
1493 .fniv = gen_vavgr_u,
1494 .fno = gen_helper_vavgr_du,
1495 .opt_opc = vecop_list,
1500 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* LSX and LASX signed/unsigned rounded averages. */
1503 TRANS(vavgr_b, LSX, gvec_vvv, MO_8, do_vavgr_s)
1504 TRANS(vavgr_h, LSX, gvec_vvv, MO_16, do_vavgr_s)
1505 TRANS(vavgr_w, LSX, gvec_vvv, MO_32, do_vavgr_s)
1506 TRANS(vavgr_d, LSX, gvec_vvv, MO_64, do_vavgr_s)
1507 TRANS(vavgr_bu, LSX, gvec_vvv, MO_8, do_vavgr_u)
1508 TRANS(vavgr_hu, LSX, gvec_vvv, MO_16, do_vavgr_u)
1509 TRANS(vavgr_wu, LSX, gvec_vvv, MO_32, do_vavgr_u)
1510 TRANS(vavgr_du, LSX, gvec_vvv, MO_64, do_vavgr_u)
1511 TRANS(xvavgr_b, LASX, gvec_xxx, MO_8, do_vavgr_s)
1512 TRANS(xvavgr_h, LASX, gvec_xxx, MO_16, do_vavgr_s)
1513 TRANS(xvavgr_w, LASX, gvec_xxx, MO_32, do_vavgr_s)
1514 TRANS(xvavgr_d, LASX, gvec_xxx, MO_64, do_vavgr_s)
1515 TRANS(xvavgr_bu, LASX, gvec_xxx, MO_8, do_vavgr_u)
1516 TRANS(xvavgr_hu, LASX, gvec_xxx, MO_16, do_vavgr_u)
1517 TRANS(xvavgr_wu, LASX, gvec_xxx, MO_32, do_vavgr_u)
1518 TRANS(xvavgr_du, LASX, gvec_xxx, MO_64, do_vavgr_u)
1520 static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1522 tcg_gen_smax_vec(vece, t, a, b);
1523 tcg_gen_smin_vec(vece, a, a, b);
1524 tcg_gen_sub_vec(vece, t, t, a);
/* Signed absolute-difference expansion table (vabsd.{b,h,w,d}). */
1527 static void do_vabsd_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1528                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1530 static const TCGOpcode vecop_list[] = {
1531 INDEX_op_smax_vec, INDEX_op_smin_vec, INDEX_op_sub_vec, 0
1533 static const GVecGen3 op[4] = {
1535 .fniv = gen_vabsd_s,
1536 .fno = gen_helper_vabsd_b,
1537 .opt_opc = vecop_list,
1541 .fniv = gen_vabsd_s,
1542 .fno = gen_helper_vabsd_h,
1543 .opt_opc = vecop_list,
1547 .fniv = gen_vabsd_s,
1548 .fno = gen_helper_vabsd_w,
1549 .opt_opc = vecop_list,
1553 .fniv = gen_vabsd_s,
1554 .fno = gen_helper_vabsd_d,
1555 .opt_opc = vecop_list,
1560 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1563 static void gen_vabsd_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1565 tcg_gen_umax_vec(vece, t, a, b);
1566 tcg_gen_umin_vec(vece, a, a, b);
1567 tcg_gen_sub_vec(vece, t, t, a);
/* Unsigned absolute-difference expansion table (vabsd.{bu,hu,wu,du}). */
1570 static void do_vabsd_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1571                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1573 static const TCGOpcode vecop_list[] = {
1574 INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0
1576 static const GVecGen3 op[4] = {
1578 .fniv = gen_vabsd_u,
1579 .fno = gen_helper_vabsd_bu,
1580 .opt_opc = vecop_list,
1584 .fniv = gen_vabsd_u,
1585 .fno = gen_helper_vabsd_hu,
1586 .opt_opc = vecop_list,
1590 .fniv = gen_vabsd_u,
1591 .fno = gen_helper_vabsd_wu,
1592 .opt_opc = vecop_list,
1596 .fniv = gen_vabsd_u,
1597 .fno = gen_helper_vabsd_du,
1598 .opt_opc = vecop_list,
1603 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* LSX and LASX signed/unsigned absolute differences. */
1606 TRANS(vabsd_b, LSX, gvec_vvv, MO_8, do_vabsd_s)
1607 TRANS(vabsd_h, LSX, gvec_vvv, MO_16, do_vabsd_s)
1608 TRANS(vabsd_w, LSX, gvec_vvv, MO_32, do_vabsd_s)
1609 TRANS(vabsd_d, LSX, gvec_vvv, MO_64, do_vabsd_s)
1610 TRANS(vabsd_bu, LSX, gvec_vvv, MO_8, do_vabsd_u)
1611 TRANS(vabsd_hu, LSX, gvec_vvv, MO_16, do_vabsd_u)
1612 TRANS(vabsd_wu, LSX, gvec_vvv, MO_32, do_vabsd_u)
1613 TRANS(vabsd_du, LSX, gvec_vvv, MO_64, do_vabsd_u)
1614 TRANS(xvabsd_b, LASX, gvec_xxx, MO_8, do_vabsd_s)
1615 TRANS(xvabsd_h, LASX, gvec_xxx, MO_16, do_vabsd_s)
1616 TRANS(xvabsd_w, LASX, gvec_xxx, MO_32, do_vabsd_s)
1617 TRANS(xvabsd_d, LASX, gvec_xxx, MO_64, do_vabsd_s)
1618 TRANS(xvabsd_bu, LASX, gvec_xxx, MO_8, do_vabsd_u)
1619 TRANS(xvabsd_hu, LASX, gvec_xxx, MO_16, do_vabsd_u)
1620 TRANS(xvabsd_wu, LASX, gvec_xxx, MO_32, do_vabsd_u)
1621 TRANS(xvabsd_du, LASX, gvec_xxx, MO_64, do_vabsd_u)
1623 static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1627 t1 = tcg_temp_new_vec_matching(a);
1628 t2 = tcg_temp_new_vec_matching(b);
1630 tcg_gen_abs_vec(vece, t1, a);
1631 tcg_gen_abs_vec(vece, t2, b);
1632 tcg_gen_add_vec(vece, t, t1, t2);
/* Sum-of-absolute-values expansion table (vadda.{b,h,w,d}). */
1635 static void do_vadda(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1636                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1638 static const TCGOpcode vecop_list[] = {
1639 INDEX_op_abs_vec, INDEX_op_add_vec, 0
1641 static const GVecGen3 op[4] = {
1644 .fno = gen_helper_vadda_b,
1645 .opt_opc = vecop_list,
1650 .fno = gen_helper_vadda_h,
1651 .opt_opc = vecop_list,
1656 .fno = gen_helper_vadda_w,
1657 .opt_opc = vecop_list,
1662 .fno = gen_helper_vadda_d,
1663 .opt_opc = vecop_list,
1668 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* vadda; vmax/vmin map directly onto the generic gvec min/max expanders. */
1671 TRANS(vadda_b, LSX, gvec_vvv, MO_8, do_vadda)
1672 TRANS(vadda_h, LSX, gvec_vvv, MO_16, do_vadda)
1673 TRANS(vadda_w, LSX, gvec_vvv, MO_32, do_vadda)
1674 TRANS(vadda_d, LSX, gvec_vvv, MO_64, do_vadda)
1676 TRANS(vmax_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smax)
1677 TRANS(vmax_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smax)
1678 TRANS(vmax_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smax)
1679 TRANS(vmax_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smax)
1680 TRANS(vmax_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umax)
1681 TRANS(vmax_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umax)
1682 TRANS(vmax_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umax)
1683 TRANS(vmax_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umax)
1685 TRANS(vmin_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smin)
1686 TRANS(vmin_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smin)
1687 TRANS(vmin_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smin)
1688 TRANS(vmin_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smin)
1689 TRANS(vmin_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umin)
1690 TRANS(vmin_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umin)
1691 TRANS(vmin_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umin)
1692 TRANS(vmin_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umin)
/* Min/max of each element against a splatted immediate constant. */
1694 static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1696 tcg_gen_smin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1699 static void gen_vmini_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1701 tcg_gen_umin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1704 static void gen_vmaxi_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1706 tcg_gen_smax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1709 static void gen_vmaxi_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1711 tcg_gen_umax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
/* GVecGen2i table: signed min against immediate (vmini.{b,h,w,d}). */
1714 static void do_vmini_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1715                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
1717 static const TCGOpcode vecop_list[] = {
1718 INDEX_op_smin_vec, 0
1720 static const GVecGen2i op[4] = {
1722 .fniv = gen_vmini_s,
1723 .fnoi = gen_helper_vmini_b,
1724 .opt_opc = vecop_list,
1728 .fniv = gen_vmini_s,
1729 .fnoi = gen_helper_vmini_h,
1730 .opt_opc = vecop_list,
1734 .fniv = gen_vmini_s,
1735 .fnoi = gen_helper_vmini_w,
1736 .opt_opc = vecop_list,
1740 .fniv = gen_vmini_s,
1741 .fnoi = gen_helper_vmini_d,
1742 .opt_opc = vecop_list,
1747 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
/* GVecGen2i table: unsigned min against immediate (vmini.{bu,hu,wu,du}). */
1750 static void do_vmini_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1751                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
1753 static const TCGOpcode vecop_list[] = {
1754 INDEX_op_umin_vec, 0
1756 static const GVecGen2i op[4] = {
1758 .fniv = gen_vmini_u,
1759 .fnoi = gen_helper_vmini_bu,
1760 .opt_opc = vecop_list,
1764 .fniv = gen_vmini_u,
1765 .fnoi = gen_helper_vmini_hu,
1766 .opt_opc = vecop_list,
1770 .fniv = gen_vmini_u,
1771 .fnoi = gen_helper_vmini_wu,
1772 .opt_opc = vecop_list,
1776 .fniv = gen_vmini_u,
1777 .fnoi = gen_helper_vmini_du,
1778 .opt_opc = vecop_list,
1783 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
/* LSX min-with-immediate, signed and unsigned. */
1786 TRANS(vmini_b, LSX, gvec_vv_i, MO_8, do_vmini_s)
1787 TRANS(vmini_h, LSX, gvec_vv_i, MO_16, do_vmini_s)
1788 TRANS(vmini_w, LSX, gvec_vv_i, MO_32, do_vmini_s)
1789 TRANS(vmini_d, LSX, gvec_vv_i, MO_64, do_vmini_s)
1790 TRANS(vmini_bu, LSX, gvec_vv_i, MO_8, do_vmini_u)
1791 TRANS(vmini_hu, LSX, gvec_vv_i, MO_16, do_vmini_u)
1792 TRANS(vmini_wu, LSX, gvec_vv_i, MO_32, do_vmini_u)
1793 TRANS(vmini_du, LSX, gvec_vv_i, MO_64, do_vmini_u)
/* GVecGen2i table: signed max against immediate (vmaxi.{b,h,w,d}). */
1795 static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1796                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
1798 static const TCGOpcode vecop_list[] = {
1799 INDEX_op_smax_vec, 0
1801 static const GVecGen2i op[4] = {
1803 .fniv = gen_vmaxi_s,
1804 .fnoi = gen_helper_vmaxi_b,
1805 .opt_opc = vecop_list,
1809 .fniv = gen_vmaxi_s,
1810 .fnoi = gen_helper_vmaxi_h,
1811 .opt_opc = vecop_list,
1815 .fniv = gen_vmaxi_s,
1816 .fnoi = gen_helper_vmaxi_w,
1817 .opt_opc = vecop_list,
1821 .fniv = gen_vmaxi_s,
1822 .fnoi = gen_helper_vmaxi_d,
1823 .opt_opc = vecop_list,
1828 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
/* GVecGen2i table: unsigned max against immediate (vmaxi.{bu,hu,wu,du}). */
1831 static void do_vmaxi_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1832                        int64_t imm, uint32_t oprsz, uint32_t maxsz)
1834 static const TCGOpcode vecop_list[] = {
1835 INDEX_op_umax_vec, 0
1837 static const GVecGen2i op[4] = {
1839 .fniv = gen_vmaxi_u,
1840 .fnoi = gen_helper_vmaxi_bu,
1841 .opt_opc = vecop_list,
1845 .fniv = gen_vmaxi_u,
1846 .fnoi = gen_helper_vmaxi_hu,
1847 .opt_opc = vecop_list,
1851 .fniv = gen_vmaxi_u,
1852 .fnoi = gen_helper_vmaxi_wu,
1853 .opt_opc = vecop_list,
1857 .fniv = gen_vmaxi_u,
1858 .fnoi = gen_helper_vmaxi_du,
1859 .opt_opc = vecop_list,
1864 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
/* vmaxi with immediate; vmul maps onto the generic gvec multiply. */
1867 TRANS(vmaxi_b, LSX, gvec_vv_i, MO_8, do_vmaxi_s)
1868 TRANS(vmaxi_h, LSX, gvec_vv_i, MO_16, do_vmaxi_s)
1869 TRANS(vmaxi_w, LSX, gvec_vv_i, MO_32, do_vmaxi_s)
1870 TRANS(vmaxi_d, LSX, gvec_vv_i, MO_64, do_vmaxi_s)
1871 TRANS(vmaxi_bu, LSX, gvec_vv_i, MO_8, do_vmaxi_u)
1872 TRANS(vmaxi_hu, LSX, gvec_vv_i, MO_16, do_vmaxi_u)
1873 TRANS(vmaxi_wu, LSX, gvec_vv_i, MO_32, do_vmaxi_u)
1874 TRANS(vmaxi_du, LSX, gvec_vv_i, MO_64, do_vmaxi_u)
1876 TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul)
1877 TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul)
1878 TRANS(vmul_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_mul)
1879 TRANS(vmul_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_mul)
1881 static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1883 TCGv_i32 discard = tcg_temp_new_i32();
1884 tcg_gen_muls2_i32(discard, t, a, b);
1887 static void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1889 TCGv_i64 discard = tcg_temp_new_i64();
1890 tcg_gen_muls2_i64(discard, t, a, b);
/*
 * Signed multiply-high table (vmuh.{b,h,w,d}); only helpers are visible
 * here, with inline fni4/fni8 fields apparently elided in this paste.
 */
1893 static void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1894                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1896 static const GVecGen3 op[4] = {
1898 .fno = gen_helper_vmuh_b,
1902 .fno = gen_helper_vmuh_h,
1907 .fno = gen_helper_vmuh_w,
1912 .fno = gen_helper_vmuh_d,
1917 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* LSX signed multiply-high. */
1920 TRANS(vmuh_b, LSX, gvec_vvv, MO_8, do_vmuh_s)
1921 TRANS(vmuh_h, LSX, gvec_vvv, MO_16, do_vmuh_s)
1922 TRANS(vmuh_w, LSX, gvec_vvv, MO_32, do_vmuh_s)
1923 TRANS(vmuh_d, LSX, gvec_vvv, MO_64, do_vmuh_s)
1925 static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1927 TCGv_i32 discard = tcg_temp_new_i32();
1928 tcg_gen_mulu2_i32(discard, t, a, b);
1931 static void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1933 TCGv_i64 discard = tcg_temp_new_i64();
1934 tcg_gen_mulu2_i64(discard, t, a, b);
/* Unsigned multiply-high table (vmuh.{bu,hu,wu,du}); byte/half use helpers only. */
1937 static void do_vmuh_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1938                       uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1940 static const GVecGen3 op[4] = {
1942 .fno = gen_helper_vmuh_bu,
1946 .fno = gen_helper_vmuh_hu,
1950 .fni4 = gen_vmuh_wu,
1951 .fno = gen_helper_vmuh_wu,
1955 .fni8 = gen_vmuh_du,
1956 .fno = gen_helper_vmuh_du,
1961 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* LSX unsigned multiply-high. */
1964 TRANS(vmuh_bu, LSX, gvec_vvv, MO_8, do_vmuh_u)
1965 TRANS(vmuh_hu, LSX, gvec_vvv, MO_16, do_vmuh_u)
1966 TRANS(vmuh_wu, LSX, gvec_vvv, MO_32, do_vmuh_u)
1967 TRANS(vmuh_du, LSX, gvec_vvv, MO_64, do_vmuh_u)
/*
 * Per 2N-bit lane: multiply the sign-extended even (low) N-bit elements
 * of @a and @b (shl+sar performs the in-lane sign extension).
 */
1969 static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1972 int halfbits = 4 << vece;
1974 t1 = tcg_temp_new_vec_matching(a);
1975 t2 = tcg_temp_new_vec_matching(b);
1976 tcg_gen_shli_vec(vece, t1, a, halfbits);
1977 tcg_gen_sari_vec(vece, t1, t1, halfbits);
1978 tcg_gen_shli_vec(vece, t2, b, halfbits);
1979 tcg_gen_sari_vec(vece, t2, t2, halfbits);
1980 tcg_gen_mul_vec(vece, t, t1, t2);
1983 static void gen_vmulwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1987 t1 = tcg_temp_new_i32();
1988 t2 = tcg_temp_new_i32();
1989 tcg_gen_ext16s_i32(t1, a);
1990 tcg_gen_ext16s_i32(t2, b);
1991 tcg_gen_mul_i32(t, t1, t2);
1994 static void gen_vmulwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1998 t1 = tcg_temp_new_i64();
1999 t2 = tcg_temp_new_i64();
2000 tcg_gen_ext32s_i64(t1, a);
2001 tcg_gen_ext32s_i64(t2, b);
2002 tcg_gen_mul_i64(t, t1, t2);
/*
 * Signed even-element widening multiply table; op[3] covers MO_8..MO_32
 * only — the q.d form is handled separately by VMUL_Q below.
 */
2005 static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2006                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2008 static const TCGOpcode vecop_list[] = {
2009 INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2011 static const GVecGen3 op[3] = {
2013 .fniv = gen_vmulwev_s,
2014 .fno = gen_helper_vmulwev_h_b,
2015 .opt_opc = vecop_list,
2019 .fni4 = gen_vmulwev_w_h,
2020 .fniv = gen_vmulwev_s,
2021 .fno = gen_helper_vmulwev_w_h,
2022 .opt_opc = vecop_list,
2026 .fni8 = gen_vmulwev_d_w,
2027 .fniv = gen_vmulwev_s,
2028 .fno = gen_helper_vmulwev_d_w,
2029 .opt_opc = vecop_list,
2034 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* LSX signed even-element widening multiplies (q.d via VMUL_Q below). */
2037 TRANS(vmulwev_h_b, LSX, gvec_vvv, MO_8, do_vmulwev_s)
2038 TRANS(vmulwev_w_h, LSX, gvec_vvv, MO_16, do_vmulwev_s)
2039 TRANS(vmulwev_d_w, LSX, gvec_vvv, MO_32, do_vmulwev_s)
/*
 * unsigned x signed 64x64->128 multiply, expressed via the existing
 * signed x unsigned primitive with the operands swapped.
 */
2041 static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
2042                                TCGv_i64 arg1, TCGv_i64 arg2)
2044 tcg_gen_mulsu2_i64(rl, rh, arg2, arg1);
/*
 * Emit a 128-bit widening multiply: read one 64-bit lane of Vj (idx1) and
 * Vk (idx2), form the 128-bit product with the given 64x64->128 primitive
 * (muls2/mulu2/mulus2) and write it to Vd as a lo/hi 64-bit pair.
 * Availability-gated on LSX.  (Comment kept outside the macro body:
 * inserting lines inside would break the backslash continuations.)
 */
2047 #define VMUL_Q(NAME, FN, idx1, idx2) \
2048 static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
2050 TCGv_i64 rh, rl, arg1, arg2; \
2052 if (!avail_LSX(ctx)) { \
2056 rh = tcg_temp_new_i64(); \
2057 rl = tcg_temp_new_i64(); \
2058 arg1 = tcg_temp_new_i64(); \
2059 arg2 = tcg_temp_new_i64(); \
2061 get_vreg64(arg1, a->vj, idx1); \
2062 get_vreg64(arg2, a->vk, idx2); \
2064 tcg_gen_## FN ##_i64(rl, rh, arg1, arg2); \
2066 set_vreg64(rh, a->vd, 1); \
2067 set_vreg64(rl, a->vd, 0); \
/* 128-bit widening multiplies: even/odd lane, signed/unsigned/mixed. */
2072 VMUL_Q(vmulwev_q_d, muls2, 0, 0)
2073 VMUL_Q(vmulwod_q_d, muls2, 1, 1)
2074 VMUL_Q(vmulwev_q_du, mulu2, 0, 0)
2075 VMUL_Q(vmulwod_q_du, mulu2, 1, 1)
2076 VMUL_Q(vmulwev_q_du_d, mulus2, 0, 0)
2077 VMUL_Q(vmulwod_q_du_d, mulus2, 1, 1)
/* Per 2N-bit lane: multiply the sign-extended odd (high) N-bit elements. */
2079 static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2082 int halfbits = 4 << vece;
2084 t1 = tcg_temp_new_vec_matching(a);
2085 t2 = tcg_temp_new_vec_matching(b);
2086 tcg_gen_sari_vec(vece, t1, a, halfbits);
2087 tcg_gen_sari_vec(vece, t2, b, halfbits);
2088 tcg_gen_mul_vec(vece, t, t1, t2);
2091 static void gen_vmulwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2095 t1 = tcg_temp_new_i32();
2096 t2 = tcg_temp_new_i32();
2097 tcg_gen_sari_i32(t1, a, 16);
2098 tcg_gen_sari_i32(t2, b, 16);
2099 tcg_gen_mul_i32(t, t1, t2);
2102 static void gen_vmulwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2106 t1 = tcg_temp_new_i64();
2107 t2 = tcg_temp_new_i64();
2108 tcg_gen_sari_i64(t1, a, 32);
2109 tcg_gen_sari_i64(t2, b, 32);
2110 tcg_gen_mul_i64(t, t1, t2);
/* Signed odd-element widening multiply table (MO_8..MO_32). */
2113 static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2114                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2116 static const TCGOpcode vecop_list[] = {
2117 INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2119 static const GVecGen3 op[3] = {
2121 .fniv = gen_vmulwod_s,
2122 .fno = gen_helper_vmulwod_h_b,
2123 .opt_opc = vecop_list,
2127 .fni4 = gen_vmulwod_w_h,
2128 .fniv = gen_vmulwod_s,
2129 .fno = gen_helper_vmulwod_w_h,
2130 .opt_opc = vecop_list,
2134 .fni8 = gen_vmulwod_d_w,
2135 .fniv = gen_vmulwod_s,
2136 .fno = gen_helper_vmulwod_d_w,
2137 .opt_opc = vecop_list,
2142 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* LSX signed odd-element widening multiplies. */
2145 TRANS(vmulwod_h_b, LSX, gvec_vvv, MO_8, do_vmulwod_s)
2146 TRANS(vmulwod_w_h, LSX, gvec_vvv, MO_16, do_vmulwod_s)
2147 TRANS(vmulwod_d_w, LSX, gvec_vvv, MO_32, do_vmulwod_s)
/* Per 2N-bit lane: multiply the zero-extended even (low) N-bit elements. */
2149 static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2151 TCGv_vec t1, t2, mask;
2153 t1 = tcg_temp_new_vec_matching(a);
2154 t2 = tcg_temp_new_vec_matching(b);
2155 mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2156 tcg_gen_and_vec(vece, t1, a, mask);
2157 tcg_gen_and_vec(vece, t2, b, mask);
2158 tcg_gen_mul_vec(vece, t, t1, t2);
2161 static void gen_vmulwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2165 t1 = tcg_temp_new_i32();
2166 t2 = tcg_temp_new_i32();
2167 tcg_gen_ext16u_i32(t1, a);
2168 tcg_gen_ext16u_i32(t2, b);
2169 tcg_gen_mul_i32(t, t1, t2);
2172 static void gen_vmulwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2176 t1 = tcg_temp_new_i64();
2177 t2 = tcg_temp_new_i64();
2178 tcg_gen_ext32u_i64(t1, a);
2179 tcg_gen_ext32u_i64(t2, b);
2180 tcg_gen_mul_i64(t, t1, t2);
/*
 * Unsigned even-element widening multiply table (MO_8..MO_32).
 * NOTE(review): the vecop_list initializer lines are elided in this paste.
 */
2183 static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2184                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2186 static const TCGOpcode vecop_list[] = {
2189 static const GVecGen3 op[3] = {
2191 .fniv = gen_vmulwev_u,
2192 .fno = gen_helper_vmulwev_h_bu,
2193 .opt_opc = vecop_list,
2197 .fni4 = gen_vmulwev_w_hu,
2198 .fniv = gen_vmulwev_u,
2199 .fno = gen_helper_vmulwev_w_hu,
2200 .opt_opc = vecop_list,
2204 .fni8 = gen_vmulwev_d_wu,
2205 .fniv = gen_vmulwev_u,
2206 .fno = gen_helper_vmulwev_d_wu,
2207 .opt_opc = vecop_list,
2212 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* LSX unsigned even-element widening multiplies. */
2215 TRANS(vmulwev_h_bu, LSX, gvec_vvv, MO_8, do_vmulwev_u)
2216 TRANS(vmulwev_w_hu, LSX, gvec_vvv, MO_16, do_vmulwev_u)
2217 TRANS(vmulwev_d_wu, LSX, gvec_vvv, MO_32, do_vmulwev_u)
/* Per 2N-bit lane: multiply the zero-extended odd (high) N-bit elements. */
2219 static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2222 int halfbits = 4 << vece;
2224 t1 = tcg_temp_new_vec_matching(a);
2225 t2 = tcg_temp_new_vec_matching(b);
2226 tcg_gen_shri_vec(vece, t1, a, halfbits);
2227 tcg_gen_shri_vec(vece, t2, b, halfbits);
2228 tcg_gen_mul_vec(vece, t, t1, t2);
2231 static void gen_vmulwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2235 t1 = tcg_temp_new_i32();
2236 t2 = tcg_temp_new_i32();
2237 tcg_gen_shri_i32(t1, a, 16);
2238 tcg_gen_shri_i32(t2, b, 16);
2239 tcg_gen_mul_i32(t, t1, t2);
2242 static void gen_vmulwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2246 t1 = tcg_temp_new_i64();
2247 t2 = tcg_temp_new_i64();
2248 tcg_gen_shri_i64(t1, a, 32);
2249 tcg_gen_shri_i64(t2, b, 32);
2250 tcg_gen_mul_i64(t, t1, t2);
/* Unsigned odd-element widening multiply table (MO_8..MO_32). */
2253 static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2254                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2256 static const TCGOpcode vecop_list[] = {
2257 INDEX_op_shri_vec, INDEX_op_mul_vec, 0
2259 static const GVecGen3 op[3] = {
2261 .fniv = gen_vmulwod_u,
2262 .fno = gen_helper_vmulwod_h_bu,
2263 .opt_opc = vecop_list,
2267 .fni4 = gen_vmulwod_w_hu,
2268 .fniv = gen_vmulwod_u,
2269 .fno = gen_helper_vmulwod_w_hu,
2270 .opt_opc = vecop_list,
2274 .fni8 = gen_vmulwod_d_wu,
2275 .fniv = gen_vmulwod_u,
2276 .fno = gen_helper_vmulwod_d_wu,
2277 .opt_opc = vecop_list,
2282 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* LSX unsigned odd-element widening multiplies. */
2285 TRANS(vmulwod_h_bu, LSX, gvec_vvv, MO_8, do_vmulwod_u)
2286 TRANS(vmulwod_w_hu, LSX, gvec_vvv, MO_16, do_vmulwod_u)
2287 TRANS(vmulwod_d_wu, LSX, gvec_vvv, MO_32, do_vmulwod_u)
/*
 * Per 2N-bit lane: multiply the zero-extended even element of @a by the
 * sign-extended even element of @b.
 */
2289 static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2291 TCGv_vec t1, t2, mask;
2292 int halfbits = 4 << vece;
2294 t1 = tcg_temp_new_vec_matching(a);
2295 t2 = tcg_temp_new_vec_matching(b);
2296 mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2297 tcg_gen_and_vec(vece, t1, a, mask);
2298 tcg_gen_shli_vec(vece, t2, b, halfbits);
2299 tcg_gen_sari_vec(vece, t2, t2, halfbits);
2300 tcg_gen_mul_vec(vece, t, t1, t2);
2303 static void gen_vmulwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2307 t1 = tcg_temp_new_i32();
2308 t2 = tcg_temp_new_i32();
2309 tcg_gen_ext16u_i32(t1, a);
2310 tcg_gen_ext16s_i32(t2, b);
2311 tcg_gen_mul_i32(t, t1, t2);
2314 static void gen_vmulwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2318 t1 = tcg_temp_new_i64();
2319 t2 = tcg_temp_new_i64();
2320 tcg_gen_ext32u_i64(t1, a);
2321 tcg_gen_ext32s_i64(t2, b);
2322 tcg_gen_mul_i64(t, t1, t2);
/* Mixed unsigned x signed even-element widening multiply table (MO_8..MO_32). */
2325 static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2326                            uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2328 static const TCGOpcode vecop_list[] = {
2329 INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2331 static const GVecGen3 op[3] = {
2333 .fniv = gen_vmulwev_u_s,
2334 .fno = gen_helper_vmulwev_h_bu_b,
2335 .opt_opc = vecop_list,
2339 .fni4 = gen_vmulwev_w_hu_h,
2340 .fniv = gen_vmulwev_u_s,
2341 .fno = gen_helper_vmulwev_w_hu_h,
2342 .opt_opc = vecop_list,
2346 .fni8 = gen_vmulwev_d_wu_w,
2347 .fniv = gen_vmulwev_u_s,
2348 .fno = gen_helper_vmulwev_d_wu_w,
2349 .opt_opc = vecop_list,
2354 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* LSX mixed unsigned x signed even-element widening multiplies. */
2357 TRANS(vmulwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwev_u_s)
2358 TRANS(vmulwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwev_u_s)
2359 TRANS(vmulwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwev_u_s)
/*
 * Per 2N-bit lane: multiply the zero-extended odd element of @a (shri)
 * by the sign-extended odd element of @b (sari).
 */
2361 static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2364 int halfbits = 4 << vece;
2366 t1 = tcg_temp_new_vec_matching(a);
2367 t2 = tcg_temp_new_vec_matching(b);
2368 tcg_gen_shri_vec(vece, t1, a, halfbits);
2369 tcg_gen_sari_vec(vece, t2, b, halfbits);
2370 tcg_gen_mul_vec(vece, t, t1, t2);
2373 static void gen_vmulwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2377 t1 = tcg_temp_new_i32();
2378 t2 = tcg_temp_new_i32();
2379 tcg_gen_shri_i32(t1, a, 16);
2380 tcg_gen_sari_i32(t2, b, 16);
2381 tcg_gen_mul_i32(t, t1, t2);
2383 static void gen_vmulwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2387 t1 = tcg_temp_new_i64();
2388 t2 = tcg_temp_new_i64();
2389 tcg_gen_shri_i64(t1, a, 32);
2390 tcg_gen_sari_i64(t2, b, 32);
2391 tcg_gen_mul_i64(t, t1, t2);
/* Mixed unsigned x signed odd-element widening multiply table (MO_8..MO_32). */
2394 static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2395                            uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2397 static const TCGOpcode vecop_list[] = {
2398 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2400 static const GVecGen3 op[3] = {
2402 .fniv = gen_vmulwod_u_s,
2403 .fno = gen_helper_vmulwod_h_bu_b,
2404 .opt_opc = vecop_list,
2408 .fni4 = gen_vmulwod_w_hu_h,
2409 .fniv = gen_vmulwod_u_s,
2410 .fno = gen_helper_vmulwod_w_hu_h,
2411 .opt_opc = vecop_list,
2415 .fni8 = gen_vmulwod_d_wu_w,
2416 .fniv = gen_vmulwod_u_s,
2417 .fno = gen_helper_vmulwod_d_wu_w,
2418 .opt_opc = vecop_list,
2423 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* LSX mixed unsigned x signed odd-element widening multiplies. */
2426 TRANS(vmulwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwod_u_s)
2427 TRANS(vmulwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwod_u_s)
2428 TRANS(vmulwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwod_u_s)
/* Multiply-accumulate: t += a * b (t carries the previous Vd value). */
2430 static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2434 t1 = tcg_temp_new_vec_matching(t);
2435 tcg_gen_mul_vec(vece, t1, a, b);
2436 tcg_gen_add_vec(vece, t, t, t1);
2439 static void gen_vmadd_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2443 t1 = tcg_temp_new_i32();
2444 tcg_gen_mul_i32(t1, a, b);
2445 tcg_gen_add_i32(t, t, t1);
2448 static void gen_vmadd_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2452 t1 = tcg_temp_new_i64();
2453 tcg_gen_mul_i64(t1, a, b);
2454 tcg_gen_add_i64(t, t, t1);
/*
 * Multiply-accumulate expansion table (vmadd.{b,h,w,d}); destination is
 * also a source.  NOTE(review): .fniv/.load_dest fields appear elided
 * in this paste.
 */
2457 static void do_vmadd(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2458                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2460 static const TCGOpcode vecop_list[] = {
2461 INDEX_op_mul_vec, INDEX_op_add_vec, 0
2463 static const GVecGen3 op[4] = {
2466 .fno = gen_helper_vmadd_b,
2468 .opt_opc = vecop_list,
2473 .fno = gen_helper_vmadd_h,
2475 .opt_opc = vecop_list,
2479 .fni4 = gen_vmadd_w,
2481 .fno = gen_helper_vmadd_w,
2483 .opt_opc = vecop_list,
2487 .fni8 = gen_vmadd_d,
2489 .fno = gen_helper_vmadd_d,
2491 .opt_opc = vecop_list,
2496 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
/* LSX multiply-accumulate. */
2499 TRANS(vmadd_b, LSX, gvec_vvv, MO_8, do_vmadd)
2500 TRANS(vmadd_h, LSX, gvec_vvv, MO_16, do_vmadd)
2501 TRANS(vmadd_w, LSX, gvec_vvv, MO_32, do_vmadd)
2502 TRANS(vmadd_d, LSX, gvec_vvv, MO_64, do_vmadd)
/* Multiply-subtract: t -= a * b (t carries the previous Vd value). */
2504 static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2508 t1 = tcg_temp_new_vec_matching(t);
2509 tcg_gen_mul_vec(vece, t1, a, b);
2510 tcg_gen_sub_vec(vece, t, t, t1);
2513 static void gen_vmsub_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2517 t1 = tcg_temp_new_i32();
2518 tcg_gen_mul_i32(t1, a, b);
2519 tcg_gen_sub_i32(t, t, t1);
2522 static void gen_vmsub_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2526 t1 = tcg_temp_new_i64();
2527 tcg_gen_mul_i64(t1, a, b);
2528 tcg_gen_sub_i64(t, t, t1);
/* Multiply-subtract expansion table (vmsub.{b,h,w,d}); Vd is also a source. */
2531 static void do_vmsub(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2532                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2534 static const TCGOpcode vecop_list[] = {
2535 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
2537 static const GVecGen3 op[4] = {
2540 .fno = gen_helper_vmsub_b,
2542 .opt_opc = vecop_list,
2547 .fno = gen_helper_vmsub_h,
2549 .opt_opc = vecop_list,
2553 .fni4 = gen_vmsub_w,
2555 .fno = gen_helper_vmsub_w,
2557 .opt_opc = vecop_list,
2561 .fni8 = gen_vmsub_d,
2563 .fno = gen_helper_vmsub_d,
2565 .opt_opc = vecop_list,
2570 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2573 TRANS(vmsub_b, LSX, gvec_vvv, MO_8, do_vmsub)
2574 TRANS(vmsub_h, LSX, gvec_vvv, MO_16, do_vmsub)
2575 TRANS(vmsub_w, LSX, gvec_vvv, MO_32, do_vmsub)
2576 TRANS(vmsub_d, LSX, gvec_vvv, MO_64, do_vmsub)
2578 static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2580 TCGv_vec t1, t2, t3;
2581 int halfbits = 4 << vece;
2583 t1 = tcg_temp_new_vec_matching(a);
2584 t2 = tcg_temp_new_vec_matching(b);
2585 t3 = tcg_temp_new_vec_matching(t);
2586 tcg_gen_shli_vec(vece, t1, a, halfbits);
2587 tcg_gen_sari_vec(vece, t1, t1, halfbits);
2588 tcg_gen_shli_vec(vece, t2, b, halfbits);
2589 tcg_gen_sari_vec(vece, t2, t2, halfbits);
2590 tcg_gen_mul_vec(vece, t3, t1, t2);
2591 tcg_gen_add_vec(vece, t, t, t3);
2594 static void gen_vmaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2598 t1 = tcg_temp_new_i32();
2599 gen_vmulwev_w_h(t1, a, b);
2600 tcg_gen_add_i32(t, t, t1);
2603 static void gen_vmaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2607 t1 = tcg_temp_new_i64();
2608 gen_vmulwev_d_w(t1, a, b);
2609 tcg_gen_add_i64(t, t, t1);
2612 static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2613 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2615 static const TCGOpcode vecop_list[] = {
2616 INDEX_op_shli_vec, INDEX_op_sari_vec,
2617 INDEX_op_mul_vec, INDEX_op_add_vec, 0
2619 static const GVecGen3 op[3] = {
2621 .fniv = gen_vmaddwev_s,
2622 .fno = gen_helper_vmaddwev_h_b,
2624 .opt_opc = vecop_list,
2628 .fni4 = gen_vmaddwev_w_h,
2629 .fniv = gen_vmaddwev_s,
2630 .fno = gen_helper_vmaddwev_w_h,
2632 .opt_opc = vecop_list,
2636 .fni8 = gen_vmaddwev_d_w,
2637 .fniv = gen_vmaddwev_s,
2638 .fno = gen_helper_vmaddwev_d_w,
2640 .opt_opc = vecop_list,
2645 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2648 TRANS(vmaddwev_h_b, LSX, gvec_vvv, MO_8, do_vmaddwev_s)
2649 TRANS(vmaddwev_w_h, LSX, gvec_vvv, MO_16, do_vmaddwev_s)
2650 TRANS(vmaddwev_d_w, LSX, gvec_vvv, MO_32, do_vmaddwev_s)
2652 #define VMADD_Q(NAME, FN, idx1, idx2) \
2653 static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
2655 TCGv_i64 rh, rl, arg1, arg2, th, tl; \
2657 if (!avail_LSX(ctx)) { \
2661 rh = tcg_temp_new_i64(); \
2662 rl = tcg_temp_new_i64(); \
2663 arg1 = tcg_temp_new_i64(); \
2664 arg2 = tcg_temp_new_i64(); \
2665 th = tcg_temp_new_i64(); \
2666 tl = tcg_temp_new_i64(); \
2668 get_vreg64(arg1, a->vj, idx1); \
2669 get_vreg64(arg2, a->vk, idx2); \
2670 get_vreg64(rh, a->vd, 1); \
2671 get_vreg64(rl, a->vd, 0); \
2673 tcg_gen_## FN ##_i64(tl, th, arg1, arg2); \
2674 tcg_gen_add2_i64(rl, rh, rl, rh, tl, th); \
2676 set_vreg64(rh, a->vd, 1); \
2677 set_vreg64(rl, a->vd, 0); \
2682 VMADD_Q(vmaddwev_q_d, muls2, 0, 0)
2683 VMADD_Q(vmaddwod_q_d, muls2, 1, 1)
2684 VMADD_Q(vmaddwev_q_du, mulu2, 0, 0)
2685 VMADD_Q(vmaddwod_q_du, mulu2, 1, 1)
2686 VMADD_Q(vmaddwev_q_du_d, mulus2, 0, 0)
2687 VMADD_Q(vmaddwod_q_du_d, mulus2, 1, 1)
2689 static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2691 TCGv_vec t1, t2, t3;
2692 int halfbits = 4 << vece;
2694 t1 = tcg_temp_new_vec_matching(a);
2695 t2 = tcg_temp_new_vec_matching(b);
2696 t3 = tcg_temp_new_vec_matching(t);
2697 tcg_gen_sari_vec(vece, t1, a, halfbits);
2698 tcg_gen_sari_vec(vece, t2, b, halfbits);
2699 tcg_gen_mul_vec(vece, t3, t1, t2);
2700 tcg_gen_add_vec(vece, t, t, t3);
2703 static void gen_vmaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2707 t1 = tcg_temp_new_i32();
2708 gen_vmulwod_w_h(t1, a, b);
2709 tcg_gen_add_i32(t, t, t1);
2712 static void gen_vmaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2716 t1 = tcg_temp_new_i64();
2717 gen_vmulwod_d_w(t1, a, b);
2718 tcg_gen_add_i64(t, t, t1);
2721 static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2722 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2724 static const TCGOpcode vecop_list[] = {
2725 INDEX_op_sari_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
2727 static const GVecGen3 op[3] = {
2729 .fniv = gen_vmaddwod_s,
2730 .fno = gen_helper_vmaddwod_h_b,
2732 .opt_opc = vecop_list,
2736 .fni4 = gen_vmaddwod_w_h,
2737 .fniv = gen_vmaddwod_s,
2738 .fno = gen_helper_vmaddwod_w_h,
2740 .opt_opc = vecop_list,
2744 .fni8 = gen_vmaddwod_d_w,
2745 .fniv = gen_vmaddwod_s,
2746 .fno = gen_helper_vmaddwod_d_w,
2748 .opt_opc = vecop_list,
2753 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2756 TRANS(vmaddwod_h_b, LSX, gvec_vvv, MO_8, do_vmaddwod_s)
2757 TRANS(vmaddwod_w_h, LSX, gvec_vvv, MO_16, do_vmaddwod_s)
2758 TRANS(vmaddwod_d_w, LSX, gvec_vvv, MO_32, do_vmaddwod_s)
2760 static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2762 TCGv_vec t1, t2, mask;
2764 t1 = tcg_temp_new_vec_matching(t);
2765 t2 = tcg_temp_new_vec_matching(b);
2766 mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2767 tcg_gen_and_vec(vece, t1, a, mask);
2768 tcg_gen_and_vec(vece, t2, b, mask);
2769 tcg_gen_mul_vec(vece, t1, t1, t2);
2770 tcg_gen_add_vec(vece, t, t, t1);
2773 static void gen_vmaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2777 t1 = tcg_temp_new_i32();
2778 gen_vmulwev_w_hu(t1, a, b);
2779 tcg_gen_add_i32(t, t, t1);
2782 static void gen_vmaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2786 t1 = tcg_temp_new_i64();
2787 gen_vmulwev_d_wu(t1, a, b);
2788 tcg_gen_add_i64(t, t, t1);
2791 static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2792 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2794 static const TCGOpcode vecop_list[] = {
2795 INDEX_op_mul_vec, INDEX_op_add_vec, 0
2797 static const GVecGen3 op[3] = {
2799 .fniv = gen_vmaddwev_u,
2800 .fno = gen_helper_vmaddwev_h_bu,
2802 .opt_opc = vecop_list,
2806 .fni4 = gen_vmaddwev_w_hu,
2807 .fniv = gen_vmaddwev_u,
2808 .fno = gen_helper_vmaddwev_w_hu,
2810 .opt_opc = vecop_list,
2814 .fni8 = gen_vmaddwev_d_wu,
2815 .fniv = gen_vmaddwev_u,
2816 .fno = gen_helper_vmaddwev_d_wu,
2818 .opt_opc = vecop_list,
2823 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2826 TRANS(vmaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwev_u)
2827 TRANS(vmaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwev_u)
2828 TRANS(vmaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwev_u)
2830 static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2832 TCGv_vec t1, t2, t3;
2833 int halfbits = 4 << vece;
2835 t1 = tcg_temp_new_vec_matching(a);
2836 t2 = tcg_temp_new_vec_matching(b);
2837 t3 = tcg_temp_new_vec_matching(t);
2838 tcg_gen_shri_vec(vece, t1, a, halfbits);
2839 tcg_gen_shri_vec(vece, t2, b, halfbits);
2840 tcg_gen_mul_vec(vece, t3, t1, t2);
2841 tcg_gen_add_vec(vece, t, t, t3);
2844 static void gen_vmaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2848 t1 = tcg_temp_new_i32();
2849 gen_vmulwod_w_hu(t1, a, b);
2850 tcg_gen_add_i32(t, t, t1);
2853 static void gen_vmaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2857 t1 = tcg_temp_new_i64();
2858 gen_vmulwod_d_wu(t1, a, b);
2859 tcg_gen_add_i64(t, t, t1);
2862 static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2863 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2865 static const TCGOpcode vecop_list[] = {
2866 INDEX_op_shri_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
2868 static const GVecGen3 op[3] = {
2870 .fniv = gen_vmaddwod_u,
2871 .fno = gen_helper_vmaddwod_h_bu,
2873 .opt_opc = vecop_list,
2877 .fni4 = gen_vmaddwod_w_hu,
2878 .fniv = gen_vmaddwod_u,
2879 .fno = gen_helper_vmaddwod_w_hu,
2881 .opt_opc = vecop_list,
2885 .fni8 = gen_vmaddwod_d_wu,
2886 .fniv = gen_vmaddwod_u,
2887 .fno = gen_helper_vmaddwod_d_wu,
2889 .opt_opc = vecop_list,
2894 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2897 TRANS(vmaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwod_u)
2898 TRANS(vmaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwod_u)
2899 TRANS(vmaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwod_u)
2901 static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2903 TCGv_vec t1, t2, mask;
2904 int halfbits = 4 << vece;
2906 t1 = tcg_temp_new_vec_matching(a);
2907 t2 = tcg_temp_new_vec_matching(b);
2908 mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2909 tcg_gen_and_vec(vece, t1, a, mask);
2910 tcg_gen_shli_vec(vece, t2, b, halfbits);
2911 tcg_gen_sari_vec(vece, t2, t2, halfbits);
2912 tcg_gen_mul_vec(vece, t1, t1, t2);
2913 tcg_gen_add_vec(vece, t, t, t1);
2916 static void gen_vmaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2920 t1 = tcg_temp_new_i32();
2921 gen_vmulwev_w_hu_h(t1, a, b);
2922 tcg_gen_add_i32(t, t, t1);
2925 static void gen_vmaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2929 t1 = tcg_temp_new_i64();
2930 gen_vmulwev_d_wu_w(t1, a, b);
2931 tcg_gen_add_i64(t, t, t1);
2934 static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2935 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2937 static const TCGOpcode vecop_list[] = {
2938 INDEX_op_shli_vec, INDEX_op_sari_vec,
2939 INDEX_op_mul_vec, INDEX_op_add_vec, 0
2941 static const GVecGen3 op[3] = {
2943 .fniv = gen_vmaddwev_u_s,
2944 .fno = gen_helper_vmaddwev_h_bu_b,
2946 .opt_opc = vecop_list,
2950 .fni4 = gen_vmaddwev_w_hu_h,
2951 .fniv = gen_vmaddwev_u_s,
2952 .fno = gen_helper_vmaddwev_w_hu_h,
2954 .opt_opc = vecop_list,
2958 .fni8 = gen_vmaddwev_d_wu_w,
2959 .fniv = gen_vmaddwev_u_s,
2960 .fno = gen_helper_vmaddwev_d_wu_w,
2962 .opt_opc = vecop_list,
2967 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2970 TRANS(vmaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwev_u_s)
2971 TRANS(vmaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwev_u_s)
2972 TRANS(vmaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwev_u_s)
2974 static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2976 TCGv_vec t1, t2, t3;
2977 int halfbits = 4 << vece;
2979 t1 = tcg_temp_new_vec_matching(a);
2980 t2 = tcg_temp_new_vec_matching(b);
2981 t3 = tcg_temp_new_vec_matching(t);
2982 tcg_gen_shri_vec(vece, t1, a, halfbits);
2983 tcg_gen_sari_vec(vece, t2, b, halfbits);
2984 tcg_gen_mul_vec(vece, t3, t1, t2);
2985 tcg_gen_add_vec(vece, t, t, t3);
2988 static void gen_vmaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2992 t1 = tcg_temp_new_i32();
2993 gen_vmulwod_w_hu_h(t1, a, b);
2994 tcg_gen_add_i32(t, t, t1);
2997 static void gen_vmaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
3001 t1 = tcg_temp_new_i64();
3002 gen_vmulwod_d_wu_w(t1, a, b);
3003 tcg_gen_add_i64(t, t, t1);
3006 static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3007 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3009 static const TCGOpcode vecop_list[] = {
3010 INDEX_op_shri_vec, INDEX_op_sari_vec,
3011 INDEX_op_mul_vec, INDEX_op_add_vec, 0
3013 static const GVecGen3 op[3] = {
3015 .fniv = gen_vmaddwod_u_s,
3016 .fno = gen_helper_vmaddwod_h_bu_b,
3018 .opt_opc = vecop_list,
3022 .fni4 = gen_vmaddwod_w_hu_h,
3023 .fniv = gen_vmaddwod_u_s,
3024 .fno = gen_helper_vmaddwod_w_hu_h,
3026 .opt_opc = vecop_list,
3030 .fni8 = gen_vmaddwod_d_wu_w,
3031 .fniv = gen_vmaddwod_u_s,
3032 .fno = gen_helper_vmaddwod_d_wu_w,
3034 .opt_opc = vecop_list,
3039 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3042 TRANS(vmaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwod_u_s)
3043 TRANS(vmaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwod_u_s)
3044 TRANS(vmaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwod_u_s)
3046 TRANS(vdiv_b, LSX, gen_vvv, gen_helper_vdiv_b)
3047 TRANS(vdiv_h, LSX, gen_vvv, gen_helper_vdiv_h)
3048 TRANS(vdiv_w, LSX, gen_vvv, gen_helper_vdiv_w)
3049 TRANS(vdiv_d, LSX, gen_vvv, gen_helper_vdiv_d)
3050 TRANS(vdiv_bu, LSX, gen_vvv, gen_helper_vdiv_bu)
3051 TRANS(vdiv_hu, LSX, gen_vvv, gen_helper_vdiv_hu)
3052 TRANS(vdiv_wu, LSX, gen_vvv, gen_helper_vdiv_wu)
3053 TRANS(vdiv_du, LSX, gen_vvv, gen_helper_vdiv_du)
3054 TRANS(vmod_b, LSX, gen_vvv, gen_helper_vmod_b)
3055 TRANS(vmod_h, LSX, gen_vvv, gen_helper_vmod_h)
3056 TRANS(vmod_w, LSX, gen_vvv, gen_helper_vmod_w)
3057 TRANS(vmod_d, LSX, gen_vvv, gen_helper_vmod_d)
3058 TRANS(vmod_bu, LSX, gen_vvv, gen_helper_vmod_bu)
3059 TRANS(vmod_hu, LSX, gen_vvv, gen_helper_vmod_hu)
3060 TRANS(vmod_wu, LSX, gen_vvv, gen_helper_vmod_wu)
3061 TRANS(vmod_du, LSX, gen_vvv, gen_helper_vmod_du)
3063 static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
3067 min = tcg_temp_new_vec_matching(t);
3068 tcg_gen_not_vec(vece, min, max);
3069 tcg_gen_smax_vec(vece, t, a, min);
3070 tcg_gen_smin_vec(vece, t, t, max);
3073 static void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3074 int64_t imm, uint32_t oprsz, uint32_t maxsz)
3076 static const TCGOpcode vecop_list[] = {
3077 INDEX_op_smax_vec, INDEX_op_smin_vec, 0
3079 static const GVecGen2s op[4] = {
3082 .fno = gen_helper_vsat_b,
3083 .opt_opc = vecop_list,
3088 .fno = gen_helper_vsat_h,
3089 .opt_opc = vecop_list,
3094 .fno = gen_helper_vsat_w,
3095 .opt_opc = vecop_list,
3100 .fno = gen_helper_vsat_d,
3101 .opt_opc = vecop_list,
3106 tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
3107 tcg_constant_i64((1ll<< imm) -1), &op[vece]);
3110 TRANS(vsat_b, LSX, gvec_vv_i, MO_8, do_vsat_s)
3111 TRANS(vsat_h, LSX, gvec_vv_i, MO_16, do_vsat_s)
3112 TRANS(vsat_w, LSX, gvec_vv_i, MO_32, do_vsat_s)
3113 TRANS(vsat_d, LSX, gvec_vv_i, MO_64, do_vsat_s)
3115 static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
3117 tcg_gen_umin_vec(vece, t, a, max);
3120 static void do_vsat_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3121 int64_t imm, uint32_t oprsz, uint32_t maxsz)
3124 static const TCGOpcode vecop_list[] = {
3125 INDEX_op_umin_vec, 0
3127 static const GVecGen2s op[4] = {
3130 .fno = gen_helper_vsat_bu,
3131 .opt_opc = vecop_list,
3136 .fno = gen_helper_vsat_hu,
3137 .opt_opc = vecop_list,
3142 .fno = gen_helper_vsat_wu,
3143 .opt_opc = vecop_list,
3148 .fno = gen_helper_vsat_du,
3149 .opt_opc = vecop_list,
3154 max = (imm == 0x3f) ? UINT64_MAX : (1ull << (imm + 1)) - 1;
3155 tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
3156 tcg_constant_i64(max), &op[vece]);
3159 TRANS(vsat_bu, LSX, gvec_vv_i, MO_8, do_vsat_u)
3160 TRANS(vsat_hu, LSX, gvec_vv_i, MO_16, do_vsat_u)
3161 TRANS(vsat_wu, LSX, gvec_vv_i, MO_32, do_vsat_u)
3162 TRANS(vsat_du, LSX, gvec_vv_i, MO_64, do_vsat_u)
3164 TRANS(vexth_h_b, LSX, gen_vv, gen_helper_vexth_h_b)
3165 TRANS(vexth_w_h, LSX, gen_vv, gen_helper_vexth_w_h)
3166 TRANS(vexth_d_w, LSX, gen_vv, gen_helper_vexth_d_w)
3167 TRANS(vexth_q_d, LSX, gen_vv, gen_helper_vexth_q_d)
3168 TRANS(vexth_hu_bu, LSX, gen_vv, gen_helper_vexth_hu_bu)
3169 TRANS(vexth_wu_hu, LSX, gen_vv, gen_helper_vexth_wu_hu)
3170 TRANS(vexth_du_wu, LSX, gen_vv, gen_helper_vexth_du_wu)
3171 TRANS(vexth_qu_du, LSX, gen_vv, gen_helper_vexth_qu_du)
3173 static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
3177 t1 = tcg_temp_new_vec_matching(t);
3178 zero = tcg_constant_vec_matching(t, vece, 0);
3180 tcg_gen_neg_vec(vece, t1, b);
3181 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, a, zero, t1, b);
3182 tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, a, zero, zero, t);
3185 static void do_vsigncov(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3186 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3188 static const TCGOpcode vecop_list[] = {
3189 INDEX_op_neg_vec, INDEX_op_cmpsel_vec, 0
3191 static const GVecGen3 op[4] = {
3193 .fniv = gen_vsigncov,
3194 .fno = gen_helper_vsigncov_b,
3195 .opt_opc = vecop_list,
3199 .fniv = gen_vsigncov,
3200 .fno = gen_helper_vsigncov_h,
3201 .opt_opc = vecop_list,
3205 .fniv = gen_vsigncov,
3206 .fno = gen_helper_vsigncov_w,
3207 .opt_opc = vecop_list,
3211 .fniv = gen_vsigncov,
3212 .fno = gen_helper_vsigncov_d,
3213 .opt_opc = vecop_list,
3218 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3221 TRANS(vsigncov_b, LSX, gvec_vvv, MO_8, do_vsigncov)
3222 TRANS(vsigncov_h, LSX, gvec_vvv, MO_16, do_vsigncov)
3223 TRANS(vsigncov_w, LSX, gvec_vvv, MO_32, do_vsigncov)
3224 TRANS(vsigncov_d, LSX, gvec_vvv, MO_64, do_vsigncov)
3226 TRANS(vmskltz_b, LSX, gen_vv, gen_helper_vmskltz_b)
3227 TRANS(vmskltz_h, LSX, gen_vv, gen_helper_vmskltz_h)
3228 TRANS(vmskltz_w, LSX, gen_vv, gen_helper_vmskltz_w)
3229 TRANS(vmskltz_d, LSX, gen_vv, gen_helper_vmskltz_d)
3230 TRANS(vmskgez_b, LSX, gen_vv, gen_helper_vmskgez_b)
3231 TRANS(vmsknz_b, LSX, gen_vv, gen_helper_vmsknz_b)
3233 #define EXPAND_BYTE(bit) ((uint64_t)(bit ? 0xff : 0))
3235 static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm)
3241 * imm bit [11:8] is mode, mode value is 0-12.
3242 * other values are invalid.
3244 mode = (imm >> 8) & 0xf;
3248 /* data: {2{24'0, imm[7:0]}} */
3249 data = (t << 32) | t ;
3252 /* data: {2{16'0, imm[7:0], 8'0}} */
3253 data = (t << 24) | (t << 8);
3256 /* data: {2{8'0, imm[7:0], 16'0}} */
3257 data = (t << 48) | (t << 16);
3260 /* data: {2{imm[7:0], 24'0}} */
3261 data = (t << 56) | (t << 24);
3264 /* data: {4{8'0, imm[7:0]}} */
3265 data = (t << 48) | (t << 32) | (t << 16) | t;
3268 /* data: {4{imm[7:0], 8'0}} */
3269 data = (t << 56) |(t << 40) | (t << 24) | (t << 8);
3272 /* data: {2{16'0, imm[7:0], 8'1}} */
3273 data = (t << 40) | ((uint64_t)0xff << 32) | (t << 8) | 0xff;
3276 /* data: {2{8'0, imm[7:0], 16'1}} */
3277 data = (t << 48) | ((uint64_t)0xffff << 32) | (t << 16) | 0xffff;
3280 /* data: {8{imm[7:0]}} */
3281 data =(t << 56) | (t << 48) | (t << 40) | (t << 32) |
3282 (t << 24) | (t << 16) | (t << 8) | t;
3285 /* data: {{8{imm[7]}, ..., 8{imm[0]}}} */
3287 uint64_t b0,b1,b2,b3,b4,b5,b6,b7;
3289 b1 = (t & 0x2) >> 1;
3290 b2 = (t & 0x4) >> 2;
3291 b3 = (t & 0x8) >> 3;
3292 b4 = (t & 0x10) >> 4;
3293 b5 = (t & 0x20) >> 5;
3294 b6 = (t & 0x40) >> 6;
3295 b7 = (t & 0x80) >> 7;
3296 data = (EXPAND_BYTE(b7) << 56) |
3297 (EXPAND_BYTE(b6) << 48) |
3298 (EXPAND_BYTE(b5) << 40) |
3299 (EXPAND_BYTE(b4) << 32) |
3300 (EXPAND_BYTE(b3) << 24) |
3301 (EXPAND_BYTE(b2) << 16) |
3302 (EXPAND_BYTE(b1) << 8) |
3307 /* data: {2{imm[7], ~imm[6], {5{imm[6]}}, imm[5:0], 19'0}} */
3311 b6 = (imm & 0x40) >> 6;
3312 b7 = (imm & 0x80) >> 7;
3314 t1 = (b7 << 6) | ((1-b6) << 5) | (uint64_t)(b6 ? 0x1f : 0);
3315 data = (t1 << 57) | (t0 << 51) | (t1 << 25) | (t0 << 19);
3319 /* data: {32'0, imm[7], ~{imm[6]}, 5{imm[6]}, imm[5:0], 19'0} */
3323 b6 = (imm & 0x40) >> 6;
3324 b7 = (imm & 0x80) >> 7;
3326 t1 = (b7 << 6) | ((1-b6) << 5) | (b6 ? 0x1f : 0);
3327 data = (t1 << 25) | (t0 << 19);
3331 /* data: {imm[7], ~imm[6], 8{imm[6]}, imm[5:0], 48'0} */
3335 b6 = (imm & 0x40) >> 6;
3336 b7 = (imm & 0x80) >> 7;
3338 t1 = (b7 << 9) | ((1-b6) << 8) | (b6 ? 0xff : 0);
3339 data = (t1 << 54) | (t0 << 48);
3343 generate_exception(ctx, EXCCODE_INE);
3344 g_assert_not_reached();
3349 static bool trans_vldi(DisasContext *ctx, arg_vldi *a)
3354 if (!avail_LSX(ctx)) {
3358 if (!check_vec(ctx, 16)) {
3362 sel = (a->imm >> 12) & 0x1;
3365 value = vldi_get_value(ctx, a->imm);
3368 value = ((int32_t)(a->imm << 22)) >> 22;
3369 vece = (a->imm >> 10) & 0x3;
3372 tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8,
3373 tcg_constant_i64(value));
3377 TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
3378 TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
3379 TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
3380 TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor)
3382 static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a)
3384 uint32_t vd_ofs, vj_ofs, vk_ofs;
3386 if (!avail_LSX(ctx)) {
3390 if (!check_vec(ctx, 16)) {
3394 vd_ofs = vec_full_offset(a->vd);
3395 vj_ofs = vec_full_offset(a->vj);
3396 vk_ofs = vec_full_offset(a->vk);
3398 tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, 16, ctx->vl/8);
3401 TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc)
3402 TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
3403 TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
3404 TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
3406 static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
3410 t1 = tcg_constant_vec_matching(t, vece, imm);
3411 tcg_gen_nor_vec(vece, t, a, t1);
3414 static void gen_vnori_b(TCGv_i64 t, TCGv_i64 a, int64_t imm)
3416 tcg_gen_movi_i64(t, dup_const(MO_8, imm));
3417 tcg_gen_nor_i64(t, a, t);
3420 static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3421 int64_t imm, uint32_t oprsz, uint32_t maxsz)
3423 static const TCGOpcode vecop_list[] = {
3426 static const GVecGen2i op = {
3427 .fni8 = gen_vnori_b,
3429 .fnoi = gen_helper_vnori_b,
3430 .opt_opc = vecop_list,
3434 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op);
3437 TRANS(vnori_b, LSX, gvec_vv_i, MO_8, do_vnori_b)
3439 TRANS(vsll_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shlv)
3440 TRANS(vsll_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shlv)
3441 TRANS(vsll_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shlv)
3442 TRANS(vsll_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shlv)
3443 TRANS(vslli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shli)
3444 TRANS(vslli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shli)
3445 TRANS(vslli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shli)
3446 TRANS(vslli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shli)
3448 TRANS(vsrl_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shrv)
3449 TRANS(vsrl_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shrv)
3450 TRANS(vsrl_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shrv)
3451 TRANS(vsrl_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shrv)
3452 TRANS(vsrli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shri)
3453 TRANS(vsrli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shri)
3454 TRANS(vsrli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shri)
3455 TRANS(vsrli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shri)
3457 TRANS(vsra_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sarv)
3458 TRANS(vsra_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sarv)
3459 TRANS(vsra_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sarv)
3460 TRANS(vsra_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sarv)
3461 TRANS(vsrai_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_sari)
3462 TRANS(vsrai_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_sari)
3463 TRANS(vsrai_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_sari)
3464 TRANS(vsrai_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_sari)
3466 TRANS(vrotr_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_rotrv)
3467 TRANS(vrotr_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_rotrv)
3468 TRANS(vrotr_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_rotrv)
3469 TRANS(vrotr_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_rotrv)
3470 TRANS(vrotri_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
3471 TRANS(vrotri_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
3472 TRANS(vrotri_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
3473 TRANS(vrotri_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)
3475 TRANS(vsllwil_h_b, LSX, gen_vv_i, gen_helper_vsllwil_h_b)
3476 TRANS(vsllwil_w_h, LSX, gen_vv_i, gen_helper_vsllwil_w_h)
3477 TRANS(vsllwil_d_w, LSX, gen_vv_i, gen_helper_vsllwil_d_w)
3478 TRANS(vextl_q_d, LSX, gen_vv, gen_helper_vextl_q_d)
3479 TRANS(vsllwil_hu_bu, LSX, gen_vv_i, gen_helper_vsllwil_hu_bu)
3480 TRANS(vsllwil_wu_hu, LSX, gen_vv_i, gen_helper_vsllwil_wu_hu)
3481 TRANS(vsllwil_du_wu, LSX, gen_vv_i, gen_helper_vsllwil_du_wu)
3482 TRANS(vextl_qu_du, LSX, gen_vv, gen_helper_vextl_qu_du)
3484 TRANS(vsrlr_b, LSX, gen_vvv, gen_helper_vsrlr_b)
3485 TRANS(vsrlr_h, LSX, gen_vvv, gen_helper_vsrlr_h)
3486 TRANS(vsrlr_w, LSX, gen_vvv, gen_helper_vsrlr_w)
3487 TRANS(vsrlr_d, LSX, gen_vvv, gen_helper_vsrlr_d)
3488 TRANS(vsrlri_b, LSX, gen_vv_i, gen_helper_vsrlri_b)
3489 TRANS(vsrlri_h, LSX, gen_vv_i, gen_helper_vsrlri_h)
3490 TRANS(vsrlri_w, LSX, gen_vv_i, gen_helper_vsrlri_w)
3491 TRANS(vsrlri_d, LSX, gen_vv_i, gen_helper_vsrlri_d)
3493 TRANS(vsrar_b, LSX, gen_vvv, gen_helper_vsrar_b)
3494 TRANS(vsrar_h, LSX, gen_vvv, gen_helper_vsrar_h)
3495 TRANS(vsrar_w, LSX, gen_vvv, gen_helper_vsrar_w)
3496 TRANS(vsrar_d, LSX, gen_vvv, gen_helper_vsrar_d)
3497 TRANS(vsrari_b, LSX, gen_vv_i, gen_helper_vsrari_b)
3498 TRANS(vsrari_h, LSX, gen_vv_i, gen_helper_vsrari_h)
3499 TRANS(vsrari_w, LSX, gen_vv_i, gen_helper_vsrari_w)
3500 TRANS(vsrari_d, LSX, gen_vv_i, gen_helper_vsrari_d)
3502 TRANS(vsrln_b_h, LSX, gen_vvv, gen_helper_vsrln_b_h)
3503 TRANS(vsrln_h_w, LSX, gen_vvv, gen_helper_vsrln_h_w)
3504 TRANS(vsrln_w_d, LSX, gen_vvv, gen_helper_vsrln_w_d)
3505 TRANS(vsran_b_h, LSX, gen_vvv, gen_helper_vsran_b_h)
3506 TRANS(vsran_h_w, LSX, gen_vvv, gen_helper_vsran_h_w)
3507 TRANS(vsran_w_d, LSX, gen_vvv, gen_helper_vsran_w_d)
3509 TRANS(vsrlni_b_h, LSX, gen_vv_i, gen_helper_vsrlni_b_h)
3510 TRANS(vsrlni_h_w, LSX, gen_vv_i, gen_helper_vsrlni_h_w)
3511 TRANS(vsrlni_w_d, LSX, gen_vv_i, gen_helper_vsrlni_w_d)
3512 TRANS(vsrlni_d_q, LSX, gen_vv_i, gen_helper_vsrlni_d_q)
3513 TRANS(vsrani_b_h, LSX, gen_vv_i, gen_helper_vsrani_b_h)
3514 TRANS(vsrani_h_w, LSX, gen_vv_i, gen_helper_vsrani_h_w)
3515 TRANS(vsrani_w_d, LSX, gen_vv_i, gen_helper_vsrani_w_d)
3516 TRANS(vsrani_d_q, LSX, gen_vv_i, gen_helper_vsrani_d_q)
3518 TRANS(vsrlrn_b_h, LSX, gen_vvv, gen_helper_vsrlrn_b_h)
3519 TRANS(vsrlrn_h_w, LSX, gen_vvv, gen_helper_vsrlrn_h_w)
3520 TRANS(vsrlrn_w_d, LSX, gen_vvv, gen_helper_vsrlrn_w_d)
3521 TRANS(vsrarn_b_h, LSX, gen_vvv, gen_helper_vsrarn_b_h)
3522 TRANS(vsrarn_h_w, LSX, gen_vvv, gen_helper_vsrarn_h_w)
3523 TRANS(vsrarn_w_d, LSX, gen_vvv, gen_helper_vsrarn_w_d)
3525 TRANS(vsrlrni_b_h, LSX, gen_vv_i, gen_helper_vsrlrni_b_h)
3526 TRANS(vsrlrni_h_w, LSX, gen_vv_i, gen_helper_vsrlrni_h_w)
3527 TRANS(vsrlrni_w_d, LSX, gen_vv_i, gen_helper_vsrlrni_w_d)
3528 TRANS(vsrlrni_d_q, LSX, gen_vv_i, gen_helper_vsrlrni_d_q)
3529 TRANS(vsrarni_b_h, LSX, gen_vv_i, gen_helper_vsrarni_b_h)
3530 TRANS(vsrarni_h_w, LSX, gen_vv_i, gen_helper_vsrarni_h_w)
3531 TRANS(vsrarni_w_d, LSX, gen_vv_i, gen_helper_vsrarni_w_d)
3532 TRANS(vsrarni_d_q, LSX, gen_vv_i, gen_helper_vsrarni_d_q)
3534 TRANS(vssrln_b_h, LSX, gen_vvv, gen_helper_vssrln_b_h)
3535 TRANS(vssrln_h_w, LSX, gen_vvv, gen_helper_vssrln_h_w)
3536 TRANS(vssrln_w_d, LSX, gen_vvv, gen_helper_vssrln_w_d)
3537 TRANS(vssran_b_h, LSX, gen_vvv, gen_helper_vssran_b_h)
3538 TRANS(vssran_h_w, LSX, gen_vvv, gen_helper_vssran_h_w)
3539 TRANS(vssran_w_d, LSX, gen_vvv, gen_helper_vssran_w_d)
3540 TRANS(vssrln_bu_h, LSX, gen_vvv, gen_helper_vssrln_bu_h)
3541 TRANS(vssrln_hu_w, LSX, gen_vvv, gen_helper_vssrln_hu_w)
3542 TRANS(vssrln_wu_d, LSX, gen_vvv, gen_helper_vssrln_wu_d)
3543 TRANS(vssran_bu_h, LSX, gen_vvv, gen_helper_vssran_bu_h)
3544 TRANS(vssran_hu_w, LSX, gen_vvv, gen_helper_vssran_hu_w)
3545 TRANS(vssran_wu_d, LSX, gen_vvv, gen_helper_vssran_wu_d)
3547 TRANS(vssrlni_b_h, LSX, gen_vv_i, gen_helper_vssrlni_b_h)
3548 TRANS(vssrlni_h_w, LSX, gen_vv_i, gen_helper_vssrlni_h_w)
3549 TRANS(vssrlni_w_d, LSX, gen_vv_i, gen_helper_vssrlni_w_d)
3550 TRANS(vssrlni_d_q, LSX, gen_vv_i, gen_helper_vssrlni_d_q)
3551 TRANS(vssrani_b_h, LSX, gen_vv_i, gen_helper_vssrani_b_h)
3552 TRANS(vssrani_h_w, LSX, gen_vv_i, gen_helper_vssrani_h_w)
3553 TRANS(vssrani_w_d, LSX, gen_vv_i, gen_helper_vssrani_w_d)
3554 TRANS(vssrani_d_q, LSX, gen_vv_i, gen_helper_vssrani_d_q)
3555 TRANS(vssrlni_bu_h, LSX, gen_vv_i, gen_helper_vssrlni_bu_h)
3556 TRANS(vssrlni_hu_w, LSX, gen_vv_i, gen_helper_vssrlni_hu_w)
3557 TRANS(vssrlni_wu_d, LSX, gen_vv_i, gen_helper_vssrlni_wu_d)
3558 TRANS(vssrlni_du_q, LSX, gen_vv_i, gen_helper_vssrlni_du_q)
3559 TRANS(vssrani_bu_h, LSX, gen_vv_i, gen_helper_vssrani_bu_h)
3560 TRANS(vssrani_hu_w, LSX, gen_vv_i, gen_helper_vssrani_hu_w)
3561 TRANS(vssrani_wu_d, LSX, gen_vv_i, gen_helper_vssrani_wu_d)
3562 TRANS(vssrani_du_q, LSX, gen_vv_i, gen_helper_vssrani_du_q)
3564 TRANS(vssrlrn_b_h, LSX, gen_vvv, gen_helper_vssrlrn_b_h)
3565 TRANS(vssrlrn_h_w, LSX, gen_vvv, gen_helper_vssrlrn_h_w)
3566 TRANS(vssrlrn_w_d, LSX, gen_vvv, gen_helper_vssrlrn_w_d)
3567 TRANS(vssrarn_b_h, LSX, gen_vvv, gen_helper_vssrarn_b_h)
3568 TRANS(vssrarn_h_w, LSX, gen_vvv, gen_helper_vssrarn_h_w)
3569 TRANS(vssrarn_w_d, LSX, gen_vvv, gen_helper_vssrarn_w_d)
3570 TRANS(vssrlrn_bu_h, LSX, gen_vvv, gen_helper_vssrlrn_bu_h)
3571 TRANS(vssrlrn_hu_w, LSX, gen_vvv, gen_helper_vssrlrn_hu_w)
3572 TRANS(vssrlrn_wu_d, LSX, gen_vvv, gen_helper_vssrlrn_wu_d)
3573 TRANS(vssrarn_bu_h, LSX, gen_vvv, gen_helper_vssrarn_bu_h)
3574 TRANS(vssrarn_hu_w, LSX, gen_vvv, gen_helper_vssrarn_hu_w)
3575 TRANS(vssrarn_wu_d, LSX, gen_vvv, gen_helper_vssrarn_wu_d)
3577 TRANS(vssrlrni_b_h, LSX, gen_vv_i, gen_helper_vssrlrni_b_h)
3578 TRANS(vssrlrni_h_w, LSX, gen_vv_i, gen_helper_vssrlrni_h_w)
3579 TRANS(vssrlrni_w_d, LSX, gen_vv_i, gen_helper_vssrlrni_w_d)
3580 TRANS(vssrlrni_d_q, LSX, gen_vv_i, gen_helper_vssrlrni_d_q)
3581 TRANS(vssrarni_b_h, LSX, gen_vv_i, gen_helper_vssrarni_b_h)
3582 TRANS(vssrarni_h_w, LSX, gen_vv_i, gen_helper_vssrarni_h_w)
3583 TRANS(vssrarni_w_d, LSX, gen_vv_i, gen_helper_vssrarni_w_d)
3584 TRANS(vssrarni_d_q, LSX, gen_vv_i, gen_helper_vssrarni_d_q)
3585 TRANS(vssrlrni_bu_h, LSX, gen_vv_i, gen_helper_vssrlrni_bu_h)
3586 TRANS(vssrlrni_hu_w, LSX, gen_vv_i, gen_helper_vssrlrni_hu_w)
3587 TRANS(vssrlrni_wu_d, LSX, gen_vv_i, gen_helper_vssrlrni_wu_d)
3588 TRANS(vssrlrni_du_q, LSX, gen_vv_i, gen_helper_vssrlrni_du_q)
3589 TRANS(vssrarni_bu_h, LSX, gen_vv_i, gen_helper_vssrarni_bu_h)
3590 TRANS(vssrarni_hu_w, LSX, gen_vv_i, gen_helper_vssrarni_hu_w)
3591 TRANS(vssrarni_wu_d, LSX, gen_vv_i, gen_helper_vssrarni_wu_d)
3592 TRANS(vssrarni_du_q, LSX, gen_vv_i, gen_helper_vssrarni_du_q)
3594 TRANS(vclo_b, LSX, gen_vv, gen_helper_vclo_b)
3595 TRANS(vclo_h, LSX, gen_vv, gen_helper_vclo_h)
3596 TRANS(vclo_w, LSX, gen_vv, gen_helper_vclo_w)
3597 TRANS(vclo_d, LSX, gen_vv, gen_helper_vclo_d)
3598 TRANS(vclz_b, LSX, gen_vv, gen_helper_vclz_b)
3599 TRANS(vclz_h, LSX, gen_vv, gen_helper_vclz_h)
3600 TRANS(vclz_w, LSX, gen_vv, gen_helper_vclz_w)
3601 TRANS(vclz_d, LSX, gen_vv, gen_helper_vclz_d)
3603 TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b)
3604 TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h)
3605 TRANS(vpcnt_w, LSX, gen_vv, gen_helper_vpcnt_w)
3606 TRANS(vpcnt_d, LSX, gen_vv, gen_helper_vpcnt_d)
3608 static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
3609 void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
3611 TCGv_vec mask, lsh, t1, one;
3613 lsh = tcg_temp_new_vec_matching(t);
3614 t1 = tcg_temp_new_vec_matching(t);
3615 mask = tcg_constant_vec_matching(t, vece, (8 << vece) - 1);
3616 one = tcg_constant_vec_matching(t, vece, 1);
3618 tcg_gen_and_vec(vece, lsh, b, mask);
3619 tcg_gen_shlv_vec(vece, t1, one, lsh);
3620 func(vece, t, a, t1);
3623 static void gen_vbitclr(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
3625 do_vbit(vece, t, a, b, tcg_gen_andc_vec);
3628 static void gen_vbitset(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
3630 do_vbit(vece, t, a, b, tcg_gen_or_vec);
3633 static void gen_vbitrev(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
3635 do_vbit(vece, t, a, b, tcg_gen_xor_vec);
3638 static void do_vbitclr(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3639 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3641 static const TCGOpcode vecop_list[] = {
3642 INDEX_op_shlv_vec, INDEX_op_andc_vec, 0
3644 static const GVecGen3 op[4] = {
3646 .fniv = gen_vbitclr,
3647 .fno = gen_helper_vbitclr_b,
3648 .opt_opc = vecop_list,
3652 .fniv = gen_vbitclr,
3653 .fno = gen_helper_vbitclr_h,
3654 .opt_opc = vecop_list,
3658 .fniv = gen_vbitclr,
3659 .fno = gen_helper_vbitclr_w,
3660 .opt_opc = vecop_list,
3664 .fniv = gen_vbitclr,
3665 .fno = gen_helper_vbitclr_d,
3666 .opt_opc = vecop_list,
3671 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3674 TRANS(vbitclr_b, LSX, gvec_vvv, MO_8, do_vbitclr)
3675 TRANS(vbitclr_h, LSX, gvec_vvv, MO_16, do_vbitclr)
3676 TRANS(vbitclr_w, LSX, gvec_vvv, MO_32, do_vbitclr)
3677 TRANS(vbitclr_d, LSX, gvec_vvv, MO_64, do_vbitclr)
/*
 * Common expansion for the immediate-index bit ops: build the mask
 * (1 << (imm % width)) and combine it with 'a' via 'func'.
 */
3679 static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm,
3680                      void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
/* Reduce the immediate modulo the element width in bits. */
3685 lsh = imm & ((8 << vece) -1);
3686 t1 = tcg_temp_new_vec_matching(t);
3687 one = tcg_constant_vec_matching(t, vece, 1);
3689 tcg_gen_shli_vec(vece, t1, one, lsh);
3690 func(vece, t, a, t1);
/* vd = vj & ~(1 << (imm % width)). */
3693 static void gen_vbitclri(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
3695 do_vbiti(vece, t, a, imm, tcg_gen_andc_vec);
/* vd = vj | (1 << (imm % width)). */
3698 static void gen_vbitseti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
3700 do_vbiti(vece, t, a, imm, tcg_gen_or_vec);
/* vd = vj ^ (1 << (imm % width)). */
3703 static void gen_vbitrevi(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
3705 do_vbiti(vece, t, a, imm, tcg_gen_xor_vec);
/*
 * Expand VBITCLRI.{B/H/W/D}: clear bit (imm % width) in every element,
 * inline via shli+andc when available, else the out-of-line helper.
 */
3708 static void do_vbitclri(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3709                         int64_t imm, uint32_t oprsz, uint32_t maxsz)
3711 static const TCGOpcode vecop_list[] = {
3712 INDEX_op_shli_vec, INDEX_op_andc_vec, 0
3714 static const GVecGen2i op[4] = {
3716 .fniv = gen_vbitclri,
3717 .fnoi = gen_helper_vbitclri_b,
3718 .opt_opc = vecop_list,
3722 .fniv = gen_vbitclri,
3723 .fnoi = gen_helper_vbitclri_h,
3724 .opt_opc = vecop_list,
3728 .fniv = gen_vbitclri,
3729 .fnoi = gen_helper_vbitclri_w,
3730 .opt_opc = vecop_list,
3734 .fniv = gen_vbitclri,
3735 .fnoi = gen_helper_vbitclri_d,
3736 .opt_opc = vecop_list,
3741 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
3744 TRANS(vbitclri_b, LSX, gvec_vv_i, MO_8, do_vbitclri)
3745 TRANS(vbitclri_h, LSX, gvec_vv_i, MO_16, do_vbitclri)
3746 TRANS(vbitclri_w, LSX, gvec_vv_i, MO_32, do_vbitclri)
3747 TRANS(vbitclri_d, LSX, gvec_vv_i, MO_64, do_vbitclri)
/*
 * Expand VBITSET.{B/H/W/D}: set bit (vk % width) in every element,
 * inline via shlv (mask build is done in gen_vbitset), else helper.
 */
3749 static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3750                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3752 static const TCGOpcode vecop_list[] = {
3753 INDEX_op_shlv_vec, 0
3755 static const GVecGen3 op[4] = {
3757 .fniv = gen_vbitset,
3758 .fno = gen_helper_vbitset_b,
3759 .opt_opc = vecop_list,
3763 .fniv = gen_vbitset,
3764 .fno = gen_helper_vbitset_h,
3765 .opt_opc = vecop_list,
3769 .fniv = gen_vbitset,
3770 .fno = gen_helper_vbitset_w,
3771 .opt_opc = vecop_list,
3775 .fniv = gen_vbitset,
3776 .fno = gen_helper_vbitset_d,
3777 .opt_opc = vecop_list,
3782 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3785 TRANS(vbitset_b, LSX, gvec_vvv, MO_8, do_vbitset)
3786 TRANS(vbitset_h, LSX, gvec_vvv, MO_16, do_vbitset)
3787 TRANS(vbitset_w, LSX, gvec_vvv, MO_32, do_vbitset)
3788 TRANS(vbitset_d, LSX, gvec_vvv, MO_64, do_vbitset)
/*
 * Expand VBITSETI.{B/H/W/D}: set bit (imm % width) in every element,
 * inline via shli when available, else the out-of-line helper.
 */
3790 static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3791                         int64_t imm, uint32_t oprsz, uint32_t maxsz)
3793 static const TCGOpcode vecop_list[] = {
3794 INDEX_op_shli_vec, 0
3796 static const GVecGen2i op[4] = {
3798 .fniv = gen_vbitseti,
3799 .fnoi = gen_helper_vbitseti_b,
3800 .opt_opc = vecop_list,
3804 .fniv = gen_vbitseti,
3805 .fnoi = gen_helper_vbitseti_h,
3806 .opt_opc = vecop_list,
3810 .fniv = gen_vbitseti,
3811 .fnoi = gen_helper_vbitseti_w,
3812 .opt_opc = vecop_list,
3816 .fniv = gen_vbitseti,
3817 .fnoi = gen_helper_vbitseti_d,
3818 .opt_opc = vecop_list,
3823 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
3826 TRANS(vbitseti_b, LSX, gvec_vv_i, MO_8, do_vbitseti)
3827 TRANS(vbitseti_h, LSX, gvec_vv_i, MO_16, do_vbitseti)
3828 TRANS(vbitseti_w, LSX, gvec_vv_i, MO_32, do_vbitseti)
3829 TRANS(vbitseti_d, LSX, gvec_vv_i, MO_64, do_vbitseti)
/*
 * Expand VBITREV.{B/H/W/D}: toggle bit (vk % width) in every element,
 * inline via shlv when available, else the out-of-line helper.
 */
3831 static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3832                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
3834 static const TCGOpcode vecop_list[] = {
3835 INDEX_op_shlv_vec, 0
3837 static const GVecGen3 op[4] = {
3839 .fniv = gen_vbitrev,
3840 .fno = gen_helper_vbitrev_b,
3841 .opt_opc = vecop_list,
3845 .fniv = gen_vbitrev,
3846 .fno = gen_helper_vbitrev_h,
3847 .opt_opc = vecop_list,
3851 .fniv = gen_vbitrev,
3852 .fno = gen_helper_vbitrev_w,
3853 .opt_opc = vecop_list,
3857 .fniv = gen_vbitrev,
3858 .fno = gen_helper_vbitrev_d,
3859 .opt_opc = vecop_list,
3864 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
3867 TRANS(vbitrev_b, LSX, gvec_vvv, MO_8, do_vbitrev)
3868 TRANS(vbitrev_h, LSX, gvec_vvv, MO_16, do_vbitrev)
3869 TRANS(vbitrev_w, LSX, gvec_vvv, MO_32, do_vbitrev)
3870 TRANS(vbitrev_d, LSX, gvec_vvv, MO_64, do_vbitrev)
/*
 * Expand VBITREVI.{B/H/W/D}: toggle bit (imm % width) in every element,
 * inline via shli when available, else the out-of-line helper.
 */
3872 static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
3873                         int64_t imm, uint32_t oprsz, uint32_t maxsz)
3875 static const TCGOpcode vecop_list[] = {
3876 INDEX_op_shli_vec, 0
3878 static const GVecGen2i op[4] = {
3880 .fniv = gen_vbitrevi,
3881 .fnoi = gen_helper_vbitrevi_b,
3882 .opt_opc = vecop_list,
3886 .fniv = gen_vbitrevi,
3887 .fnoi = gen_helper_vbitrevi_h,
3888 .opt_opc = vecop_list,
3892 .fniv = gen_vbitrevi,
3893 .fnoi = gen_helper_vbitrevi_w,
3894 .opt_opc = vecop_list,
3898 .fniv = gen_vbitrevi,
3899 .fnoi = gen_helper_vbitrevi_d,
3900 .opt_opc = vecop_list,
3905 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
3908 TRANS(vbitrevi_b, LSX, gvec_vv_i, MO_8, do_vbitrevi)
3909 TRANS(vbitrevi_h, LSX, gvec_vv_i, MO_16, do_vbitrevi)
3910 TRANS(vbitrevi_w, LSX, gvec_vv_i, MO_32, do_vbitrevi)
3911 TRANS(vbitrevi_d, LSX, gvec_vv_i, MO_64, do_vbitrevi)
/* VFRSTP[I]: find-first helpers, always out of line. */
3913 TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b)
3914 TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
3915 TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b)
3916 TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h)
/*
 * Floating-point LSX instructions.  All of these go through out-of-line
 * helpers that take an env pointer (gen_vv_ptr / gen_vvv_ptr /
 * gen_vvvv_ptr) since they need the FP status in CPULoongArchState.
 */
3918 TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s)
3919 TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d)
3920 TRANS(vfsub_s, LSX, gen_vvv_ptr, gen_helper_vfsub_s)
3921 TRANS(vfsub_d, LSX, gen_vvv_ptr, gen_helper_vfsub_d)
3922 TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s)
3923 TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d)
3924 TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s)
3925 TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d)
/* Fused multiply-add family (4-operand). */
3927 TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
3928 TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
3929 TRANS(vfmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfmsub_s)
3930 TRANS(vfmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfmsub_d)
3931 TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s)
3932 TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
3933 TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
3934 TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)
3936 TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s)
3937 TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d)
3938 TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s)
3939 TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d)
3941 TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s)
3942 TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
3943 TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
3944 TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
3946 TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s)
3947 TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d)
3949 TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s)
3950 TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d)
3952 TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s)
3953 TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d)
3954 TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
3955 TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
3956 TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
3957 TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
/* FP width conversions (low/high halves and pairwise). */
3959 TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
3960 TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
3961 TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s)
3962 TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s)
3963 TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
3964 TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)
/* FP rounding to integral value, per rounding mode suffix. */
3966 TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s)
3967 TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d)
3968 TRANS(vfrintrz_s, LSX, gen_vv_ptr, gen_helper_vfrintrz_s)
3969 TRANS(vfrintrz_d, LSX, gen_vv_ptr, gen_helper_vfrintrz_d)
3970 TRANS(vfrintrp_s, LSX, gen_vv_ptr, gen_helper_vfrintrp_s)
3971 TRANS(vfrintrp_d, LSX, gen_vv_ptr, gen_helper_vfrintrp_d)
3972 TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s)
3973 TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d)
3974 TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s)
3975 TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d)
/* FP -> integer conversions, per rounding mode and signedness. */
3977 TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s)
3978 TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d)
3979 TRANS(vftintrz_w_s, LSX, gen_vv_ptr, gen_helper_vftintrz_w_s)
3980 TRANS(vftintrz_l_d, LSX, gen_vv_ptr, gen_helper_vftintrz_l_d)
3981 TRANS(vftintrp_w_s, LSX, gen_vv_ptr, gen_helper_vftintrp_w_s)
3982 TRANS(vftintrp_l_d, LSX, gen_vv_ptr, gen_helper_vftintrp_l_d)
3983 TRANS(vftintrm_w_s, LSX, gen_vv_ptr, gen_helper_vftintrm_w_s)
3984 TRANS(vftintrm_l_d, LSX, gen_vv_ptr, gen_helper_vftintrm_l_d)
3985 TRANS(vftint_w_s, LSX, gen_vv_ptr, gen_helper_vftint_w_s)
3986 TRANS(vftint_l_d, LSX, gen_vv_ptr, gen_helper_vftint_l_d)
3987 TRANS(vftintrz_wu_s, LSX, gen_vv_ptr, gen_helper_vftintrz_wu_s)
3988 TRANS(vftintrz_lu_d, LSX, gen_vv_ptr, gen_helper_vftintrz_lu_d)
3989 TRANS(vftint_wu_s, LSX, gen_vv_ptr, gen_helper_vftint_wu_s)
3990 TRANS(vftint_lu_d, LSX, gen_vv_ptr, gen_helper_vftint_lu_d)
3991 TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d)
3992 TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d)
3993 TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d)
3994 TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d)
3995 TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d)
3996 TRANS(vftintrnel_l_s, LSX, gen_vv_ptr, gen_helper_vftintrnel_l_s)
3997 TRANS(vftintrneh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrneh_l_s)
3998 TRANS(vftintrzl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzl_l_s)
3999 TRANS(vftintrzh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzh_l_s)
4000 TRANS(vftintrpl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrpl_l_s)
4001 TRANS(vftintrph_l_s, LSX, gen_vv_ptr, gen_helper_vftintrph_l_s)
4002 TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s)
4003 TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s)
4004 TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s)
4005 TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s)
/* Integer -> FP conversions. */
4007 TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w)
4008 TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l)
4009 TRANS(vffint_s_wu, LSX, gen_vv_ptr, gen_helper_vffint_s_wu)
4010 TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu)
4011 TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w)
4012 TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w)
4013 TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
/*
 * Common expansion for register-register compares (VSEQ/VSLE/VSLT):
 * emit a gvec compare over the 16-byte LSX register, clearing the
 * remainder of the maxsz (ctx->vl/8) register storage.
 */
4015 static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
4017 uint32_t vd_ofs, vj_ofs, vk_ofs;
4019 if (!check_vec(ctx, 16)) {
4023 vd_ofs = vec_full_offset(a->vd);
4024 vj_ofs = vec_full_offset(a->vj);
4025 vk_ofs = vec_full_offset(a->vk);
4027 tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
/* Compare each element of 'a' against the replicated immediate. */
4031 static void do_cmpi_vec(TCGCond cond,
4032                         unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4034 tcg_gen_cmp_vec(cond, vece, t, a, tcg_constant_vec_matching(t, vece, imm));
/* Per-condition inline expanders used by the DO_CMPI_* tables below. */
4037 static void gen_vseqi_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4039 do_cmpi_vec(TCG_COND_EQ, vece, t, a, imm);
4042 static void gen_vslei_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4044 do_cmpi_vec(TCG_COND_LE, vece, t, a, imm);
4047 static void gen_vslti_s_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4049 do_cmpi_vec(TCG_COND_LT, vece, t, a, imm);
4052 static void gen_vslei_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4054 do_cmpi_vec(TCG_COND_LEU, vece, t, a, imm);
4057 static void gen_vslti_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
4059 do_cmpi_vec(TCG_COND_LTU, vece, t, a, imm);
/*
 * Generate do_<NAME>_s(): signed compare-with-immediate expansion,
 * dispatching on element size via the GVecGen2i table (inline cmp_vec
 * when the backend supports it, .fnoi helper otherwise).
 * Note: comments cannot be placed inside the macro body without
 * breaking the backslash line continuation.
 */
4062 #define DO_CMPI_S(NAME) \
4063 static bool do_## NAME ##_s(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
4065 uint32_t vd_ofs, vj_ofs; \
4067 if (!check_vec(ctx, 16)) { \
4071 static const TCGOpcode vecop_list[] = { \
4072 INDEX_op_cmp_vec, 0 \
4074 static const GVecGen2i op[4] = { \
4076 .fniv = gen_## NAME ##_s_vec, \
4077 .fnoi = gen_helper_## NAME ##_b, \
4078 .opt_opc = vecop_list, \
4082 .fniv = gen_## NAME ##_s_vec, \
4083 .fnoi = gen_helper_## NAME ##_h, \
4084 .opt_opc = vecop_list, \
4088 .fniv = gen_## NAME ##_s_vec, \
4089 .fnoi = gen_helper_## NAME ##_w, \
4090 .opt_opc = vecop_list, \
4094 .fniv = gen_## NAME ##_s_vec, \
4095 .fnoi = gen_helper_## NAME ##_d, \
4096 .opt_opc = vecop_list, \
4101 vd_ofs = vec_full_offset(a->vd); \
4102 vj_ofs = vec_full_offset(a->vj); \
4104 tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]); \
/*
 * Generate do_<NAME>_u(): unsigned compare-with-immediate expansion,
 * same shape as DO_CMPI_S but using the *_bu/_hu/_wu/_du helpers.
 */
4113 #define DO_CMPI_U(NAME) \
4114 static bool do_## NAME ##_u(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
4116 uint32_t vd_ofs, vj_ofs; \
4118 if (!check_vec(ctx, 16)) { \
4122 static const TCGOpcode vecop_list[] = { \
4123 INDEX_op_cmp_vec, 0 \
4125 static const GVecGen2i op[4] = { \
4127 .fniv = gen_## NAME ##_u_vec, \
4128 .fnoi = gen_helper_## NAME ##_bu, \
4129 .opt_opc = vecop_list, \
4133 .fniv = gen_## NAME ##_u_vec, \
4134 .fnoi = gen_helper_## NAME ##_hu, \
4135 .opt_opc = vecop_list, \
4139 .fniv = gen_## NAME ##_u_vec, \
4140 .fnoi = gen_helper_## NAME ##_wu, \
4141 .opt_opc = vecop_list, \
4145 .fniv = gen_## NAME ##_u_vec, \
4146 .fnoi = gen_helper_## NAME ##_du, \
4147 .opt_opc = vecop_list, \
4152 vd_ofs = vec_full_offset(a->vd); \
4153 vj_ofs = vec_full_offset(a->vj); \
4155 tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]); \
/* Integer compare instructions: register-register via do_cmp, immediate
 * via the do_v*_s / do_v*_u functions generated by the macros above. */
4163 TRANS(vseq_b, LSX, do_cmp, MO_8, TCG_COND_EQ)
4164 TRANS(vseq_h, LSX, do_cmp, MO_16, TCG_COND_EQ)
4165 TRANS(vseq_w, LSX, do_cmp, MO_32, TCG_COND_EQ)
4166 TRANS(vseq_d, LSX, do_cmp, MO_64, TCG_COND_EQ)
4167 TRANS(vseqi_b, LSX, do_vseqi_s, MO_8)
4168 TRANS(vseqi_h, LSX, do_vseqi_s, MO_16)
4169 TRANS(vseqi_w, LSX, do_vseqi_s, MO_32)
4170 TRANS(vseqi_d, LSX, do_vseqi_s, MO_64)
4172 TRANS(vsle_b, LSX, do_cmp, MO_8, TCG_COND_LE)
4173 TRANS(vsle_h, LSX, do_cmp, MO_16, TCG_COND_LE)
4174 TRANS(vsle_w, LSX, do_cmp, MO_32, TCG_COND_LE)
4175 TRANS(vsle_d, LSX, do_cmp, MO_64, TCG_COND_LE)
4176 TRANS(vslei_b, LSX, do_vslei_s, MO_8)
4177 TRANS(vslei_h, LSX, do_vslei_s, MO_16)
4178 TRANS(vslei_w, LSX, do_vslei_s, MO_32)
4179 TRANS(vslei_d, LSX, do_vslei_s, MO_64)
4180 TRANS(vsle_bu, LSX, do_cmp, MO_8, TCG_COND_LEU)
4181 TRANS(vsle_hu, LSX, do_cmp, MO_16, TCG_COND_LEU)
4182 TRANS(vsle_wu, LSX, do_cmp, MO_32, TCG_COND_LEU)
4183 TRANS(vsle_du, LSX, do_cmp, MO_64, TCG_COND_LEU)
4184 TRANS(vslei_bu, LSX, do_vslei_u, MO_8)
4185 TRANS(vslei_hu, LSX, do_vslei_u, MO_16)
4186 TRANS(vslei_wu, LSX, do_vslei_u, MO_32)
4187 TRANS(vslei_du, LSX, do_vslei_u, MO_64)
4189 TRANS(vslt_b, LSX, do_cmp, MO_8, TCG_COND_LT)
4190 TRANS(vslt_h, LSX, do_cmp, MO_16, TCG_COND_LT)
4191 TRANS(vslt_w, LSX, do_cmp, MO_32, TCG_COND_LT)
4192 TRANS(vslt_d, LSX, do_cmp, MO_64, TCG_COND_LT)
4193 TRANS(vslti_b, LSX, do_vslti_s, MO_8)
4194 TRANS(vslti_h, LSX, do_vslti_s, MO_16)
4195 TRANS(vslti_w, LSX, do_vslti_s, MO_32)
4196 TRANS(vslti_d, LSX, do_vslti_s, MO_64)
4197 TRANS(vslt_bu, LSX, do_cmp, MO_8, TCG_COND_LTU)
4198 TRANS(vslt_hu, LSX, do_cmp, MO_16, TCG_COND_LTU)
4199 TRANS(vslt_wu, LSX, do_cmp, MO_32, TCG_COND_LTU)
4200 TRANS(vslt_du, LSX, do_cmp, MO_64, TCG_COND_LTU)
4201 TRANS(vslti_bu, LSX, do_vslti_u, MO_8)
4202 TRANS(vslti_hu, LSX, do_vslti_u, MO_16)
4203 TRANS(vslti_wu, LSX, do_vslti_u, MO_32)
4204 TRANS(vslti_du, LSX, do_vslti_u, MO_64)
/*
 * VFCMP.cond.S: fcond bit 0 selects the signaling (_s_) vs quiet (_c_)
 * compare helper; the remaining bits are mapped to softfloat compare
 * flags by get_fcmp_flags().
 */
4206 static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a)
4209 void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
4210 TCGv_i32 vd = tcg_constant_i32(a->vd);
4211 TCGv_i32 vj = tcg_constant_i32(a->vj);
4212 TCGv_i32 vk = tcg_constant_i32(a->vk);
4214 if (!avail_LSX(ctx)) {
4218 if (!check_vec(ctx, 16)) {
4222 fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s);
4223 flags = get_fcmp_flags(a->fcond >> 1);
4224 fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags));
/* VFCMP.cond.D: double-precision variant of trans_vfcmp_cond_s. */
4229 static bool trans_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a)
4232 void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
4233 TCGv_i32 vd = tcg_constant_i32(a->vd);
4234 TCGv_i32 vj = tcg_constant_i32(a->vj);
4235 TCGv_i32 vk = tcg_constant_i32(a->vk);
4237 if (!avail_LSX(ctx)) {
4241 if (!check_vec(ctx, 16)) {
/* Bit 0 of fcond selects signaling vs quiet compare. */
4245 fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d);
4246 flags = get_fcmp_flags(a->fcond >> 1);
4247 fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags));
/*
 * VBITSEL.V: bitwise select.  Note the gvec_bitsel operand order:
 * va is the selector mask, vk supplies bits where the mask is 1,
 * vj where it is 0.
 */
4252 static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a)
4254 if (!avail_LSX(ctx)) {
4258 if (!check_vec(ctx, 16)) {
4262 tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
4263                     vec_full_offset(a->vk), vec_full_offset(a->vj),
/* Inline expander for VBITSELI.B: select between vd and the replicated
 * immediate, with vb as the mask. */
4268 static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm)
4270 tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b);
/* VBITSELI.B: byte-wise bit select against an 8-bit immediate. */
4273 static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a)
4275 static const GVecGen2i op = {
4276 .fniv = gen_vbitseli,
4277 .fnoi = gen_helper_vbitseli_b,
4282 if (!avail_LSX(ctx)) {
4286 if (!check_vec(ctx, 16)) {
4290 tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
4291                 16, ctx->vl/8, a->imm, &op);
/*
 * VSETEQZ.V / VSETNEZ.V: set condition flag cd according to whether the
 * whole 128-bit register vj is all-zero (EQ) or not (NE).  The two
 * 64-bit halves are OR-ed together and tested against zero.
 *
 * Fix: the availability and SXE-enable checks must run before any TCG
 * ops are emitted; the original read the vector register halves first,
 * leaving stray loads in the TB when the insn is unavailable/disabled.
 */
#define VSET(NAME, COND)                                                    \
static bool trans_## NAME (DisasContext *ctx, arg_cv *a)                    \
{                                                                           \
    TCGv_i64 t1, al, ah;                                                    \
                                                                            \
    if (!avail_LSX(ctx)) {                                                  \
        return false;                                                       \
    }                                                                       \
                                                                            \
    if (!check_vec(ctx, 16)) {                                              \
        return true;                                                        \
    }                                                                       \
                                                                            \
    al = tcg_temp_new_i64();                                                \
    ah = tcg_temp_new_i64();                                                \
    t1 = tcg_temp_new_i64();                                                \
                                                                            \
    get_vreg64(ah, a->vj, 1);                                               \
    get_vreg64(al, a->vj, 0);                                               \
                                                                            \
    tcg_gen_or_i64(t1, al, ah);                                             \
    tcg_gen_setcondi_i64(COND, t1, t1, 0);                                  \
    tcg_gen_st8_tl(t1, cpu_env,                                             \
                   offsetof(CPULoongArchState, cf[a->cd & 0x7]));           \
                                                                            \
    return true;                                                            \
}
/* Whole-register zero tests, plus per-element any-eqz/all-nez tests
 * that go through out-of-line helpers (gen_cv writes flag cd). */
4322 VSET(vseteqz_v, TCG_COND_EQ)
4323 VSET(vsetnez_v, TCG_COND_NE)
4325 TRANS(vsetanyeqz_b, LSX, gen_cv, gen_helper_vsetanyeqz_b)
4326 TRANS(vsetanyeqz_h, LSX, gen_cv, gen_helper_vsetanyeqz_h)
4327 TRANS(vsetanyeqz_w, LSX, gen_cv, gen_helper_vsetanyeqz_w)
4328 TRANS(vsetanyeqz_d, LSX, gen_cv, gen_helper_vsetanyeqz_d)
4329 TRANS(vsetallnez_b, LSX, gen_cv, gen_helper_vsetallnez_b)
4330 TRANS(vsetallnez_h, LSX, gen_cv, gen_helper_vsetallnez_h)
4331 TRANS(vsetallnez_w, LSX, gen_cv, gen_helper_vsetallnez_w)
4332 TRANS(vsetallnez_d, LSX, gen_cv, gen_helper_vsetallnez_d)
/*
 * VINSGR2VR.{B/H/W/D}: insert GPR rj into element a->imm of vd, by a
 * direct store into the element slot within CPULoongArchState.
 */
4334 static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a)
4336 TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4338 if (!avail_LSX(ctx)) {
4342 if (!check_vec(ctx, 16)) {
4346 tcg_gen_st8_i64(src, cpu_env,
4347                 offsetof(CPULoongArchState, fpr[a->vd].vreg.B(a->imm)));
4351 static bool trans_vinsgr2vr_h(DisasContext *ctx, arg_vr_i *a)
4353 TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4355 if (!avail_LSX(ctx)) {
4359 if (!check_vec(ctx, 16)) {
4363 tcg_gen_st16_i64(src, cpu_env,
4364                 offsetof(CPULoongArchState, fpr[a->vd].vreg.H(a->imm)));
4368 static bool trans_vinsgr2vr_w(DisasContext *ctx, arg_vr_i *a)
4370 TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4372 if (!avail_LSX(ctx)) {
4376 if (!check_vec(ctx, 16)) {
4380 tcg_gen_st32_i64(src, cpu_env,
4381                 offsetof(CPULoongArchState, fpr[a->vd].vreg.W(a->imm)));
4385 static bool trans_vinsgr2vr_d(DisasContext *ctx, arg_vr_i *a)
4387 TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4389 if (!avail_LSX(ctx)) {
4393 if (!check_vec(ctx, 16)) {
4397 tcg_gen_st_i64(src, cpu_env,
4398                offsetof(CPULoongArchState, fpr[a->vd].vreg.D(a->imm)));
/*
 * VPICKVE2GR.{B/H/W/D}[U]: extract element a->imm of vj into GPR rd.
 * Plain variants sign-extend via ld8s/ld16s/ld32s; the *u variants
 * zero-extend via the unsigned loads.
 */
4402 static bool trans_vpickve2gr_b(DisasContext *ctx, arg_rv_i *a)
4404 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4406 if (!avail_LSX(ctx)) {
4410 if (!check_vec(ctx, 16)) {
4414 tcg_gen_ld8s_i64(dst, cpu_env,
4415                 offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
4419 static bool trans_vpickve2gr_h(DisasContext *ctx, arg_rv_i *a)
4421 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4423 if (!avail_LSX(ctx)) {
4427 if (!check_vec(ctx, 16)) {
4431 tcg_gen_ld16s_i64(dst, cpu_env,
4432                 offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
4436 static bool trans_vpickve2gr_w(DisasContext *ctx, arg_rv_i *a)
4438 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4440 if (!avail_LSX(ctx)) {
4444 if (!check_vec(ctx, 16)) {
4448 tcg_gen_ld32s_i64(dst, cpu_env,
4449                 offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
4453 static bool trans_vpickve2gr_d(DisasContext *ctx, arg_rv_i *a)
4455 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4457 if (!avail_LSX(ctx)) {
4461 if (!check_vec(ctx, 16)) {
4465 tcg_gen_ld_i64(dst, cpu_env,
4466                offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
4470 static bool trans_vpickve2gr_bu(DisasContext *ctx, arg_rv_i *a)
4472 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4474 if (!avail_LSX(ctx)) {
4478 if (!check_vec(ctx, 16)) {
4482 tcg_gen_ld8u_i64(dst, cpu_env,
4483                 offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
4487 static bool trans_vpickve2gr_hu(DisasContext *ctx, arg_rv_i *a)
4489 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4491 if (!avail_LSX(ctx)) {
4495 if (!check_vec(ctx, 16)) {
4499 tcg_gen_ld16u_i64(dst, cpu_env,
4500                 offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
4504 static bool trans_vpickve2gr_wu(DisasContext *ctx, arg_rv_i *a)
4506 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4508 if (!avail_LSX(ctx)) {
4512 if (!check_vec(ctx, 16)) {
4516 tcg_gen_ld32u_i64(dst, cpu_env,
4517                 offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
4521 static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a)
4523 TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
4525 if (!avail_LSX(ctx)) {
4529 if (!check_vec(ctx, 16)) {
4533 tcg_gen_ld_i64(dst, cpu_env,
4534                offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
/*
 * [X]VREPLGR2VR: broadcast GPR rj into every element of vd.
 * oprsz selects the 16-byte (LSX) or 32-byte (LASX) operand size;
 * maxsz is the full register storage (ctx->vl/8).
 */
4538 static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a,
4539                         uint32_t oprsz, MemOp mop)
4541 TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
4543 if (!check_vec(ctx, oprsz)) {
4547 tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
4548                      oprsz, ctx->vl/8, src);
/* LSX (128-bit) variant. */
4552 static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
4554 return gvec_dup_vl(ctx, a, 16, mop);
/* LASX (256-bit) variant. */
4557 static bool gvec_dupx(DisasContext *ctx, arg_vr *a, MemOp mop)
4559 return gvec_dup_vl(ctx, a, 32, mop);
4562 TRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8)
4563 TRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16)
4564 TRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32)
4565 TRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64)
4566 TRANS(xvreplgr2vr_b, LASX, gvec_dupx, MO_8)
4567 TRANS(xvreplgr2vr_h, LASX, gvec_dupx, MO_16)
4568 TRANS(xvreplgr2vr_w, LASX, gvec_dupx, MO_32)
4569 TRANS(xvreplgr2vr_d, LASX, gvec_dupx, MO_64)
/*
 * VREPLVEI.{B/H/W/D}: broadcast element a->imm of vj into every element
 * of vd, via a gvec dup straight from the element's env slot.
 */
4571 static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
4573 if (!avail_LSX(ctx)) {
4577 if (!check_vec(ctx, 16)) {
4581 tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd),
4582                      offsetof(CPULoongArchState,
4583                      fpr[a->vj].vreg.B((a->imm))),
4588 static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
4590 if (!avail_LSX(ctx)) {
4594 if (!check_vec(ctx, 16)) {
4598 tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd),
4599                      offsetof(CPULoongArchState,
4600                      fpr[a->vj].vreg.H((a->imm))),
4604 static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
4606 if (!avail_LSX(ctx)) {
4610 if (!check_vec(ctx, 16)) {
4614 tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd),
4615                      offsetof(CPULoongArchState,
4616                      fpr[a->vj].vreg.W((a->imm))),
4620 static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
4622 if (!avail_LSX(ctx)) {
4626 if (!check_vec(ctx, 16)) {
4630 tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd),
4631                      offsetof(CPULoongArchState,
4632                      fpr[a->vj].vreg.D((a->imm))),
4637 static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
4638 void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
4640 TCGv_i64 t0 = tcg_temp_new_i64();
4641 TCGv_ptr t1 = tcg_temp_new_ptr();
4642 TCGv_i64 t2 = tcg_temp_new_i64();
4644 if (!avail_LSX(ctx)) {
4648 if (!check_vec(ctx, 16)) {
4652 tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN/bit) -1);
4653 tcg_gen_shli_i64(t0, t0, vece);
4654 if (HOST_BIG_ENDIAN) {
4655 tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN/bit) -1));
4658 tcg_gen_trunc_i64_ptr(t1, t0);
4659 tcg_gen_add_ptr(t1, t1, cpu_env);
4660 func(t2, t1, vec_full_offset(a->vj));
4661 tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8, t2);
/* Register-indexed replicate, one loader per element width. */
4666 TRANS(vreplve_b, LSX, gen_vreplve, MO_8, 8, tcg_gen_ld8u_i64)
4667 TRANS(vreplve_h, LSX, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64)
4668 TRANS(vreplve_w, LSX, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64)
4669 TRANS(vreplve_d, LSX, gen_vreplve, MO_64, 64, tcg_gen_ld_i64)
/*
 * VBSLL.V: shift the whole 128-bit register left by (imm & 0xf) bytes,
 * implemented as a 128-bit shift over the two 64-bit halves: for small
 * shifts extract2 funnels low bits into the high half; for shifts of
 * 64 bits or more the low half becomes zero.
 */
4671 static bool trans_vbsll_v(DisasContext *ctx, arg_vv_i *a)
4674 TCGv_i64 desthigh, destlow, high, low;
4676 if (!avail_LSX(ctx)) {
4680 if (!check_vec(ctx, 16)) {
4684 desthigh = tcg_temp_new_i64();
4685 destlow = tcg_temp_new_i64();
4686 high = tcg_temp_new_i64();
4687 low = tcg_temp_new_i64();
4689 get_vreg64(low, a->vj, 0);
/* Byte shift amount converted to bits. */
4691 ofs = ((a->imm) & 0xf) * 8;
4693 get_vreg64(high, a->vj, 1);
4694 tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
4695 tcg_gen_shli_i64(destlow, low, ofs)
4697 tcg_gen_shli_i64(desthigh, low, ofs - 64);
4698 destlow = tcg_constant_i64(0);
4701 set_vreg64(desthigh, a->vd, 1);
4702 set_vreg64(destlow, a->vd, 0);
/*
 * VBSRL.V: shift the whole 128-bit register right by (imm & 0xf) bytes,
 * the mirror of trans_vbsll_v: extract2 funnels high bits into the low
 * half; for shifts of 64 bits or more the high half becomes zero.
 */
4707 static bool trans_vbsrl_v(DisasContext *ctx, arg_vv_i *a)
4709 TCGv_i64 desthigh, destlow, high, low;
4712 if (!avail_LSX(ctx)) {
4716 if (!check_vec(ctx, 16)) {
4720 desthigh = tcg_temp_new_i64();
4721 destlow = tcg_temp_new_i64();
4722 high = tcg_temp_new_i64();
4723 low = tcg_temp_new_i64();
4725 get_vreg64(high, a->vj, 1);
/* Byte shift amount converted to bits. */
4727 ofs = ((a->imm) & 0xf) * 8;
4729 get_vreg64(low, a->vj, 0);
4730 tcg_gen_extract2_i64(destlow, low, high, ofs);
4731 tcg_gen_shri_i64(desthigh, high, ofs);
4733 tcg_gen_shri_i64(destlow, high, ofs - 64);
4734 desthigh = tcg_constant_i64(0);
4737 set_vreg64(desthigh, a->vd, 1);
4738 set_vreg64(destlow, a->vd, 0);
/* Permutation instructions: pack, pick, interleave, shuffle, insert —
 * all via out-of-line helpers. */
4743 TRANS(vpackev_b, LSX, gen_vvv, gen_helper_vpackev_b)
4744 TRANS(vpackev_h, LSX, gen_vvv, gen_helper_vpackev_h)
4745 TRANS(vpackev_w, LSX, gen_vvv, gen_helper_vpackev_w)
4746 TRANS(vpackev_d, LSX, gen_vvv, gen_helper_vpackev_d)
4747 TRANS(vpackod_b, LSX, gen_vvv, gen_helper_vpackod_b)
4748 TRANS(vpackod_h, LSX, gen_vvv, gen_helper_vpackod_h)
4749 TRANS(vpackod_w, LSX, gen_vvv, gen_helper_vpackod_w)
4750 TRANS(vpackod_d, LSX, gen_vvv, gen_helper_vpackod_d)
4752 TRANS(vpickev_b, LSX, gen_vvv, gen_helper_vpickev_b)
4753 TRANS(vpickev_h, LSX, gen_vvv, gen_helper_vpickev_h)
4754 TRANS(vpickev_w, LSX, gen_vvv, gen_helper_vpickev_w)
4755 TRANS(vpickev_d, LSX, gen_vvv, gen_helper_vpickev_d)
4756 TRANS(vpickod_b, LSX, gen_vvv, gen_helper_vpickod_b)
4757 TRANS(vpickod_h, LSX, gen_vvv, gen_helper_vpickod_h)
4758 TRANS(vpickod_w, LSX, gen_vvv, gen_helper_vpickod_w)
4759 TRANS(vpickod_d, LSX, gen_vvv, gen_helper_vpickod_d)
4761 TRANS(vilvl_b, LSX, gen_vvv, gen_helper_vilvl_b)
4762 TRANS(vilvl_h, LSX, gen_vvv, gen_helper_vilvl_h)
4763 TRANS(vilvl_w, LSX, gen_vvv, gen_helper_vilvl_w)
4764 TRANS(vilvl_d, LSX, gen_vvv, gen_helper_vilvl_d)
4765 TRANS(vilvh_b, LSX, gen_vvv, gen_helper_vilvh_b)
4766 TRANS(vilvh_h, LSX, gen_vvv, gen_helper_vilvh_h)
4767 TRANS(vilvh_w, LSX, gen_vvv, gen_helper_vilvh_w)
4768 TRANS(vilvh_d, LSX, gen_vvv, gen_helper_vilvh_d)
/* vshuf_b takes a fourth register operand (the index vector). */
4770 TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b)
4771 TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h)
4772 TRANS(vshuf_w, LSX, gen_vvv, gen_helper_vshuf_w)
4773 TRANS(vshuf_d, LSX, gen_vvv, gen_helper_vshuf_d)
4774 TRANS(vshuf4i_b, LSX, gen_vv_i, gen_helper_vshuf4i_b)
4775 TRANS(vshuf4i_h, LSX, gen_vv_i, gen_helper_vshuf4i_h)
4776 TRANS(vshuf4i_w, LSX, gen_vv_i, gen_helper_vshuf4i_w)
4777 TRANS(vshuf4i_d, LSX, gen_vv_i, gen_helper_vshuf4i_d)
4779 TRANS(vpermi_w, LSX, gen_vv_i, gen_helper_vpermi_w)
4781 TRANS(vextrins_b, LSX, gen_vv_i, gen_helper_vextrins_b)
4782 TRANS(vextrins_h, LSX, gen_vv_i, gen_helper_vextrins_h)
4783 TRANS(vextrins_w, LSX, gen_vv_i, gen_helper_vextrins_w)
4784 TRANS(vextrins_d, LSX, gen_vv_i, gen_helper_vextrins_d)
/*
 * VLD: load 128 bits from rj + imm into vd with a single target-endian
 * i128 access, then split into the two 64-bit register halves.
 */
4786 static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
4792 if (!avail_LSX(ctx)) {
4796 if (!check_vec(ctx, 16)) {
4800 addr = gpr_src(ctx, a->rj, EXT_NONE)
4801 val = tcg_temp_new_i128();
4802 rl = tcg_temp_new_i64();
4803 rh = tcg_temp_new_i64();
4805 addr = make_address_i(ctx, addr, a->imm);
4807 tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
4808 tcg_gen_extr_i128_i64(rl, rh, val);
4809 set_vreg64(rh, a->vd, 1);
4810 set_vreg64(rl, a->vd, 0);
/*
 * VST: store vd (128 bits) to rj + imm with a single target-endian
 * i128 access, concatenating the two 64-bit register halves.
 */
4815 static bool trans_vst(DisasContext *ctx, arg_vr_i *a)
4821 if (!avail_LSX(ctx)) {
4825 if (!check_vec(ctx, 16)) {
4829 addr = gpr_src(ctx, a->rj, EXT_NONE);
4830 val = tcg_temp_new_i128();
4831 ah = tcg_temp_new_i64();
4832 al = tcg_temp_new_i64();
4834 addr = make_address_i(ctx, addr, a->imm);
4836 get_vreg64(ah, a->vd, 1);
4837 get_vreg64(al, a->vd, 0);
4838 tcg_gen_concat_i64_i128(val, al, ah);
4839 tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
/* VLDX: like VLD, but the address is rj + rk (register-indexed). */
4844 static bool trans_vldx(DisasContext *ctx, arg_vrr *a)
4846 TCGv addr, src1, src2;
4850 if (!avail_LSX(ctx)) {
4854 if (!check_vec(ctx, 16)) {
4858 src1 = gpr_src(ctx, a->rj, EXT_NONE);
4859 src2 = gpr_src(ctx, a->rk, EXT_NONE);
4860 val = tcg_temp_new_i128();
4861 rl = tcg_temp_new_i64();
4862 rh = tcg_temp_new_i64();
4864 addr = make_address_x(ctx, src1, src2);
4865 tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
4866 tcg_gen_extr_i128_i64(rl, rh, val);
4867 set_vreg64(rh, a->vd, 1);
4868 set_vreg64(rl, a->vd, 0);
/* VSTX: like VST, but the address is rj + rk (register-indexed). */
4873 static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
4875 TCGv addr, src1, src2;
4879 if (!avail_LSX(ctx)) {
4883 if (!check_vec(ctx, 16)) {
4887 src1 = gpr_src(ctx, a->rj, EXT_NONE);
4888 src2 = gpr_src(ctx, a->rk, EXT_NONE);
4889 val = tcg_temp_new_i128();
4890 ah = tcg_temp_new_i64();
4891 al = tcg_temp_new_i64();
4893 addr = make_address_x(ctx, src1, src2);
4894 get_vreg64(ah, a->vd, 1);
4895 get_vreg64(al, a->vd, 0);
4896 tcg_gen_concat_i64_i128(val, al, ah);
4897 tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
/*
 * VLDREPL.{B/H/W/D}: load one element of size MO from rj + imm and
 * broadcast it across vd via gvec dup.  (Comments cannot go inside the
 * macro body without breaking the backslash continuation.)
 */
4902 #define VLDREPL(NAME, MO) \
4903 static bool trans_## NAME (DisasContext *ctx, arg_vr_i *a) \
4908 if (!avail_LSX(ctx)) { \
4912 if (!check_vec(ctx, 16)) { \
4916 addr = gpr_src(ctx, a->rj, EXT_NONE); \
4917 val = tcg_temp_new_i64(); \
4919 addr = make_address_i(ctx, addr, a->imm); \
4921 tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, MO); \
4922 tcg_gen_gvec_dup_i64(MO, vec_full_offset(a->vd), 16, ctx->vl/8, val); \
4927 VLDREPL(vldrepl_b, MO_8)
4928 VLDREPL(vldrepl_h, MO_16)
4929 VLDREPL(vldrepl_w, MO_32)
4930 VLDREPL(vldrepl_d, MO_64)
/*
 * VSTELM.{B/H/W/D}: store element a->imm2 of vd (accessor E into the
 * env vreg) to memory at rj + imm, with access size MO.
 */
4932 #define VSTELM(NAME, MO, E) \
4933 static bool trans_## NAME (DisasContext *ctx, arg_vr_ii *a) \
4938 if (!avail_LSX(ctx)) { \
4942 if (!check_vec(ctx, 16)) { \
4946 addr = gpr_src(ctx, a->rj, EXT_NONE); \
4947 val = tcg_temp_new_i64(); \
4949 addr = make_address_i(ctx, addr, a->imm); \
4951 tcg_gen_ld_i64(val, cpu_env, \
4952                offsetof(CPULoongArchState, fpr[a->vd].vreg.E(a->imm2))); \
4953 tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, MO); \
4958 VSTELM(vstelm_b, MO_8, B)
4959 VSTELM(vstelm_h, MO_16, H)
4960 VSTELM(vstelm_w, MO_32, W)
4961 VSTELM(vstelm_d, MO_64, D)