#define TCG_CT_CONST_U12 0x800
#define TCG_CT_CONST_C12 0x1000
#define TCG_CT_CONST_WSZ 0x2000
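+/*
+ * TCG_CT_CONST_VCMP: the constant fits the 5-bit immediate of the
+ * vseqi/vslei/vslti compare instructions (si5 for the signed forms,
+ * ui5 for the unsigned forms).
+ * TCG_CT_CONST_VADD: the constant's magnitude fits the ui5 immediate
+ * of vaddi/vsubi.
+ */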
+#define TCG_CT_CONST_VCMP 0x4000
+#define TCG_CT_CONST_VADD 0x8000
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
}
/* test if a constant matches the constraint */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
if (ct & TCG_CT_CONST) {
return true;
}
if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
return true;
}
+ int64_t vec_val = sextract64(val, 0, 8 << vece);
+ if ((ct & TCG_CT_CONST_VCMP) && -0x10 <= vec_val && vec_val <= 0x1f) {
+ return true;
+ }
+ if ((ct & TCG_CT_CONST_VADD) && -0x1f <= vec_val && vec_val <= 0x1f) {
+ return true;
+ }
return false;
}
}
}
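+/*
+ * Emit a 128-bit guest load or store.  data_lo/data_hi are the low and
+ * high 64-bit halves of the value; is_ld selects load vs. store.
+ */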
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg data_lo, TCGReg data_hi,
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
+{
+ TCGLabelQemuLdst *ldst;
+ HostAddress h;
+
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
+
+ if (h.aa.atom == MO_128) {
+ /*
+ * Use VLDX/VSTX when 128-bit atomicity is required.
+ * If the address is 16-byte aligned, the 128-bit load/store is atomic.
+ */
+ if (is_ld) {
+ tcg_out_opc_vldx(s, TCG_VEC_TMP0, h.base, h.index);
+ tcg_out_opc_vpickve2gr_d(s, data_lo, TCG_VEC_TMP0, 0);
+ tcg_out_opc_vpickve2gr_d(s, data_hi, TCG_VEC_TMP0, 1);
+ } else {
+ tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_lo, 0);
+ tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_hi, 1);
+ tcg_out_opc_vstx(s, TCG_VEC_TMP0, h.base, h.index);
+ }
+ } else {
+ /* Otherwise use a pair of LD/ST. */
+ tcg_out_opc_add_d(s, TCG_REG_TMP0, h.base, h.index);
+ if (is_ld) {
+ tcg_out_opc_ld_d(s, data_lo, TCG_REG_TMP0, 0);
+ tcg_out_opc_ld_d(s, data_hi, TCG_REG_TMP0, 8);
+ } else {
+ tcg_out_opc_st_d(s, data_lo, TCG_REG_TMP0, 0);
+ tcg_out_opc_st_d(s, data_hi, TCG_REG_TMP0, 8);
+ }
+ }
+
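+ /* Record the data registers for the slow path, if one was generated. */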
+ if (ldst) {
+ ldst->type = TCG_TYPE_I128;
+ ldst->datalo_reg = data_lo;
+ ldst->datahi_reg = data_hi;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+ }
+}
+
/*
* Entry-points
*/
TCGArg a0 = args[0];
TCGArg a1 = args[1];
TCGArg a2 = args[2];
+ TCGArg a3 = args[3];
int c2 = const_args[2];
switch (opc) {
case INDEX_op_qemu_ld_a64_i64:
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
break;
+ case INDEX_op_qemu_ld_a32_i128:
+ case INDEX_op_qemu_ld_a64_i128:
+ tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, true);
+ break;
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
break;
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
break;
+ case INDEX_op_qemu_st_a32_i128:
+ case INDEX_op_qemu_st_a64_i128:
+ tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, false);
+ break;
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
case INDEX_op_mov_i64:
}
}
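+/*
+ * Emit a vector add or subtract.  When a2 is a constant, the
+ * TCG_CT_CONST_VADD constraint guarantees that its magnitude fits the
+ * ui5 immediate of vaddi/vsubi.
+ */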
+static void tcg_out_addsub_vec(TCGContext *s, unsigned vece, const TCGArg a0,
+ const TCGArg a1, const TCGArg a2,
+ bool a2_is_const, bool is_add)
+{
+ static const LoongArchInsn add_vec_insn[4] = {
+ OPC_VADD_B, OPC_VADD_H, OPC_VADD_W, OPC_VADD_D
+ };
+ static const LoongArchInsn add_vec_imm_insn[4] = {
+ OPC_VADDI_BU, OPC_VADDI_HU, OPC_VADDI_WU, OPC_VADDI_DU
+ };
+ static const LoongArchInsn sub_vec_insn[4] = {
+ OPC_VSUB_B, OPC_VSUB_H, OPC_VSUB_W, OPC_VSUB_D
+ };
+ static const LoongArchInsn sub_vec_imm_insn[4] = {
+ OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU
+ };
+
+ if (a2_is_const) {
+ int64_t value = sextract64(a2, 0, 8 << vece);
+ if (!is_add) {
+ value = -value;
+ }
+
+ /* Try vaddi/vsubi */
+ if (0 <= value && value <= 0x1f) {
+ tcg_out32(s, encode_vdvjuk5_insn(add_vec_imm_insn[vece], a0, \
+ a1, value));
+ return;
+ } else if (-0x1f <= value && value < 0) {
+ tcg_out32(s, encode_vdvjuk5_insn(sub_vec_imm_insn[vece], a0, \
+ a1, -value));
+ return;
+ }
+
+ /* The TCG_CT_CONST_VADD constraint guarantees we never get here. */
+ g_assert_not_reached();
+ }
+
+ if (is_add) {
+ tcg_out32(s, encode_vdvjvk_insn(add_vec_insn[vece], a0, a1, a2));
+ } else {
+ tcg_out32(s, encode_vdvjvk_insn(sub_vec_insn[vece], a0, a1, a2));
+ }
+}
+
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
const TCGArg args[TCG_MAX_OP_ARGS],
const int const_args[TCG_MAX_OP_ARGS])
{
TCGType type = vecl + TCG_TYPE_V64;
- TCGArg a0, a1, a2;
+ TCGArg a0, a1, a2, a3;
TCGReg temp = TCG_REG_TMP0;
+ TCGReg temp_vec = TCG_VEC_TMP0;
+
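+ /*
+ * Register-register and register-immediate compares, indexed by
+ * condition and element size.  Greater-than style conditions have no
+ * entry and are synthesised below by swapping the operands.
+ */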
+ static const LoongArchInsn cmp_vec_insn[16][4] = {
+ [TCG_COND_EQ] = {OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D},
+ [TCG_COND_LE] = {OPC_VSLE_B, OPC_VSLE_H, OPC_VSLE_W, OPC_VSLE_D},
+ [TCG_COND_LEU] = {OPC_VSLE_BU, OPC_VSLE_HU, OPC_VSLE_WU, OPC_VSLE_DU},
+ [TCG_COND_LT] = {OPC_VSLT_B, OPC_VSLT_H, OPC_VSLT_W, OPC_VSLT_D},
+ [TCG_COND_LTU] = {OPC_VSLT_BU, OPC_VSLT_HU, OPC_VSLT_WU, OPC_VSLT_DU},
+ };
+ static const LoongArchInsn cmp_vec_imm_insn[16][4] = {
+ [TCG_COND_EQ] = {OPC_VSEQI_B, OPC_VSEQI_H, OPC_VSEQI_W, OPC_VSEQI_D},
+ [TCG_COND_LE] = {OPC_VSLEI_B, OPC_VSLEI_H, OPC_VSLEI_W, OPC_VSLEI_D},
+ [TCG_COND_LEU] = {OPC_VSLEI_BU, OPC_VSLEI_HU, OPC_VSLEI_WU, OPC_VSLEI_DU},
+ [TCG_COND_LT] = {OPC_VSLTI_B, OPC_VSLTI_H, OPC_VSLTI_W, OPC_VSLTI_D},
+ [TCG_COND_LTU] = {OPC_VSLTI_BU, OPC_VSLTI_HU, OPC_VSLTI_WU, OPC_VSLTI_DU},
+ };
+ LoongArchInsn insn;
+ static const LoongArchInsn neg_vec_insn[4] = {
+ OPC_VNEG_B, OPC_VNEG_H, OPC_VNEG_W, OPC_VNEG_D
+ };
+ static const LoongArchInsn mul_vec_insn[4] = {
+ OPC_VMUL_B, OPC_VMUL_H, OPC_VMUL_W, OPC_VMUL_D
+ };
+ static const LoongArchInsn smin_vec_insn[4] = {
+ OPC_VMIN_B, OPC_VMIN_H, OPC_VMIN_W, OPC_VMIN_D
+ };
+ static const LoongArchInsn umin_vec_insn[4] = {
+ OPC_VMIN_BU, OPC_VMIN_HU, OPC_VMIN_WU, OPC_VMIN_DU
+ };
+ static const LoongArchInsn smax_vec_insn[4] = {
+ OPC_VMAX_B, OPC_VMAX_H, OPC_VMAX_W, OPC_VMAX_D
+ };
+ static const LoongArchInsn umax_vec_insn[4] = {
+ OPC_VMAX_BU, OPC_VMAX_HU, OPC_VMAX_WU, OPC_VMAX_DU
+ };
+ static const LoongArchInsn ssadd_vec_insn[4] = {
+ OPC_VSADD_B, OPC_VSADD_H, OPC_VSADD_W, OPC_VSADD_D
+ };
+ static const LoongArchInsn usadd_vec_insn[4] = {
+ OPC_VSADD_BU, OPC_VSADD_HU, OPC_VSADD_WU, OPC_VSADD_DU
+ };
+ static const LoongArchInsn sssub_vec_insn[4] = {
+ OPC_VSSUB_B, OPC_VSSUB_H, OPC_VSSUB_W, OPC_VSSUB_D
+ };
+ static const LoongArchInsn ussub_vec_insn[4] = {
+ OPC_VSSUB_BU, OPC_VSSUB_HU, OPC_VSSUB_WU, OPC_VSSUB_DU
+ };
+ static const LoongArchInsn shlv_vec_insn[4] = {
+ OPC_VSLL_B, OPC_VSLL_H, OPC_VSLL_W, OPC_VSLL_D
+ };
+ static const LoongArchInsn shrv_vec_insn[4] = {
+ OPC_VSRL_B, OPC_VSRL_H, OPC_VSRL_W, OPC_VSRL_D
+ };
+ static const LoongArchInsn sarv_vec_insn[4] = {
+ OPC_VSRA_B, OPC_VSRA_H, OPC_VSRA_W, OPC_VSRA_D
+ };
+ static const LoongArchInsn shli_vec_insn[4] = {
+ OPC_VSLLI_B, OPC_VSLLI_H, OPC_VSLLI_W, OPC_VSLLI_D
+ };
+ static const LoongArchInsn shri_vec_insn[4] = {
+ OPC_VSRLI_B, OPC_VSRLI_H, OPC_VSRLI_W, OPC_VSRLI_D
+ };
+ static const LoongArchInsn sari_vec_insn[4] = {
+ OPC_VSRAI_B, OPC_VSRAI_H, OPC_VSRAI_W, OPC_VSRAI_D
+ };
+ static const LoongArchInsn rotrv_vec_insn[4] = {
+ OPC_VROTR_B, OPC_VROTR_H, OPC_VROTR_W, OPC_VROTR_D
+ };
a0 = args[0];
a1 = args[1];
a2 = args[2];
+ a3 = args[3];
/* Currently only supports V128 */
tcg_debug_assert(type == TCG_TYPE_V128);
tcg_out_opc_vldx(s, a0, a1, temp);
}
break;
+ case INDEX_op_and_vec:
+ tcg_out_opc_vand_v(s, a0, a1, a2);
+ break;
+ case INDEX_op_andc_vec:
+ /*
+ * vandn vd, vj, vk: vd = vk & ~vj
+ * andc_vec vd, vj, vk: vd = vj & ~vk
+ * so vj and vk are swapped
+ */
+ tcg_out_opc_vandn_v(s, a0, a2, a1);
+ break;
+ case INDEX_op_or_vec:
+ tcg_out_opc_vor_v(s, a0, a1, a2);
+ break;
+ case INDEX_op_orc_vec:
+ tcg_out_opc_vorn_v(s, a0, a1, a2);
+ break;
+ case INDEX_op_xor_vec:
+ tcg_out_opc_vxor_v(s, a0, a1, a2);
+ break;
+ case INDEX_op_nor_vec:
+ tcg_out_opc_vnor_v(s, a0, a1, a2);
+ break;
+ case INDEX_op_not_vec:
+ tcg_out_opc_vnor_v(s, a0, a1, a1);
+ break;
+ case INDEX_op_cmp_vec:
+ {
+ TCGCond cond = args[3];
+ if (const_args[2]) {
+ /*
+ * cmp_vec dest, src, value
+ * Try vseqi/vslei/vslti
+ */
+ int64_t value = sextract64(a2, 0, 8 << vece);
+ if ((cond == TCG_COND_EQ || cond == TCG_COND_LE || \
+ cond == TCG_COND_LT) && (-0x10 <= value && value <= 0x0f)) {
+ tcg_out32(s, encode_vdvjsk5_insn(cmp_vec_imm_insn[cond][vece], \
+ a0, a1, value));
+ break;
+ } else if ((cond == TCG_COND_LEU || cond == TCG_COND_LTU) &&
+ (0x00 <= value && value <= 0x1f)) {
+ tcg_out32(s, encode_vdvjuk5_insn(cmp_vec_imm_insn[cond][vece], \
+ a0, a1, value));
+ break;
+ }
+
+ /*
+ * Fallback to:
+ * dupi_vec temp, a2
+ * cmp_vec a0, a1, temp, cond
+ */
+ tcg_out_dupi_vec(s, type, vece, temp_vec, a2);
+ a2 = temp_vec;
+ }
+
+ insn = cmp_vec_insn[cond][vece];
+ if (insn == 0) {
+ TCGArg t;
+ t = a1, a1 = a2, a2 = t;
+ cond = tcg_swap_cond(cond);
+ insn = cmp_vec_insn[cond][vece];
+ tcg_debug_assert(insn != 0);
+ }
+ tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
+ }
+ break;
+ case INDEX_op_add_vec:
+ tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], true);
+ break;
+ case INDEX_op_sub_vec:
+ tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], false);
+ break;
+ case INDEX_op_neg_vec:
+ tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], a0, a1));
+ break;
+ case INDEX_op_mul_vec:
+ tcg_out32(s, encode_vdvjvk_insn(mul_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_smin_vec:
+ tcg_out32(s, encode_vdvjvk_insn(smin_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_smax_vec:
+ tcg_out32(s, encode_vdvjvk_insn(smax_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_umin_vec:
+ tcg_out32(s, encode_vdvjvk_insn(umin_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_umax_vec:
+ tcg_out32(s, encode_vdvjvk_insn(umax_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_ssadd_vec:
+ tcg_out32(s, encode_vdvjvk_insn(ssadd_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_usadd_vec:
+ tcg_out32(s, encode_vdvjvk_insn(usadd_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_sssub_vec:
+ tcg_out32(s, encode_vdvjvk_insn(sssub_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_ussub_vec:
+ tcg_out32(s, encode_vdvjvk_insn(ussub_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_shlv_vec:
+ tcg_out32(s, encode_vdvjvk_insn(shlv_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_shrv_vec:
+ tcg_out32(s, encode_vdvjvk_insn(shrv_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_sarv_vec:
+ tcg_out32(s, encode_vdvjvk_insn(sarv_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_shli_vec:
+ tcg_out32(s, encode_vdvjuk3_insn(shli_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_shri_vec:
+ tcg_out32(s, encode_vdvjuk3_insn(shri_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_sari_vec:
+ tcg_out32(s, encode_vdvjuk3_insn(sari_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_rotrv_vec:
+ tcg_out32(s, encode_vdvjvk_insn(rotrv_vec_insn[vece], a0, a1, a2));
+ break;
+ case INDEX_op_rotlv_vec:
+ /* rotlv_vec a1, a2 = rotrv_vec a1, -a2 */
+ tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], temp_vec, a2));
+ tcg_out32(s, encode_vdvjvk_insn(rotrv_vec_insn[vece], a0, a1,
+ temp_vec));
+ break;
+ case INDEX_op_rotli_vec:
+ /* rotli_vec a1, a2 = rotri_vec a1, -a2 */
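+ /* The negated count is reduced modulo the element width (8 << vece). */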
+ a2 = extract32(-a2, 0, 3 + vece);
+ switch (vece) {
+ case MO_8:
+ tcg_out_opc_vrotri_b(s, a0, a1, a2);
+ break;
+ case MO_16:
+ tcg_out_opc_vrotri_h(s, a0, a1, a2);
+ break;
+ case MO_32:
+ tcg_out_opc_vrotri_w(s, a0, a1, a2);
+ break;
+ case MO_64:
+ tcg_out_opc_vrotri_d(s, a0, a1, a2);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ break;
+ case INDEX_op_bitsel_vec:
+ /* vbitsel vd, vj, vk, va = bitsel_vec vd, va, vk, vj */
+ tcg_out_opc_vbitsel_v(s, a0, a3, a2, a1);
+ break;
case INDEX_op_dupm_vec:
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
break;
case INDEX_op_st_vec:
case INDEX_op_dup_vec:
case INDEX_op_dupm_vec:
+ case INDEX_op_cmp_vec:
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_orc_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_nor_vec:
+ case INDEX_op_not_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_ussub_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_bitsel_vec:
return 1;
default:
return 0;
case INDEX_op_qemu_st_a64_i64:
return C_O0_I2(rZ, r);
+ case INDEX_op_qemu_ld_a32_i128:
+ case INDEX_op_qemu_ld_a64_i128:
+ return C_O2_I1(r, r, r);
+
+ case INDEX_op_qemu_st_a32_i128:
+ case INDEX_op_qemu_st_a64_i128:
+ return C_O0_I3(r, r, r);
+
case INDEX_op_brcond_i32:
case INDEX_op_brcond_i64:
return C_O0_I2(rZ, rZ);
case INDEX_op_st_vec:
return C_O0_I2(w, r);
+ case INDEX_op_cmp_vec:
+ return C_O1_I2(w, w, wM);
+
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ return C_O1_I2(w, w, wA);
+
+ case INDEX_op_and_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_orc_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_nor_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_ussub_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_rotrv_vec:
+ case INDEX_op_rotlv_vec:
+ return C_O1_I2(w, w, w);
+
+ case INDEX_op_not_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_rotli_vec:
+ return C_O1_I1(w, w);
+
+ case INDEX_op_bitsel_vec:
+ return C_O1_I3(w, w, w, w);
+
default:
g_assert_not_reached();
}
tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0);
}
+static void tcg_out_tb_start(TCGContext *s)
+{
+ /* nothing to do */
+}
+
static void tcg_target_init(TCGContext *s)
{
unsigned long hwcap = qemu_getauxval(AT_HWCAP);