From: Richard Henderson
Date: Fri, 18 Nov 2016 11:50:50 +0000 (+0100)
Subject: tcg/i386: Hoist common arguments in tcg_out_op
X-Git-Tag: v2.9.0-rc2~210^2~19
X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=42d5b514928a8a0d2f55a4c243d1333f9675815b;p=mirror_qemu.git

tcg/i386: Hoist common arguments in tcg_out_op

Signed-off-by: Richard Henderson
---

diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index e497befeb2..83572ac788 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -1791,7 +1791,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                               const TCGArg *args, const int *const_args)
 {
-    int c, vexop, rexw = 0;
+    TCGArg a0, a1, a2;
+    int c, const_a2, vexop, rexw = 0;
 
 #if TCG_TARGET_REG_BITS == 64
 # define OP_32_64(x) \
@@ -1803,9 +1804,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         case glue(glue(INDEX_op_, x), _i32)
 #endif
 
-    switch(opc) {
+    /* Hoist the loads of the most common arguments. */
+    a0 = args[0];
+    a1 = args[1];
+    a2 = args[2];
+    const_a2 = const_args[2];
+
+    switch (opc) {
     case INDEX_op_exit_tb:
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
         tcg_out_jmp(s, tb_ret_addr);
         break;
     case INDEX_op_goto_tb:
@@ -1820,57 +1827,53 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                 tcg_out_nopn(s, gap - 1);
             }
             tcg_out8(s, OPC_JMP_long); /* jmp im */
-            s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
+            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
             tcg_out32(s, 0);
         } else {
             /* indirect jump method */
             tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
-                                 (intptr_t)(s->tb_jmp_target_addr + args[0]));
+                                 (intptr_t)(s->tb_jmp_target_addr + a0));
         }
-        s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
+        s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
         break;
     case INDEX_op_br:
-        tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0);
+        tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
         break;
     OP_32_64(ld8u):
         /* Note that we can ignore REXW for the zero-extend to 64-bit. */
-        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2);
         break;
     OP_32_64(ld8s):
-        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2);
         break;
     OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit. */
-        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2);
         break;
     OP_32_64(ld16s):
-        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2);
         break;
 #if TCG_TARGET_REG_BITS == 64
     case INDEX_op_ld32u_i64:
 #endif
     case INDEX_op_ld_i32:
-        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+        tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2);
         break;
 
     OP_32_64(st8):
         if (const_args[0]) {
-            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
-                                 0, args[1], args[2]);
-            tcg_out8(s, args[0]);
+            tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2);
+            tcg_out8(s, a0);
         } else {
-            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
-                                 args[0], args[1], args[2]);
+            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2);
         }
         break;
     OP_32_64(st16):
         if (const_args[0]) {
-            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
-                                 0, args[1], args[2]);
-            tcg_out16(s, args[0]);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2);
+            tcg_out16(s, a0);
         } else {
-            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
-                                 args[0], args[1], args[2]);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2);
         }
         break;
 #if TCG_TARGET_REG_BITS == 64
@@ -1878,19 +1881,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 #endif
     case INDEX_op_st_i32:
         if (const_args[0]) {
-            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
-            tcg_out32(s, args[0]);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2);
+            tcg_out32(s, a0);
         } else {
-            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+            tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2);
         }
         break;
 
     OP_32_64(add):
         /* For 3-operand addition, use LEA. */
-        if (args[0] != args[1]) {
-            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
-
-            if (const_args[2]) {
+        if (a0 != a1) {
+            TCGArg c3 = 0;
+            if (const_a2) {
                 c3 = a2, a2 = -1;
             } else if (a0 == a2) {
                 /* Watch out for dest = src + dest, since we've removed
@@ -1917,36 +1919,35 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         c = ARITH_XOR;
         goto gen_arith;
     gen_arith:
-        if (const_args[2]) {
-            tgen_arithi(s, c + rexw, args[0], args[2], 0);
+        if (const_a2) {
+            tgen_arithi(s, c + rexw, a0, a2, 0);
         } else {
-            tgen_arithr(s, c + rexw, args[0], args[2]);
+            tgen_arithr(s, c + rexw, a0, a2);
         }
         break;
 
     OP_32_64(andc):
-        if (const_args[2]) {
-            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
-                        args[0], args[1]);
-            tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
+        if (const_a2) {
+            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
+            tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0);
         } else {
-            tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
+            tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1);
         }
         break;
 
     OP_32_64(mul):
-        if (const_args[2]) {
+        if (const_a2) {
             int32_t val;
-            val = args[2];
+            val = a2;
             if (val == (int8_t)val) {
-                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
+                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0);
                 tcg_out8(s, val);
             } else {
-                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
+                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0);
                 tcg_out32(s, val);
             }
         } else {
-            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
+            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2);
         }
         break;
 
@@ -1976,57 +1977,54 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         c = SHIFT_ROR;
         goto gen_shift;
     gen_shift_maybe_vex:
-        if (have_bmi2 && !const_args[2]) {
-            tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
+        if (have_bmi2 && !const_a2) {
+            tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1);
             break;
         }
         /* FALLTHRU */
     gen_shift:
-        if (const_args[2]) {
-            tcg_out_shifti(s, c + rexw, args[0], args[2]);
+        if (const_a2) {
+            tcg_out_shifti(s, c + rexw, a0, a2);
         } else {
-            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
+            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0);
         }
         break;
 
     case INDEX_op_brcond_i32:
-        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
-                         arg_label(args[3]), 0);
+        tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
         break;
     case INDEX_op_setcond_i32:
-        tcg_out_setcond32(s, args[3], args[0], args[1],
-                          args[2], const_args[2]);
+        tcg_out_setcond32(s, args[3], a0, a1, a2, const_a2);
         break;
     case INDEX_op_movcond_i32:
-        tcg_out_movcond32(s, args[5], args[0], args[1],
-                          args[2], const_args[2], args[3]);
+        tcg_out_movcond32(s, args[5], a0, a1, a2, const_a2, args[3]);
         break;
 
     OP_32_64(bswap16):
-        tcg_out_rolw_8(s, args[0]);
+        tcg_out_rolw_8(s, a0);
         break;
     OP_32_64(bswap32):
-        tcg_out_bswap32(s, args[0]);
+        tcg_out_bswap32(s, a0);
         break;
 
     OP_32_64(neg):
-        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
+        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0);
         break;
     OP_32_64(not):
-        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
+        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
         break;
 
     OP_32_64(ext8s):
-        tcg_out_ext8s(s, args[0], args[1], rexw);
+        tcg_out_ext8s(s, a0, a1, rexw);
         break;
     OP_32_64(ext16s):
-        tcg_out_ext16s(s, args[0], args[1], rexw);
+        tcg_out_ext16s(s, a0, a1, rexw);
         break;
     OP_32_64(ext8u):
-        tcg_out_ext8u(s, args[0], args[1]);
+        tcg_out_ext8u(s, a0, a1);
         break;
     OP_32_64(ext16u):
-        tcg_out_ext16u(s, args[0], args[1]);
+        tcg_out_ext16u(s, a0, a1);
         break;
 
     case INDEX_op_qemu_ld_i32:
@@ -2050,26 +2048,26 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     OP_32_64(add2):
         if (const_args[4]) {
-            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
+            tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1);
         } else {
-            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
+            tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]);
         }
         if (const_args[5]) {
-            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
+            tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1);
         } else {
-            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
+            tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]);
         }
         break;
     OP_32_64(sub2):
         if (const_args[4]) {
-            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
+            tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1);
         } else {
-            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
+            tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]);
         }
         if (const_args[5]) {
-            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
+            tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1);
         } else {
-            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
+            tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]);
         }
         break;
 
@@ -2082,68 +2080,63 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 #else /* TCG_TARGET_REG_BITS == 64 */
     case INDEX_op_ld32s_i64:
-        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2);
         break;
     case INDEX_op_ld_i64:
-        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+        tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2);
         break;
     case INDEX_op_st_i64:
         if (const_args[0]) {
-            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
-                                 0, args[1], args[2]);
-            tcg_out32(s, args[0]);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2);
+            tcg_out32(s, a0);
         } else {
-            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+            tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2);
         }
         break;
 
     case INDEX_op_brcond_i64:
-        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
-                         arg_label(args[3]), 0);
+        tcg_out_brcond64(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
         break;
     case INDEX_op_setcond_i64:
-        tcg_out_setcond64(s, args[3], args[0], args[1],
-                          args[2], const_args[2]);
+        tcg_out_setcond64(s, args[3], a0, a1, a2, const_a2);
         break;
     case INDEX_op_movcond_i64:
-        tcg_out_movcond64(s, args[5], args[0], args[1],
-                          args[2], const_args[2], args[3]);
+        tcg_out_movcond64(s, args[5], a0, a1, a2, const_a2, args[3]);
         break;
 
     case INDEX_op_bswap64_i64:
-        tcg_out_bswap64(s, args[0]);
+        tcg_out_bswap64(s, a0);
         break;
     case INDEX_op_extu_i32_i64:
     case INDEX_op_ext32u_i64:
-        tcg_out_ext32u(s, args[0], args[1]);
+        tcg_out_ext32u(s, a0, a1);
         break;
     case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
-        tcg_out_ext32s(s, args[0], args[1]);
+        tcg_out_ext32s(s, a0, a1);
         break;
 #endif
 
     OP_32_64(deposit):
         if (args[3] == 0 && args[4] == 8) {
             /* load bits 0..7 */
-            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
-                          args[2], args[0]);
+            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
         } else if (args[3] == 8 && args[4] == 8) {
             /* load bits 8..15 */
-            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
+            tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
         } else if (args[3] == 0 && args[4] == 16) {
             /* load bits 0..15 */
-            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
+            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0);
         } else {
             tcg_abort();
         }
         break;
 
     case INDEX_op_extract_i64:
-        if (args[2] + args[3] == 32) {
+        if (a2 + args[3] == 32) {
             /* This is a 32-bit zero-extending right shift. */
-            tcg_out_mov(s, TCG_TYPE_I32, args[0], args[1]);
-            tcg_out_shifti(s, SHIFT_SHR, args[0], args[2]);
+            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+            tcg_out_shifti(s, SHIFT_SHR, a0, a2);
             break;
         }
         /* FALLTHRU */
@@ -2151,12 +2144,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         /* On the off-chance that we can use the high-byte registers.
            Otherwise we emit the same ext16 + shift pattern that we
           would have gotten from the normal tcg-op.c expansion. */
-        tcg_debug_assert(args[2] == 8 && args[3] == 8);
-        if (args[1] < 4 && args[0] < 8) {
-            tcg_out_modrm(s, OPC_MOVZBL, args[0], args[1] + 4);
+        tcg_debug_assert(a2 == 8 && args[3] == 8);
+        if (a1 < 4 && a0 < 8) {
+            tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
         } else {
-            tcg_out_ext16u(s, args[0], args[1]);
-            tcg_out_shifti(s, SHIFT_SHR, args[0], 8);
+            tcg_out_ext16u(s, a0, a1);
+            tcg_out_shifti(s, SHIFT_SHR, a0, 8);
         }
         break;
 
@@ -2164,17 +2157,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         /* We don't implement sextract_i64, as we cannot sign-extend to
            64-bits without using the REX prefix that explicitly excludes
           access to the high-byte registers. */
-        tcg_debug_assert(args[2] == 8 && args[3] == 8);
-        if (args[1] < 4 && args[0] < 8) {
-            tcg_out_modrm(s, OPC_MOVSBL, args[0], args[1] + 4);
+        tcg_debug_assert(a2 == 8 && args[3] == 8);
+        if (a1 < 4 && a0 < 8) {
+            tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
         } else {
-            tcg_out_ext16s(s, args[0], args[1], 0);
-            tcg_out_shifti(s, SHIFT_SAR, args[0], 8);
+            tcg_out_ext16s(s, a0, a1, 0);
+            tcg_out_shifti(s, SHIFT_SAR, a0, 8);
         }
         break;
 
     case INDEX_op_mb:
-        tcg_out_mb(s, args[0]);
+        tcg_out_mb(s, a0);
         break;
     case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
     case INDEX_op_mov_i64: