git.proxmox.com Git - mirror_qemu.git/blobdiff - tci.c
Merge remote-tracking branch 'remotes/stsquad/tags/pull-mttcg-fixups-for-rc2-280317...
[mirror_qemu.git] / tci.c
diff --git a/tci.c b/tci.c
index 0202ed97d14ac6bb4bcb3aadc83886200c1d4a97..4bdc645f2a55ef8a15950772f70ce17f393593c9 100644 (file)
--- a/tci.c
+++ b/tci.c
@@ -1,7 +1,7 @@
 /*
  * Tiny Code Interpreter for QEMU
  *
- * Copyright (c) 2009, 2011 Stefan Weil
+ * Copyright (c) 2009, 2011, 2016 Stefan Weil
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -17,15 +17,19 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "config.h"
+#include "qemu/osdep.h"
 
-/* Defining NDEBUG disables assertions (which makes the code faster). */
-#if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
-# define NDEBUG
+/* Enable TCI assertions only when debugging TCG (and without NDEBUG defined).
+ * Without assertions, the interpreter runs much faster. */
+#if defined(CONFIG_DEBUG_TCG)
+# define tci_assert(cond) assert(cond)
+#else
+# define tci_assert(cond) ((void)0)
 #endif
 
 #include "qemu-common.h"
-#include "exec/exec-all.h"           /* MAX_OPC_PARAM_IARGS */
+#include "tcg/tcg.h"           /* MAX_OPC_PARAM_IARGS */
+#include "exec/cpu_ldst.h"
 #include "tcg-op.h"
 
 /* Marker for missing code. */
@@ -51,17 +55,11 @@ typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
                                     tcg_target_ulong);
 #endif
 
-/* Targets which don't use GETPC also don't need tci_tb_ptr
-   which makes them a little faster. */
-#if defined(GETPC)
-uintptr_t tci_tb_ptr;
-#endif
-
 static tcg_target_ulong tci_reg[TCG_TARGET_NB_REGS];
 
 static tcg_target_ulong tci_read_reg(TCGReg index)
 {
-    assert(index < ARRAY_SIZE(tci_reg));
+    tci_assert(index < ARRAY_SIZE(tci_reg));
     return tci_reg[index];
 }
 
@@ -110,22 +108,12 @@ static uint64_t tci_read_reg64(TCGReg index)
 
 static void tci_write_reg(TCGReg index, tcg_target_ulong value)
 {
-    assert(index < ARRAY_SIZE(tci_reg));
-    assert(index != TCG_AREG0);
-    assert(index != TCG_REG_CALL_STACK);
+    tci_assert(index < ARRAY_SIZE(tci_reg));
+    tci_assert(index != TCG_AREG0);
+    tci_assert(index != TCG_REG_CALL_STACK);
     tci_reg[index] = value;
 }
 
-static void tci_write_reg8s(TCGReg index, int8_t value)
-{
-    tci_write_reg(index, value);
-}
-
-static void tci_write_reg16s(TCGReg index, int16_t value)
-{
-    tci_write_reg(index, value);
-}
-
 #if TCG_TARGET_REG_BITS == 64
 static void tci_write_reg32s(TCGReg index, int32_t value)
 {
@@ -138,11 +126,6 @@ static void tci_write_reg8(TCGReg index, uint8_t value)
     tci_write_reg(index, value);
 }
 
-static void tci_write_reg16(TCGReg index, uint16_t value)
-{
-    tci_write_reg(index, value);
-}
-
 static void tci_write_reg32(TCGReg index, uint32_t value)
 {
     tci_write_reg(index, value);
@@ -345,7 +328,7 @@ static uint64_t tci_read_ri64(uint8_t **tb_ptr)
 static tcg_target_ulong tci_read_label(uint8_t **tb_ptr)
 {
     tcg_target_ulong label = tci_read_i(tb_ptr);
-    assert(label != 0);
+    tci_assert(label != 0);
     return label;
 }
 
@@ -433,20 +416,66 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
     return result;
 }
 
+#ifdef CONFIG_SOFTMMU
+# define qemu_ld_ub \
+    helper_ret_ldub_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
+# define qemu_ld_leuw \
+    helper_le_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
+# define qemu_ld_leul \
+    helper_le_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
+# define qemu_ld_leq \
+    helper_le_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
+# define qemu_ld_beuw \
+    helper_be_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
+# define qemu_ld_beul \
+    helper_be_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
+# define qemu_ld_beq \
+    helper_be_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
+# define qemu_st_b(X) \
+    helper_ret_stb_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
+# define qemu_st_lew(X) \
+    helper_le_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
+# define qemu_st_lel(X) \
+    helper_le_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
+# define qemu_st_leq(X) \
+    helper_le_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
+# define qemu_st_bew(X) \
+    helper_be_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
+# define qemu_st_bel(X) \
+    helper_be_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
+# define qemu_st_beq(X) \
+    helper_be_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
+#else
+# define qemu_ld_ub      ldub_p(g2h(taddr))
+# define qemu_ld_leuw    lduw_le_p(g2h(taddr))
+# define qemu_ld_leul    (uint32_t)ldl_le_p(g2h(taddr))
+# define qemu_ld_leq     ldq_le_p(g2h(taddr))
+# define qemu_ld_beuw    lduw_be_p(g2h(taddr))
+# define qemu_ld_beul    (uint32_t)ldl_be_p(g2h(taddr))
+# define qemu_ld_beq     ldq_be_p(g2h(taddr))
+# define qemu_st_b(X)    stb_p(g2h(taddr), X)
+# define qemu_st_lew(X)  stw_le_p(g2h(taddr), X)
+# define qemu_st_lel(X)  stl_le_p(g2h(taddr), X)
+# define qemu_st_leq(X)  stq_le_p(g2h(taddr), X)
+# define qemu_st_bew(X)  stw_be_p(g2h(taddr), X)
+# define qemu_st_bel(X)  stl_be_p(g2h(taddr), X)
+# define qemu_st_beq(X)  stq_be_p(g2h(taddr), X)
+#endif
+
 /* Interpret pseudo code in tb. */
 uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
 {
     long tcg_temps[CPU_TEMP_BUF_NLONGS];
     uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS);
-    uintptr_t next_tb = 0;
+    uintptr_t ret = 0;
 
     tci_reg[TCG_AREG0] = (tcg_target_ulong)env;
     tci_reg[TCG_REG_CALL_STACK] = sp_value;
-    assert(tb_ptr);
+    tci_assert(tb_ptr);
 
     for (;;) {
         TCGOpcode opc = tb_ptr[0];
-#if !defined(NDEBUG)
+#if defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
         uint8_t op_size = tb_ptr[1];
         uint8_t *old_code_ptr = tb_ptr;
 #endif
@@ -456,9 +485,6 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
         tcg_target_ulong label;
         TCGCond condition;
         target_ulong taddr;
-#ifndef CONFIG_SOFTMMU
-        tcg_target_ulong host_addr;
-#endif
         uint8_t tmp8;
         uint16_t tmp16;
         uint32_t tmp32;
@@ -466,6 +492,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
 #if TCG_TARGET_REG_BITS == 32
         uint64_t v64;
 #endif
+        TCGMemOpIdx oi;
 
 #if defined(GETPC)
         tci_tb_ptr = (uintptr_t)tb_ptr;
@@ -475,19 +502,6 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
         tb_ptr += 2;
 
         switch (opc) {
-        case INDEX_op_end:
-        case INDEX_op_nop:
-            break;
-        case INDEX_op_nop1:
-        case INDEX_op_nop2:
-        case INDEX_op_nop3:
-        case INDEX_op_nopn:
-        case INDEX_op_discard:
-            TODO();
-            break;
-        case INDEX_op_set_label:
-            TODO();
-            break;
         case INDEX_op_call:
             t0 = tci_read_ri(&tb_ptr);
 #if TCG_TARGET_REG_BITS == 32
@@ -514,7 +528,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
             break;
         case INDEX_op_br:
             label = tci_read_label(&tb_ptr);
-            assert(tb_ptr == old_code_ptr + op_size);
+            tci_assert(tb_ptr == old_code_ptr + op_size);
             tb_ptr = (uint8_t *)label;
             continue;
         case INDEX_op_setcond_i32:
@@ -589,7 +603,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
             t0 = tci_read_r32(&tb_ptr);
             t1 = tci_read_r(&tb_ptr);
             t2 = tci_read_s32(&tb_ptr);
-            assert(t1 != sp_value || (int32_t)t2 < 0);
+            tci_assert(t1 != sp_value || (int32_t)t2 < 0);
             *(uint32_t *)(t1 + t2) = t0;
             break;
 
@@ -669,32 +683,32 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
             t0 = *tb_ptr++;
             t1 = tci_read_ri32(&tb_ptr);
             t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, t1 << t2);
+            tci_write_reg32(t0, t1 << (t2 & 31));
             break;
         case INDEX_op_shr_i32:
             t0 = *tb_ptr++;
             t1 = tci_read_ri32(&tb_ptr);
             t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, t1 >> t2);
+            tci_write_reg32(t0, t1 >> (t2 & 31));
             break;
         case INDEX_op_sar_i32:
             t0 = *tb_ptr++;
             t1 = tci_read_ri32(&tb_ptr);
             t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, ((int32_t)t1 >> t2));
+            tci_write_reg32(t0, ((int32_t)t1 >> (t2 & 31)));
             break;
 #if TCG_TARGET_HAS_rot_i32
         case INDEX_op_rotl_i32:
             t0 = *tb_ptr++;
             t1 = tci_read_ri32(&tb_ptr);
             t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, rol32(t1, t2));
+            tci_write_reg32(t0, rol32(t1, t2 & 31));
             break;
         case INDEX_op_rotr_i32:
             t0 = *tb_ptr++;
             t1 = tci_read_ri32(&tb_ptr);
             t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, ror32(t1, t2));
+            tci_write_reg32(t0, ror32(t1, t2 & 31));
             break;
 #endif
 #if TCG_TARGET_HAS_deposit_i32
@@ -714,7 +728,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
             condition = *tb_ptr++;
             label = tci_read_label(&tb_ptr);
             if (tci_compare32(t0, t1, condition)) {
-                assert(tb_ptr == old_code_ptr + op_size);
+                tci_assert(tb_ptr == old_code_ptr + op_size);
                 tb_ptr = (uint8_t *)label;
                 continue;
             }
@@ -740,7 +754,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
             condition = *tb_ptr++;
             label = tci_read_label(&tb_ptr);
             if (tci_compare64(tmp64, v64, condition)) {
-                assert(tb_ptr == old_code_ptr + op_size);
+                tci_assert(tb_ptr == old_code_ptr + op_size);
                 tb_ptr = (uint8_t *)label;
                 continue;
             }
@@ -874,7 +888,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
             t0 = tci_read_r64(&tb_ptr);
             t1 = tci_read_r(&tb_ptr);
             t2 = tci_read_s32(&tb_ptr);
-            assert(t1 != sp_value || (int32_t)t2 < 0);
+            tci_assert(t1 != sp_value || (int32_t)t2 < 0);
             *(uint64_t *)(t1 + t2) = t0;
             break;
 
@@ -936,32 +950,32 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
             t0 = *tb_ptr++;
             t1 = tci_read_ri64(&tb_ptr);
             t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, t1 << t2);
+            tci_write_reg64(t0, t1 << (t2 & 63));
             break;
         case INDEX_op_shr_i64:
             t0 = *tb_ptr++;
             t1 = tci_read_ri64(&tb_ptr);
             t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, t1 >> t2);
+            tci_write_reg64(t0, t1 >> (t2 & 63));
             break;
         case INDEX_op_sar_i64:
             t0 = *tb_ptr++;
             t1 = tci_read_ri64(&tb_ptr);
             t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, ((int64_t)t1 >> t2));
+            tci_write_reg64(t0, ((int64_t)t1 >> (t2 & 63)));
             break;
 #if TCG_TARGET_HAS_rot_i64
         case INDEX_op_rotl_i64:
             t0 = *tb_ptr++;
             t1 = tci_read_ri64(&tb_ptr);
             t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, rol64(t1, t2));
+            tci_write_reg64(t0, rol64(t1, t2 & 63));
             break;
         case INDEX_op_rotr_i64:
             t0 = *tb_ptr++;
             t1 = tci_read_ri64(&tb_ptr);
             t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, ror64(t1, t2));
+            tci_write_reg64(t0, ror64(t1, t2 & 63));
             break;
 #endif
 #if TCG_TARGET_HAS_deposit_i64
@@ -981,7 +995,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
             condition = *tb_ptr++;
             label = tci_read_label(&tb_ptr);
             if (tci_compare64(t0, t1, condition)) {
-                assert(tb_ptr == old_code_ptr + op_size);
+                tci_assert(tb_ptr == old_code_ptr + op_size);
                 tb_ptr = (uint8_t *)label;
                 continue;
             }
@@ -1016,18 +1030,20 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
 #endif
 #if TCG_TARGET_HAS_ext32s_i64
         case INDEX_op_ext32s_i64:
+#endif
+        case INDEX_op_ext_i32_i64:
             t0 = *tb_ptr++;
             t1 = tci_read_r32s(&tb_ptr);
             tci_write_reg64(t0, t1);
             break;
-#endif
 #if TCG_TARGET_HAS_ext32u_i64
         case INDEX_op_ext32u_i64:
+#endif
+        case INDEX_op_extu_i32_i64:
             t0 = *tb_ptr++;
             t1 = tci_read_r32(&tb_ptr);
             tci_write_reg64(t0, t1);
             break;
-#endif
 #if TCG_TARGET_HAS_bswap16_i64
         case INDEX_op_bswap16_i64:
             TODO();
@@ -1068,170 +1084,168 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
 
             /* QEMU specific operations. */
 
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-        case INDEX_op_debug_insn_start:
-            TODO();
-            break;
-#else
-        case INDEX_op_debug_insn_start:
-            TODO();
-            break;
-#endif
         case INDEX_op_exit_tb:
-            next_tb = *(uint64_t *)tb_ptr;
+            ret = *(uint64_t *)tb_ptr;
             goto exit;
             break;
         case INDEX_op_goto_tb:
-            t0 = tci_read_i32(&tb_ptr);
-            assert(tb_ptr == old_code_ptr + op_size);
+            /* Jump address is aligned */
+            tb_ptr = QEMU_ALIGN_PTR_UP(tb_ptr, 4);
+            t0 = atomic_read((int32_t *)tb_ptr);
+            tb_ptr += sizeof(int32_t);
+            tci_assert(tb_ptr == old_code_ptr + op_size);
             tb_ptr += (int32_t)t0;
             continue;
-        case INDEX_op_qemu_ld8u:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp8 = helper_ldb_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp8 = *(uint8_t *)(host_addr + GUEST_BASE);
-#endif
-            tci_write_reg8(t0, tmp8);
-            break;
-        case INDEX_op_qemu_ld8s:
+        case INDEX_op_qemu_ld_i32:
             t0 = *tb_ptr++;
             taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp8 = helper_ldb_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp8 = *(uint8_t *)(host_addr + GUEST_BASE);
-#endif
-            tci_write_reg8s(t0, tmp8);
-            break;
-        case INDEX_op_qemu_ld16u:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp16 = helper_ldw_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp16 = tswap16(*(uint16_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg16(t0, tmp16);
-            break;
-        case INDEX_op_qemu_ld16s:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp16 = helper_ldw_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp16 = tswap16(*(uint16_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg16s(t0, tmp16);
-            break;
-#if TCG_TARGET_REG_BITS == 64
-        case INDEX_op_qemu_ld32u:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp32 = helper_ldl_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp32 = tswap32(*(uint32_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg32(t0, tmp32);
-            break;
-        case INDEX_op_qemu_ld32s:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp32 = helper_ldl_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp32 = tswap32(*(uint32_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg32s(t0, tmp32);
-            break;
-#endif /* TCG_TARGET_REG_BITS == 64 */
-        case INDEX_op_qemu_ld32:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp32 = helper_ldl_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp32 = tswap32(*(uint32_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg32(t0, tmp32);
+            oi = tci_read_i(&tb_ptr);
+            switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) {
+            case MO_UB:
+                tmp32 = qemu_ld_ub;
+                break;
+            case MO_SB:
+                tmp32 = (int8_t)qemu_ld_ub;
+                break;
+            case MO_LEUW:
+                tmp32 = qemu_ld_leuw;
+                break;
+            case MO_LESW:
+                tmp32 = (int16_t)qemu_ld_leuw;
+                break;
+            case MO_LEUL:
+                tmp32 = qemu_ld_leul;
+                break;
+            case MO_BEUW:
+                tmp32 = qemu_ld_beuw;
+                break;
+            case MO_BESW:
+                tmp32 = (int16_t)qemu_ld_beuw;
+                break;
+            case MO_BEUL:
+                tmp32 = qemu_ld_beul;
+                break;
+            default:
+                tcg_abort();
+            }
+            tci_write_reg(t0, tmp32);
             break;
-        case INDEX_op_qemu_ld64:
+        case INDEX_op_qemu_ld_i64:
             t0 = *tb_ptr++;
-#if TCG_TARGET_REG_BITS == 32
-            t1 = *tb_ptr++;
-#endif
+            if (TCG_TARGET_REG_BITS == 32) {
+                t1 = *tb_ptr++;
+            }
             taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp64 = helper_ldq_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp64 = tswap64(*(uint64_t *)(host_addr + GUEST_BASE));
-#endif
+            oi = tci_read_i(&tb_ptr);
+            switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) {
+            case MO_UB:
+                tmp64 = qemu_ld_ub;
+                break;
+            case MO_SB:
+                tmp64 = (int8_t)qemu_ld_ub;
+                break;
+            case MO_LEUW:
+                tmp64 = qemu_ld_leuw;
+                break;
+            case MO_LESW:
+                tmp64 = (int16_t)qemu_ld_leuw;
+                break;
+            case MO_LEUL:
+                tmp64 = qemu_ld_leul;
+                break;
+            case MO_LESL:
+                tmp64 = (int32_t)qemu_ld_leul;
+                break;
+            case MO_LEQ:
+                tmp64 = qemu_ld_leq;
+                break;
+            case MO_BEUW:
+                tmp64 = qemu_ld_beuw;
+                break;
+            case MO_BESW:
+                tmp64 = (int16_t)qemu_ld_beuw;
+                break;
+            case MO_BEUL:
+                tmp64 = qemu_ld_beul;
+                break;
+            case MO_BESL:
+                tmp64 = (int32_t)qemu_ld_beul;
+                break;
+            case MO_BEQ:
+                tmp64 = qemu_ld_beq;
+                break;
+            default:
+                tcg_abort();
+            }
             tci_write_reg(t0, tmp64);
-#if TCG_TARGET_REG_BITS == 32
-            tci_write_reg(t1, tmp64 >> 32);
-#endif
-            break;
-        case INDEX_op_qemu_st8:
-            t0 = tci_read_r8(&tb_ptr);
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stb_mmu(env, taddr, t0, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint8_t *)(host_addr + GUEST_BASE) = t0;
-#endif
-            break;
-        case INDEX_op_qemu_st16:
-            t0 = tci_read_r16(&tb_ptr);
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stw_mmu(env, taddr, t0, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint16_t *)(host_addr + GUEST_BASE) = tswap16(t0);
-#endif
+            if (TCG_TARGET_REG_BITS == 32) {
+                tci_write_reg(t1, tmp64 >> 32);
+            }
             break;
-        case INDEX_op_qemu_st32:
-            t0 = tci_read_r32(&tb_ptr);
+        case INDEX_op_qemu_st_i32:
+            t0 = tci_read_r(&tb_ptr);
             taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stl_mmu(env, taddr, t0, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint32_t *)(host_addr + GUEST_BASE) = tswap32(t0);
-#endif
+            oi = tci_read_i(&tb_ptr);
+            switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) {
+            case MO_UB:
+                qemu_st_b(t0);
+                break;
+            case MO_LEUW:
+                qemu_st_lew(t0);
+                break;
+            case MO_LEUL:
+                qemu_st_lel(t0);
+                break;
+            case MO_BEUW:
+                qemu_st_bew(t0);
+                break;
+            case MO_BEUL:
+                qemu_st_bel(t0);
+                break;
+            default:
+                tcg_abort();
+            }
             break;
-        case INDEX_op_qemu_st64:
+        case INDEX_op_qemu_st_i64:
             tmp64 = tci_read_r64(&tb_ptr);
             taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stq_mmu(env, taddr, tmp64, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint64_t *)(host_addr + GUEST_BASE) = tswap64(tmp64);
-#endif
+            oi = tci_read_i(&tb_ptr);
+            switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) {
+            case MO_UB:
+                qemu_st_b(tmp64);
+                break;
+            case MO_LEUW:
+                qemu_st_lew(tmp64);
+                break;
+            case MO_LEUL:
+                qemu_st_lel(tmp64);
+                break;
+            case MO_LEQ:
+                qemu_st_leq(tmp64);
+                break;
+            case MO_BEUW:
+                qemu_st_bew(tmp64);
+                break;
+            case MO_BEUL:
+                qemu_st_bel(tmp64);
+                break;
+            case MO_BEQ:
+                qemu_st_beq(tmp64);
+                break;
+            default:
+                tcg_abort();
+            }
+            break;
+        case INDEX_op_mb:
+            /* Ensure ordering for all kinds */
+            smp_mb();
             break;
         default:
             TODO();
             break;
         }
-        assert(tb_ptr == old_code_ptr + op_size);
+        tci_assert(tb_ptr == old_code_ptr + op_size);
     }
 exit:
-    return next_tb;
+    return ret;
 }