Update version for 1.1.0-rc4 release
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index 1ada15e19da73e6139eea24c39a69191b7de5d7b..a362938e41ee2cb479b6218dcc03ce6e768435d7 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -2,6 +2,7 @@
  *  Xilinx MicroBlaze emulation for qemu: main translation routines.
  *
  *  Copyright (c) 2009 Edgar E. Iglesias.
+ *  Copyright (c) 2009-2012 PetaLogix Qld Pty Ltd.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <stdarg.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <inttypes.h>
-#include <assert.h>
-
 #include "cpu.h"
-#include "exec-all.h"
 #include "disas.h"
 #include "tcg-op.h"
 #include "helper.h"
 #include "microblaze-decode.h"
-#include "qemu-common.h"
 
 #define GEN_HELPER 1
 #include "helper.h"
@@ -62,7 +54,7 @@ static TCGv env_iflags;
 
 /* This is the state at translation time.  */
 typedef struct DisasContext {
-    CPUState *env;
+    CPUMBState *env;
     target_ulong pc;
 
     /* Decoder.  */
@@ -78,9 +70,10 @@ typedef struct DisasContext {
     unsigned int clear_imm;
     int is_jmp;
 
-#define JMP_NOJMP    0
-#define JMP_DIRECT   1
-#define JMP_INDIRECT 2
+#define JMP_NOJMP     0
+#define JMP_DIRECT    1
+#define JMP_DIRECT_CC 2
+#define JMP_INDIRECT  3
     unsigned int jmp;
     uint32_t jmp_pc;
 
@@ -120,7 +113,7 @@ static inline int sign_extend(unsigned int val, unsigned int width)
 
 static inline void t_sync_flags(DisasContext *dc)
 {
-    /* Synch the tb dependant flags between translator and runtime.  */
+    /* Synch the tb dependent flags between translator and runtime.  */
     if (dc->tb_flags != dc->synced_flags) {
         tcg_gen_movi_tl(env_iflags, dc->tb_flags);
         dc->synced_flags = dc->tb_flags;
@@ -145,13 +138,30 @@ static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
     if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
         tcg_gen_goto_tb(n);
         tcg_gen_movi_tl(cpu_SR[SR_PC], dest);
-        tcg_gen_exit_tb((long)tb + n);
+        tcg_gen_exit_tb((tcg_target_long)tb + n);
     } else {
         tcg_gen_movi_tl(cpu_SR[SR_PC], dest);
         tcg_gen_exit_tb(0);
     }
 }
 
+static void read_carry(DisasContext *dc, TCGv d)
+{
+    tcg_gen_shri_tl(d, cpu_SR[SR_MSR], 31);
+}
+
+static void write_carry(DisasContext *dc, TCGv v)
+{
+    TCGv t0 = tcg_temp_new();
+    tcg_gen_shli_tl(t0, v, 31);
+    tcg_gen_sari_tl(t0, t0, 31);
+    tcg_gen_andi_tl(t0, t0, (MSR_C | MSR_CC));
+    tcg_gen_andi_tl(cpu_SR[SR_MSR], cpu_SR[SR_MSR],
+                    ~(MSR_C | MSR_CC));
+    tcg_gen_or_tl(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t0);
+    tcg_temp_free(t0);
+}
+
 /* True if ALU operand b is a small immediate that may deserve
    faster treatment.  */
 static inline int dec_alu_op_b_is_small_imm(DisasContext *dc)
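The new read_carry()/write_carry() helpers above centralize how the translator touches the carry flag. As a rough standalone sketch, assuming the usual QEMU MicroBlaze MSR layout (MSR_C at bit 2 plus a shadow copy MSR_CC at bit 31), the generated sequence is equivalent to:

#include <assert.h>
#include <stdint.h>

#define MSR_C  (1u << 2)   /* architectural carry bit (assumed value) */
#define MSR_CC (1u << 31)  /* shadow copy; read_carry() reads it via a shift */

/* Replicate bit 0 of v across the word, then deposit it into both carry
   positions -- the same shl/sar/and/or sequence write_carry() emits.  */
static uint32_t msr_with_carry(uint32_t msr, uint32_t v)
{
    uint32_t t = (uint32_t)((int32_t)(v << 31) >> 31);  /* 0 or ~0 */
    t &= MSR_C | MSR_CC;
    return (msr & ~(MSR_C | MSR_CC)) | t;
}

int main(void)
{
    uint32_t msr = msr_with_carry(0, 1);
    assert((msr >> 31) == 1);            /* what read_carry() would return */
    assert(msr_with_carry(msr, 0) == 0); /* clearing removes both copies */
    return 0;
}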
@@ -175,6 +185,7 @@ static inline TCGv *dec_alu_op_b(DisasContext *dc)
 static void dec_add(DisasContext *dc)
 {
     unsigned int k, c;
+    TCGv cf;
 
     k = dc->opcode & 4;
     c = dc->opcode & 2;
@@ -183,22 +194,52 @@ static void dec_add(DisasContext *dc)
             dc->type_b ? "i" : "", k ? "k" : "", c ? "c" : "",
             dc->rd, dc->ra, dc->rb);
 
-    if (k && !c && dc->rd)
+    /* Take care of the easy cases first.  */
+    if (k) {
+        /* k - keep carry, no need to update MSR.  */
+        /* If rd == r0, it's a nop.  */
+        if (dc->rd) {
+            tcg_gen_add_tl(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+
+            if (c) {
+                /* c - Add carry into the result.  */
+                cf = tcg_temp_new();
+
+                read_carry(dc, cf);
+                tcg_gen_add_tl(cpu_R[dc->rd], cpu_R[dc->rd], cf);
+                tcg_temp_free(cf);
+            }
+        }
+        return;
+    }
+
+    /* From now on, we can assume k is zero.  So we need to update MSR.  */
+    /* Extract carry.  */
+    cf = tcg_temp_new();
+    if (c) {
+        read_carry(dc, cf);
+    } else {
+        tcg_gen_movi_tl(cf, 0);
+    }
+
+    if (dc->rd) {
+        TCGv ncf = tcg_temp_new();
+        gen_helper_carry(ncf, cpu_R[dc->ra], *(dec_alu_op_b(dc)), cf);
         tcg_gen_add_tl(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
-    else if (dc->rd)
-        gen_helper_addkc(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)),
-                         tcg_const_tl(k), tcg_const_tl(c));
-    else {
-        TCGv d = tcg_temp_new();
-        gen_helper_addkc(d, cpu_R[dc->ra], *(dec_alu_op_b(dc)),
-                         tcg_const_tl(k), tcg_const_tl(c));
-        tcg_temp_free(d);
+        tcg_gen_add_tl(cpu_R[dc->rd], cpu_R[dc->rd], cf);
+        write_carry(dc, ncf);
+        tcg_temp_free(ncf);
+    } else {
+        gen_helper_carry(cf, cpu_R[dc->ra], *(dec_alu_op_b(dc)), cf);
+        write_carry(dc, cf);
     }
+    tcg_temp_free(cf);
 }
 
 static void dec_sub(DisasContext *dc)
 {
     unsigned int u, cmp, k, c;
+    TCGv cf, na;
 
     u = dc->imm & 2;
     k = dc->opcode & 4;
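For the non-'k' adds above, gen_helper_carry() only has to produce the carry-out of ra + b + cf; the sum itself is still generated inline with two adds. A minimal sketch of that carry computation (the in-tree helper may compute it differently):

#include <assert.h>
#include <stdint.h>

/* Carry-out of a 32-bit add-with-carry, via 64-bit widening. */
static uint32_t carry_out(uint32_t a, uint32_t b, uint32_t cin)
{
    return (uint32_t)(((uint64_t)a + b + cin) >> 32);
}

int main(void)
{
    assert(carry_out(0xffffffffu, 1, 0) == 1);  /* wraps: carry set */
    assert(carry_out(0x7fffffffu, 1, 0) == 0);  /* signed overflow only */
    assert(carry_out(0xffffffffu, 0, 1) == 1);  /* carry-in alone overflows */
    return 0;
}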
@@ -213,24 +254,57 @@ static void dec_sub(DisasContext *dc)
             else
                 gen_helper_cmp(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
         }
-    } else {
-        LOG_DIS("sub%s%s r%d, r%d r%d\n",
-                 k ? "k" : "",  c ? "c" : "", dc->rd, dc->ra, dc->rb);
+        return;
+    }
 
-        if (!k || c) {
-            TCGv t;
-            t = tcg_temp_new();
-            if (dc->rd)
-                gen_helper_subkc(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)),
-                                 tcg_const_tl(k), tcg_const_tl(c));
-            else
-                gen_helper_subkc(t, cpu_R[dc->ra], *(dec_alu_op_b(dc)),
-                                 tcg_const_tl(k), tcg_const_tl(c));
-            tcg_temp_free(t);
-        }
-        else if (dc->rd)
+    LOG_DIS("sub%s%s r%d, r%d r%d\n",
+             k ? "k" : "",  c ? "c" : "", dc->rd, dc->ra, dc->rb);
+
+    /* Take care of the easy cases first.  */
+    if (k) {
+        /* k - keep carry, no need to update MSR.  */
+        /* If rd == r0, it's a nop.  */
+        if (dc->rd) {
             tcg_gen_sub_tl(cpu_R[dc->rd], *(dec_alu_op_b(dc)), cpu_R[dc->ra]);
+
+            if (c) {
+                /* c - Add carry into the result.  */
+                cf = tcg_temp_new();
+
+                read_carry(dc, cf);
+                tcg_gen_add_tl(cpu_R[dc->rd], cpu_R[dc->rd], cf);
+                tcg_temp_free(cf);
+            }
+        }
+        return;
     }
+
+    /* From now on, we can assume k is zero.  So we need to update MSR.  */
+    /* Extract carry. And complement a into na.  */
+    cf = tcg_temp_new();
+    na = tcg_temp_new();
+    if (c) {
+        read_carry(dc, cf);
+    } else {
+        tcg_gen_movi_tl(cf, 1);
+    }
+
+    /* d = b + ~a + c. carry defaults to 1.  */
+    tcg_gen_not_tl(na, cpu_R[dc->ra]);
+
+    if (dc->rd) {
+        TCGv ncf = tcg_temp_new();
+        gen_helper_carry(ncf, na, *(dec_alu_op_b(dc)), cf);
+        tcg_gen_add_tl(cpu_R[dc->rd], na, *(dec_alu_op_b(dc)));
+        tcg_gen_add_tl(cpu_R[dc->rd], cpu_R[dc->rd], cf);
+        write_carry(dc, ncf);
+        tcg_temp_free(ncf);
+    } else {
+        gen_helper_carry(cf, na, *(dec_alu_op_b(dc)), cf);
+        write_carry(dc, cf);
+    }
+    tcg_temp_free(cf);
+    tcg_temp_free(na);
 }
 
 static void dec_pattern(DisasContext *dc)
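The reworked dec_sub() above leans on the usual two's-complement identity: rsub computes rb - ra as rb + ~ra + carry, with the carry-in defaulting to 1 when no 'c' bit is present. A small sketch of that identity:

#include <assert.h>
#include <stdint.h>

/* d = b + ~a + c: with c == 1 this is b - a, with c == 0 it is b - a - 1,
   matching the rsub/rsubc decomposition used in dec_sub().  */
static uint32_t rsub(uint32_t a, uint32_t b, uint32_t c)
{
    return b + ~a + c;
}

int main(void)
{
    assert(rsub(3, 10, 1) == 7);
    assert(rsub(3, 10, 0) == 6);
    assert(rsub(10, 3, 1) == (uint32_t)-7);  /* borrow wraps, as on hardware */
    return 0;
}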
@@ -336,25 +410,6 @@ static void dec_xor(DisasContext *dc)
         tcg_gen_xor_tl(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
 }
 
-static void read_carry(DisasContext *dc, TCGv d)
-{
-    tcg_gen_shri_tl(d, cpu_SR[SR_MSR], 31);
-}
-
-static void write_carry(DisasContext *dc, TCGv v)
-{
-    TCGv t0 = tcg_temp_new();
-    tcg_gen_shli_tl(t0, v, 31);
-    tcg_gen_sari_tl(t0, t0, 31);
-    tcg_gen_mov_tl(env_debug, t0);
-    tcg_gen_andi_tl(t0, t0, (MSR_C | MSR_CC));
-    tcg_gen_andi_tl(cpu_SR[SR_MSR], cpu_SR[SR_MSR],
-                    ~(MSR_C | MSR_CC));
-    tcg_gen_or_tl(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t0);
-    tcg_temp_free(t0);
-}
-
-
 static inline void msr_read(DisasContext *dc, TCGv d)
 {
     tcg_gen_mov_tl(d, cpu_SR[SR_MSR]);
@@ -362,10 +417,15 @@ static inline void msr_read(DisasContext *dc, TCGv d)
 
 static inline void msr_write(DisasContext *dc, TCGv v)
 {
+    TCGv t;
+
+    t = tcg_temp_new();
     dc->cpustate_changed = 1;
-    tcg_gen_mov_tl(cpu_SR[SR_MSR], v);
-    /* PVR, we have a processor version register.  */
-    tcg_gen_ori_tl(cpu_SR[SR_MSR], cpu_SR[SR_MSR], (1 << 10));
+    /* PVR bit is not writable.  */
+    tcg_gen_andi_tl(t, v, ~MSR_PVR);
+    tcg_gen_andi_tl(cpu_SR[SR_MSR], cpu_SR[SR_MSR], MSR_PVR);
+    tcg_gen_or_tl(cpu_SR[SR_MSR], cpu_SR[SR_MSR], t);
+    tcg_temp_free(t);
 }
 
 static void dec_msr(DisasContext *dc)
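The reworked msr_write() above keeps the PVR bit under emulator control: the incoming value is masked with ~MSR_PVR and only the previous PVR bit survives in MSR. A plain-C sketch of that masking, assuming MSR_PVR is the single "PVR present" bit (1 << 10 in this target):

#include <assert.h>
#include <stdint.h>

#define MSR_PVR (1u << 10)   /* assumed read-only "PVR present" bit */

static uint32_t msr_write_masked(uint32_t old_msr, uint32_t v)
{
    uint32_t t = v & ~MSR_PVR;       /* the guest cannot touch PVR */
    return (old_msr & MSR_PVR) | t;  /* keep the previous PVR bit */
}

int main(void)
{
    assert(msr_write_masked(MSR_PVR, 0) == MSR_PVR);        /* cannot clear */
    assert(msr_write_masked(0, MSR_PVR) == 0);              /* cannot set */
    assert(msr_write_masked(MSR_PVR, 0x5) == (MSR_PVR | 0x5));
    return 0;
}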
@@ -459,6 +519,12 @@ static void dec_msr(DisasContext *dc)
             case 0x7:
                 tcg_gen_andi_tl(cpu_SR[SR_FSR], cpu_R[dc->ra], 31);
                 break;
+            case 0x800:
+                tcg_gen_st_tl(cpu_R[dc->ra], cpu_env, offsetof(CPUMBState, slr));
+                break;
+            case 0x802:
+                tcg_gen_st_tl(cpu_R[dc->ra], cpu_env, offsetof(CPUMBState, shr));
+                break;
             default:
                 cpu_abort(dc->env, "unknown mts reg %x\n", sr);
                 break;
@@ -485,6 +551,12 @@ static void dec_msr(DisasContext *dc)
             case 0xb:
                 tcg_gen_mov_tl(cpu_R[dc->rd], cpu_SR[SR_BTR]);
                 break;
+            case 0x800:
+                tcg_gen_ld_tl(cpu_R[dc->rd], cpu_env, offsetof(CPUMBState, slr));
+                break;
+            case 0x802:
+                tcg_gen_ld_tl(cpu_R[dc->rd], cpu_env, offsetof(CPUMBState, shr));
+                break;
             case 0x2000:
             case 0x2001:
             case 0x2002:
@@ -500,7 +572,7 @@ static void dec_msr(DisasContext *dc)
             case 0x200c:
                 rn = sr & 0xf;
                 tcg_gen_ld_tl(cpu_R[dc->rd],
-                              cpu_env, offsetof(CPUState, pvr.regs[rn]));
+                              cpu_env, offsetof(CPUMBState, pvr.regs[rn]));
                 break;
             default:
                 cpu_abort(dc->env, "unknown mfs reg %x\n", sr);
@@ -671,7 +743,7 @@ static void dec_bit(DisasContext *dc)
     unsigned int op;
     int mem_index = cpu_mmu_index(dc->env);
 
-    op = dc->ir & ((1 << 8) - 1);
+    op = dc->ir & ((1 << 9) - 1);
     switch (op) {
         case 0x21:
             /* src.  */
@@ -742,6 +814,27 @@ static void dec_bit(DisasContext *dc)
                 return;
             }
             break;
+        case 0xe0:
+            if ((dc->tb_flags & MSR_EE_FLAG)
+                && (dc->env->pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)
+                && !((dc->env->pvr.regs[2] & PVR2_USE_PCMP_INSTR))) {
+                tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
+                t_gen_raise_exception(dc, EXCP_HW_EXCP);
+            }
+            if (dc->env->pvr.regs[2] & PVR2_USE_PCMP_INSTR) {
+                gen_helper_clz(cpu_R[dc->rd], cpu_R[dc->ra]);
+            }
+            break;
+        case 0x1e0:
+            /* swapb */
+            LOG_DIS("swapb r%d r%d\n", dc->rd, dc->ra);
+            tcg_gen_bswap32_i32(cpu_R[dc->rd], cpu_R[dc->ra]);
+            break;
+        case 0x1e1:
+            /* swaph */
+            LOG_DIS("swaph r%d r%d\n", dc->rd, dc->ra);
+            tcg_gen_rotri_i32(cpu_R[dc->rd], cpu_R[dc->ra], 16);
+            break;
         default:
             cpu_abort(dc->env, "unknown bit oc=%x op=%x rd=%d ra=%d rb=%d\n",
                      dc->pc, op, dc->rd, dc->ra, dc->rb);
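The new swapb/swaph cases map directly onto TCG primitives: swapb is a full 32-bit byteswap, and swaph (swapping the two halfwords) is the same as a 16-bit rotate, hence tcg_gen_rotri_i32(..., 16). A quick sketch of the two transformations:

#include <assert.h>
#include <stdint.h>

static uint32_t swapb(uint32_t x)   /* reverse all four bytes */
{
    return (x >> 24) | ((x >> 8) & 0x0000ff00u)
         | ((x << 8) & 0x00ff0000u) | (x << 24);
}

static uint32_t swaph(uint32_t x)   /* swap halfwords == rotate by 16 */
{
    return (x >> 16) | (x << 16);
}

int main(void)
{
    assert(swapb(0x11223344u) == 0x44332211u);
    assert(swaph(0x11223344u) == 0x33441122u);
    return 0;
}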
@@ -751,10 +844,12 @@ static void dec_bit(DisasContext *dc)
 
 static inline void sync_jmpstate(DisasContext *dc)
 {
-    if (dc->jmp == JMP_DIRECT) {
-            dc->jmp = JMP_INDIRECT;
+    if (dc->jmp == JMP_DIRECT || dc->jmp == JMP_DIRECT_CC) {
+        if (dc->jmp == JMP_DIRECT) {
             tcg_gen_movi_tl(env_btaken, 1);
-            tcg_gen_movi_tl(env_btarget, dc->jmp_pc);
+        }
+        dc->jmp = JMP_INDIRECT;
+        tcg_gen_movi_tl(env_btarget, dc->jmp_pc);
     }
 }
 
@@ -784,8 +879,15 @@ static inline void gen_load(DisasContext *dc, TCGv dst, TCGv addr,
 static inline TCGv *compute_ldst_addr(DisasContext *dc, TCGv *t)
 {
     unsigned int extimm = dc->tb_flags & IMM_FLAG;
+    /* Should be set to one if r1 is used by loadstores.  */
+    int stackprot = 0;
 
-    /* Treat the fast cases first.  */
+    /* All load/stores use ra.  */
+    if (dc->ra == 1) {
+        stackprot = 1;
+    }
+
+    /* Treat the common cases first.  */
     if (!dc->type_b) {
         /* If any of the regs is r0, return a ptr to the other.  */
         if (dc->ra == 0) {
@@ -794,8 +896,16 @@ static inline TCGv *compute_ldst_addr(DisasContext *dc, TCGv *t)
             return &cpu_R[dc->ra];
         }
 
+        if (dc->rb == 1) {
+            stackprot = 1;
+        }
+
         *t = tcg_temp_new();
         tcg_gen_add_tl(*t, cpu_R[dc->ra], cpu_R[dc->rb]);
+
+        if (stackprot) {
+            gen_helper_stackprot(*t);
+        }
         return t;
     }
     /* Immediate.  */
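compute_ldst_addr() now flags any access that involves r1 and calls gen_helper_stackprot() on the final address. The helper itself lives outside this file; as a hedged sketch of the intended check, assuming slr and shr hold the stack low/high limits exposed via the new mts/mfs cases:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical bounds check for r1-relative accesses; the real helper
   raises a hardware exception on violation rather than returning a flag. */
static int stackprot_violation(uint32_t addr, uint32_t slr, uint32_t shr)
{
    return addr < slr || addr > shr;
}

int main(void)
{
    uint32_t slr = 0x1000, shr = 0x2000;
    printf("%d %d %d\n",
           stackprot_violation(0x0800, slr, shr),   /* below the stack: 1 */
           stackprot_violation(0x1800, slr, shr),   /* inside: 0 */
           stackprot_violation(0x2400, slr, shr));  /* above the stack: 1 */
    return 0;
}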
@@ -811,15 +921,41 @@ static inline TCGv *compute_ldst_addr(DisasContext *dc, TCGv *t)
         tcg_gen_add_tl(*t, cpu_R[dc->ra], *(dec_alu_op_b(dc)));
     }
 
+    if (stackprot) {
+        gen_helper_stackprot(*t);
+    }
     return t;
 }
 
+static inline void dec_byteswap(DisasContext *dc, TCGv dst, TCGv src, int size)
+{
+    if (size == 4) {
+        tcg_gen_bswap32_tl(dst, src);
+    } else if (size == 2) {
+        TCGv t = tcg_temp_new();
+
+        /* bswap16 assumes the high bits are zero.  */
+        tcg_gen_andi_tl(t, src, 0xffff);
+        tcg_gen_bswap16_tl(dst, t);
+        tcg_temp_free(t);
+    } else {
+        /* Ignore.
+        cpu_abort(dc->env, "Invalid ldst byteswap size %d\n", size);
+        */
+    }
+}
+
 static void dec_load(DisasContext *dc)
 {
     TCGv t, *addr;
-    unsigned int size;
+    unsigned int size, rev = 0;
 
     size = 1 << (dc->opcode & 3);
+
+    if (!dc->type_b) {
+        rev = (dc->ir >> 9) & 1;
+    }
+
     if (size > 4 && (dc->tb_flags & MSR_EE_FLAG)
           && (dc->env->pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)) {
         tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_ILLEGAL_OP);
@@ -827,10 +963,62 @@ static void dec_load(DisasContext *dc)
         return;
     }
 
-    LOG_DIS("l %x %d\n", dc->opcode, size);
+    LOG_DIS("l%d%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "");
+
     t_sync_flags(dc);
     addr = compute_ldst_addr(dc, &t);
 
+    /*
+     * When doing reverse accesses we need to do two things.
+     *
+     * 1. Reverse the address wrt endianness.
+     * 2. Byteswap the data lanes on the way back into the CPU core.
+     */
+    if (rev && size != 4) {
+        /* Endian reverse the address. t is addr.  */
+        switch (size) {
+            case 1:
+            {
+                /* 00 -> 11
+                   01 -> 10
+                   10 -> 01
+                   11 -> 00 */
+                TCGv low = tcg_temp_new();
+
+                /* Force addr into the temp.  */
+                if (addr != &t) {
+                    t = tcg_temp_new();
+                    tcg_gen_mov_tl(t, *addr);
+                    addr = &t;
+                }
+
+                tcg_gen_andi_tl(low, t, 3);
+                tcg_gen_sub_tl(low, tcg_const_tl(3), low);
+                tcg_gen_andi_tl(t, t, ~3);
+                tcg_gen_or_tl(t, t, low);
+                tcg_gen_mov_tl(env_imm, t);
+                tcg_temp_free(low);
+                break;
+            }
+
+            case 2:
+                /* 00 -> 10
+                   10 -> 00.  */
+                /* Force addr into the temp.  */
+                if (addr != &t) {
+                    t = tcg_temp_new();
+                    tcg_gen_xori_tl(t, *addr, 2);
+                    addr = &t;
+                } else {
+                    tcg_gen_xori_tl(t, t, 2);
+                }
+                break;
+            default:
+                cpu_abort(dc->env, "Invalid reverse size\n");
+                break;
+        }
+    }
+
     /* If we get a fault on a dslot, the jmpstate better be in sync.  */
     sync_jmpstate(dc);
 
@@ -849,13 +1037,22 @@ static void dec_load(DisasContext *dc)
         tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc);
         gen_helper_memalign(*addr, tcg_const_tl(dc->rd),
                             tcg_const_tl(0), tcg_const_tl(size - 1));
-        if (dc->rd)
-            tcg_gen_mov_tl(cpu_R[dc->rd], v);
+        if (dc->rd) {
+            if (rev) {
+                dec_byteswap(dc, cpu_R[dc->rd], v, size);
+            } else {
+                tcg_gen_mov_tl(cpu_R[dc->rd], v);
+            }
+        }
         tcg_temp_free(v);
     } else {
         if (dc->rd) {
             gen_load(dc, cpu_R[dc->rd], *addr, size);
+            if (rev) {
+                dec_byteswap(dc, cpu_R[dc->rd], cpu_R[dc->rd], size);
+            }
         } else {
+            /* We are loading into r0, no need to reverse.  */
             gen_load(dc, env_imm, *addr, size);
         }
     }
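For the reverse ('r') variants decoded above, byte accesses only get their address endian-reversed, halfword accesses get both the address flip and a data byteswap, and word accesses keep the address and only swap the data. The low-bit munging generated for loads (and repeated for stores below) boils down to the following mapping:

#include <assert.h>
#include <stdint.h>

static uint32_t rev_addr(uint32_t addr, unsigned size)
{
    switch (size) {
    case 1:
        /* 00 -> 11, 01 -> 10, 10 -> 01, 11 -> 00, i.e. addr ^ 3 */
        return (addr & ~3u) | (3u - (addr & 3u));
    case 2:
        /* 00 -> 10, 10 -> 00, i.e. addr ^ 2 */
        return addr ^ 2u;
    default:
        return addr;     /* size 4: data swap only */
    }
}

int main(void)
{
    assert(rev_addr(0x1000, 1) == 0x1003);
    assert(rev_addr(0x1002, 1) == 0x1001);
    assert(rev_addr(0x1000, 2) == 0x1002);
    assert(rev_addr(0x1004, 4) == 0x1004);
    return 0;
}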
@@ -882,9 +1079,12 @@ static void gen_store(DisasContext *dc, TCGv addr, TCGv val,
 static void dec_store(DisasContext *dc)
 {
     TCGv t, *addr;
-    unsigned int size;
+    unsigned int size, rev = 0;
 
     size = 1 << (dc->opcode & 3);
+    if (!dc->type_b) {
+        rev = (dc->ir >> 9) & 1;
+    }
 
     if (size > 4 && (dc->tb_flags & MSR_EE_FLAG)
           && (dc->env->pvr.regs[2] & PVR2_ILL_OPCODE_EXC_MASK)) {
@@ -893,19 +1093,83 @@ static void dec_store(DisasContext *dc)
         return;
     }
 
-    LOG_DIS("s%d%s\n", size, dc->type_b ? "i" : "");
+    LOG_DIS("s%d%s%s\n", size, dc->type_b ? "i" : "", rev ? "r" : "");
     t_sync_flags(dc);
     /* If we get a fault on a dslot, the jmpstate better be in sync.  */
     sync_jmpstate(dc);
     addr = compute_ldst_addr(dc, &t);
 
-    gen_store(dc, *addr, cpu_R[dc->rd], size);
+    if (rev && size != 4) {
+        /* Endian reverse the address. t is addr.  */
+        switch (size) {
+            case 1:
+            {
+                /* 00 -> 11
+                   01 -> 10
+                   10 -> 01
+                   11 -> 00 */
+                TCGv low = tcg_temp_new();
+
+                /* Force addr into the temp.  */
+                if (addr != &t) {
+                    t = tcg_temp_new();
+                    tcg_gen_mov_tl(t, *addr);
+                    addr = &t;
+                }
+
+                tcg_gen_andi_tl(low, t, 3);
+                tcg_gen_sub_tl(low, tcg_const_tl(3), low);
+                tcg_gen_andi_tl(t, t, ~3);
+                tcg_gen_or_tl(t, t, low);
+                tcg_gen_mov_tl(env_imm, t);
+                tcg_temp_free(low);
+                break;
+            }
+
+            case 2:
+                /* 00 -> 10
+                   10 -> 00.  */
+                /* Force addr into the temp.  */
+                if (addr != &t) {
+                    t = tcg_temp_new();
+                    tcg_gen_xori_tl(t, *addr, 2);
+                    addr = &t;
+                } else {
+                    tcg_gen_xori_tl(t, t, 2);
+                }
+                break;
+            default:
+                cpu_abort(dc->env, "Invalid reverse size\n");
+                break;
+        }
+
+        if (size != 1) {
+            TCGv bs_data = tcg_temp_new();
+            dec_byteswap(dc, bs_data, cpu_R[dc->rd], size);
+            gen_store(dc, *addr, bs_data, size);
+            tcg_temp_free(bs_data);
+        } else {
+            gen_store(dc, *addr, cpu_R[dc->rd], size);
+        }
+    } else {
+        if (rev) {
+            TCGv bs_data = tcg_temp_new();
+            dec_byteswap(dc, bs_data, cpu_R[dc->rd], size);
+            gen_store(dc, *addr, bs_data, size);
+            tcg_temp_free(bs_data);
+        } else {
+            gen_store(dc, *addr, cpu_R[dc->rd], size);
+        }
+    }
 
     /* Verify alignment if needed.  */
     if ((dc->env->pvr.regs[2] & PVR2_UNALIGNED_EXC_MASK) && size > 1) {
         tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc);
         /* FIXME: if the alignment is wrong, we should restore the value
-         *        in memory.
+         *        in memory. One possible way to achieve this is to probe
+         *        the MMU prior to the memaccess, that way we could put
+         *        the alignment checks in between the probe and the mem
+         *        access.
          */
         gen_helper_memalign(*addr, tcg_const_tl(dc->rd),
                             tcg_const_tl(1), tcg_const_tl(size - 1));
@@ -969,29 +1233,41 @@ static void dec_bcc(DisasContext *dc)
         dc->delayed_branch = 2;
         dc->tb_flags |= D_FLAG;
         tcg_gen_st_tl(tcg_const_tl(dc->type_b && (dc->tb_flags & IMM_FLAG)),
-                      cpu_env, offsetof(CPUState, bimm));
+                      cpu_env, offsetof(CPUMBState, bimm));
     }
 
     if (dec_alu_op_b_is_small_imm(dc)) {
         int32_t offset = (int32_t)((int16_t)dc->imm); /* sign-extend.  */
 
         tcg_gen_movi_tl(env_btarget, dc->pc + offset);
+        dc->jmp = JMP_DIRECT_CC;
+        dc->jmp_pc = dc->pc + offset;
     } else {
+        dc->jmp = JMP_INDIRECT;
         tcg_gen_movi_tl(env_btarget, dc->pc);
         tcg_gen_add_tl(env_btarget, env_btarget, *(dec_alu_op_b(dc)));
     }
-    dc->jmp = JMP_INDIRECT;
     eval_cc(dc, cc, env_btaken, cpu_R[dc->ra], tcg_const_tl(0));
 }
 
 static void dec_br(DisasContext *dc)
 {
-    unsigned int dslot, link, abs;
+    unsigned int dslot, link, abs, mbar;
     int mem_index = cpu_mmu_index(dc->env);
 
     dslot = dc->ir & (1 << 20);
     abs = dc->ir & (1 << 19);
     link = dc->ir & (1 << 18);
+
+    /* Memory barrier.  */
+    mbar = (dc->ir >> 16) & 31;
+    if (mbar == 2 && dc->imm == 4) {
+        LOG_DIS("mbar %d\n", dc->rd);
+        /* Break the TB.  */
+        dc->cpustate_changed = 1;
+        return;
+    }
+
     LOG_DIS("br%s%s%s%s imm=%x\n",
              abs ? "a" : "", link ? "l" : "",
              dc->type_b ? "i" : "", dslot ? "d" : "",
@@ -1002,7 +1278,7 @@ static void dec_br(DisasContext *dc)
         dc->delayed_branch = 2;
         dc->tb_flags |= D_FLAG;
         tcg_gen_st_tl(tcg_const_tl(dc->type_b && (dc->tb_flags & IMM_FLAG)),
-                      cpu_env, offsetof(CPUState, bimm));
+                      cpu_env, offsetof(CPUMBState, bimm));
     }
     if (link && dc->rd)
         tcg_gen_movi_tl(cpu_R[dc->rd], dc->pc);
@@ -1101,7 +1377,7 @@ static void dec_rts(DisasContext *dc)
     dc->delayed_branch = 2;
     dc->tb_flags |= D_FLAG;
     tcg_gen_st_tl(tcg_const_tl(dc->type_b && (dc->tb_flags & IMM_FLAG)),
-                  cpu_env, offsetof(CPUState, bimm));
+                  cpu_env, offsetof(CPUMBState, bimm));
 
     if (i_bit) {
         LOG_DIS("rtid ir=%x\n", dc->ir);
@@ -1130,6 +1406,7 @@ static void dec_rts(DisasContext *dc)
     } else
         LOG_DIS("rts ir=%x\n", dc->ir);
 
+    dc->jmp = JMP_INDIRECT;
     tcg_gen_movi_tl(env_btaken, 1);
     tcg_gen_add_tl(env_btarget, cpu_R[dc->ra], *(dec_alu_op_b(dc)));
 }
@@ -1257,6 +1534,42 @@ static void dec_null(DisasContext *dc)
     dc->abort_at_next_insn = 1;
 }
 
+/* Insns connected to FSL or AXI stream attached devices.  */
+static void dec_stream(DisasContext *dc)
+{
+    int mem_index = cpu_mmu_index(dc->env);
+    TCGv_i32 t_id, t_ctrl;
+    int ctrl;
+
+    LOG_DIS("%s%s imm=%x\n", dc->rd ? "get" : "put",
+            dc->type_b ? "" : "d", dc->imm);
+
+    if ((dc->tb_flags & MSR_EE_FLAG) && (mem_index == MMU_USER_IDX)) {
+        tcg_gen_movi_tl(cpu_SR[SR_ESR], ESR_EC_PRIVINSN);
+        t_gen_raise_exception(dc, EXCP_HW_EXCP);
+        return;
+    }
+
+    t_id = tcg_temp_new();
+    if (dc->type_b) {
+        tcg_gen_movi_tl(t_id, dc->imm & 0xf);
+        ctrl = dc->imm >> 10;
+    } else {
+        tcg_gen_andi_tl(t_id, cpu_R[dc->rb], 0xf);
+        ctrl = dc->imm >> 5;
+    }
+
+    t_ctrl = tcg_const_tl(ctrl);
+
+    if (dc->rd == 0) {
+        gen_helper_put(t_id, t_ctrl, cpu_R[dc->ra]);
+    } else {
+        gen_helper_get(cpu_R[dc->rd], t_id, t_ctrl);
+    }
+    tcg_temp_free(t_id);
+    tcg_temp_free(t_ctrl);
+}
+
 static struct decoder_info {
     struct {
         uint32_t bits;
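The dec_stream() decoder added above only extracts the operands and defers the actual FSL/AXI stream transfer to gen_helper_put()/gen_helper_get(): immediate (type_b) forms take the stream id from imm[3:0] and the control flags from imm >> 10, dynamic forms take the id from rb and the flags from imm >> 5, and rd == 0 distinguishes a put from a get. A small sketch of that operand split:

#include <stdint.h>
#include <stdio.h>

struct stream_op { uint32_t id; uint32_t ctrl; };

/* Mirrors the id/ctrl extraction in dec_stream() for both encodings. */
static struct stream_op decode_stream(int type_b, uint32_t imm, uint32_t rb_val)
{
    struct stream_op op;
    if (type_b) {
        op.id = imm & 0xf;
        op.ctrl = imm >> 10;
    } else {
        op.id = rb_val & 0xf;
        op.ctrl = imm >> 5;
    }
    return op;
}

int main(void)
{
    struct stream_op op = decode_stream(1, (1u << 10) | 0x3, 0);
    printf("id=%u ctrl=%u\n", op.id, op.ctrl);   /* id=3 ctrl=1 */
    return 0;
}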
@@ -1281,6 +1594,7 @@ static struct decoder_info {
     {DEC_MUL, dec_mul},
     {DEC_DIV, dec_div},
     {DEC_MSR, dec_msr},
+    {DEC_STREAM, dec_stream},
     {{0, 0}, dec_null}
 };
 
@@ -1329,7 +1643,7 @@ static inline void decode(DisasContext *dc)
     }
 }
 
-static void check_breakpoint(CPUState *env, DisasContext *dc)
+static void check_breakpoint(CPUMBState *env, DisasContext *dc)
 {
     CPUBreakpoint *bp;
 
@@ -1345,7 +1659,7 @@ static void check_breakpoint(CPUState *env, DisasContext *dc)
 
 /* generate intermediate code for basic block 'tb'.  */
 static void
-gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
+gen_intermediate_code_internal(CPUMBState *env, TranslationBlock *tb,
                                int search_pc)
 {
     uint16_t *gen_opc_end;
@@ -1370,6 +1684,9 @@ gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
     dc->is_jmp = DISAS_NEXT;
     dc->jmp = 0;
     dc->delayed_branch = !!(dc->tb_flags & D_FLAG);
+    if (dc->delayed_branch) {
+        dc->jmp = JMP_INDIRECT;
+    }
     dc->pc = pc_start;
     dc->singlestep_enabled = env->singlestep_enabled;
     dc->cpustate_changed = 0;
@@ -1441,9 +1758,25 @@ gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
                 /* Clear the delay slot flag.  */
                 dc->tb_flags &= ~D_FLAG;
                 /* If it is a direct jump, try direct chaining.  */
-                if (dc->jmp != JMP_DIRECT) {
+                if (dc->jmp == JMP_INDIRECT) {
                     eval_cond_jmp(dc, env_btarget, tcg_const_tl(dc->pc));
                     dc->is_jmp = DISAS_JUMP;
+                } else if (dc->jmp == JMP_DIRECT) {
+                    t_sync_flags(dc);
+                    gen_goto_tb(dc, 0, dc->jmp_pc);
+                    dc->is_jmp = DISAS_TB_JUMP;
+                } else if (dc->jmp == JMP_DIRECT_CC) {
+                    int l1;
+
+                    t_sync_flags(dc);
+                    l1 = gen_new_label();
+                    /* Conditional jmp.  */
+                    tcg_gen_brcondi_tl(TCG_COND_NE, env_btaken, 0, l1);
+                    gen_goto_tb(dc, 1, dc->pc);
+                    gen_set_label(l1);
+                    gen_goto_tb(dc, 0, dc->jmp_pc);
+
+                    dc->is_jmp = DISAS_TB_JUMP;
                 }
                 break;
             }
@@ -1457,7 +1790,7 @@ gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
                  && num_insns < max_insns);
 
     npc = dc->pc;
-    if (dc->jmp == JMP_DIRECT) {
+    if (dc->jmp == JMP_DIRECT || dc->jmp == JMP_DIRECT_CC) {
         if (dc->tb_flags & D_FLAG) {
             dc->is_jmp = DISAS_UPDATE;
             tcg_gen_movi_tl(cpu_SR[SR_PC], npc);
@@ -1477,9 +1810,13 @@ gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
     t_sync_flags(dc);
 
     if (unlikely(env->singlestep_enabled)) {
-        t_gen_raise_exception(dc, EXCP_DEBUG);
-        if (dc->is_jmp == DISAS_NEXT)
+        TCGv_i32 tmp = tcg_const_i32(EXCP_DEBUG);
+
+        if (dc->is_jmp != DISAS_JUMP) {
             tcg_gen_movi_tl(cpu_SR[SR_PC], npc);
+        }
+        gen_helper_raise_exception(tmp);
+        tcg_temp_free_i32(tmp);
     } else {
         switch(dc->is_jmp) {
             case DISAS_NEXT:
@@ -1524,17 +1861,17 @@ gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
     assert(!dc->abort_at_next_insn);
 }
 
-void gen_intermediate_code (CPUState *env, struct TranslationBlock *tb)
+void gen_intermediate_code (CPUMBState *env, struct TranslationBlock *tb)
 {
     gen_intermediate_code_internal(env, tb, 0);
 }
 
-void gen_intermediate_code_pc (CPUState *env, struct TranslationBlock *tb)
+void gen_intermediate_code_pc (CPUMBState *env, struct TranslationBlock *tb)
 {
     gen_intermediate_code_internal(env, tb, 1);
 }
 
-void cpu_dump_state (CPUState *env, FILE *f, fprintf_function cpu_fprintf,
+void cpu_dump_state (CPUMBState *env, FILE *f, fprintf_function cpu_fprintf,
                      int flags)
 {
     int i;
@@ -1562,17 +1899,18 @@ void cpu_dump_state (CPUState *env, FILE *f, fprintf_function cpu_fprintf,
     cpu_fprintf(f, "\n\n");
 }
 
-CPUState *cpu_mb_init (const char *cpu_model)
+CPUMBState *cpu_mb_init (const char *cpu_model)
 {
-    CPUState *env;
+    MicroBlazeCPU *cpu;
+    CPUMBState *env;
     static int tcg_initialized = 0;
     int i;
 
-    env = qemu_mallocz(sizeof(CPUState));
+    cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU));
+    env = &cpu->env;
 
-    cpu_exec_init(env);
-    cpu_reset(env);
-    set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
+    cpu_reset(CPU(cpu));
+    qemu_init_vcpu(env);
 
     if (tcg_initialized)
         return env;
@@ -1582,28 +1920,28 @@ CPUState *cpu_mb_init (const char *cpu_model)
     cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
 
     env_debug = tcg_global_mem_new(TCG_AREG0, 
-                    offsetof(CPUState, debug),
+                    offsetof(CPUMBState, debug),
                     "debug0");
     env_iflags = tcg_global_mem_new(TCG_AREG0, 
-                    offsetof(CPUState, iflags),
+                    offsetof(CPUMBState, iflags),
                     "iflags");
     env_imm = tcg_global_mem_new(TCG_AREG0, 
-                    offsetof(CPUState, imm),
+                    offsetof(CPUMBState, imm),
                     "imm");
     env_btarget = tcg_global_mem_new(TCG_AREG0,
-                     offsetof(CPUState, btarget),
+                     offsetof(CPUMBState, btarget),
                      "btarget");
     env_btaken = tcg_global_mem_new(TCG_AREG0,
-                     offsetof(CPUState, btaken),
+                     offsetof(CPUMBState, btaken),
                      "btaken");
     for (i = 0; i < ARRAY_SIZE(cpu_R); i++) {
         cpu_R[i] = tcg_global_mem_new(TCG_AREG0,
-                          offsetof(CPUState, regs[i]),
+                          offsetof(CPUMBState, regs[i]),
                           regnames[i]);
     }
     for (i = 0; i < ARRAY_SIZE(cpu_SR); i++) {
         cpu_SR[i] = tcg_global_mem_new(TCG_AREG0,
-                          offsetof(CPUState, sregs[i]),
+                          offsetof(CPUMBState, sregs[i]),
                           special_regnames[i]);
     }
 #define GEN_HELPER 2
@@ -1612,57 +1950,12 @@ CPUState *cpu_mb_init (const char *cpu_model)
     return env;
 }
 
-void cpu_reset (CPUState *env)
+void cpu_state_reset(CPUMBState *env)
 {
-    if (qemu_loglevel_mask(CPU_LOG_RESET)) {
-        qemu_log("CPU Reset (CPU %d)\n", env->cpu_index);
-        log_cpu_state(env, 0);
-    }
-
-    memset(env, 0, offsetof(CPUMBState, breakpoints));
-    tlb_flush(env, 1);
-
-    env->pvr.regs[0] = PVR0_PVR_FULL_MASK \
-                       | PVR0_USE_BARREL_MASK \
-                       | PVR0_USE_DIV_MASK \
-                       | PVR0_USE_HW_MUL_MASK \
-                       | PVR0_USE_EXC_MASK \
-                       | PVR0_USE_ICACHE_MASK \
-                       | PVR0_USE_DCACHE_MASK \
-                       | PVR0_USE_MMU \
-                       | (0xb << 8);
-    env->pvr.regs[2] = PVR2_D_OPB_MASK \
-                        | PVR2_D_LMB_MASK \
-                        | PVR2_I_OPB_MASK \
-                        | PVR2_I_LMB_MASK \
-                        | PVR2_USE_MSR_INSTR \
-                        | PVR2_USE_PCMP_INSTR \
-                        | PVR2_USE_BARREL_MASK \
-                        | PVR2_USE_DIV_MASK \
-                        | PVR2_USE_HW_MUL_MASK \
-                        | PVR2_USE_MUL64_MASK \
-                        | PVR2_USE_FPU_MASK \
-                        | PVR2_USE_FPU2_MASK \
-                        | PVR2_FPU_EXC_MASK \
-                        | 0;
-    env->pvr.regs[10] = 0x0c000000; /* Default to spartan 3a dsp family.  */
-    env->pvr.regs[11] = PVR11_USE_MMU | (16 << 17);
-
-#if defined(CONFIG_USER_ONLY)
-    /* start in user mode with interrupts enabled.  */
-    env->sregs[SR_MSR] = MSR_EE | MSR_IE | MSR_VM | MSR_UM;
-    env->pvr.regs[10] = 0x0c000000; /* Spartan 3a dsp.  */
-#else
-    env->sregs[SR_MSR] = 0;
-    mmu_init(&env->mmu);
-    env->mmu.c_mmu = 3;
-    env->mmu.c_mmu_tlb_access = 3;
-    env->mmu.c_mmu_zones = 16;
-#endif
+    cpu_reset(ENV_GET_CPU(env));
 }
 
-void gen_pc_load(CPUState *env, struct TranslationBlock *tb,
-                 unsigned long searched_pc, int pc_pos, void *puc)
+void restore_state_to_opc(CPUMBState *env, TranslationBlock *tb, int pc_pos)
 {
     env->sregs[SR_PC] = gen_opc_pc[pc_pos];
 }