]> git.proxmox.com Git - qemu.git/blobdiff - exec.c
memory: move endianness compensation to memory core
[qemu.git] / exec.c
diff --git a/exec.c b/exec.c
index 238c173de4d55e2ebd2271963f8a016cc3e8e245..507d37c61761d71d8b39d0073724ceb0200ff6d4 100644 (file)
--- a/exec.c
+++ b/exec.c
@@ -26,7 +26,6 @@
 
 #include "qemu-common.h"
 #include "cpu.h"
-#include "exec-all.h"
 #include "tcg.h"
 #include "hw/hw.h"
 #include "hw/qdev.h"
@@ -34,6 +33,8 @@
 #include "kvm.h"
 #include "hw/xen.h"
 #include "qemu-timer.h"
+#include "memory.h"
+#include "exec-memory.h"
 #if defined(CONFIG_USER_ONLY)
 #include <qemu.h>
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -56,6 +57,9 @@
 #include "trace.h"
 #endif
 
+#define WANT_EXEC_OBSOLETE
+#include "exec-obsolete.h"
+
 //#define DEBUG_TB_INVALIDATE
 //#define DEBUG_FLUSH
 //#define DEBUG_TLB
@@ -109,20 +113,21 @@ static uint8_t *code_gen_ptr;
 int phys_ram_fd;
 static int in_migration;
 
-RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
+RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
+
+static MemoryRegion *system_memory;
+static MemoryRegion *system_io;
+
 #endif
 
 CPUState *first_cpu;
 /* current CPU in the current thread. It is only valid inside
    cpu_exec() */
-CPUState *cpu_single_env;
+DEFINE_TLS(CPUState *,cpu_single_env);
 /* 0 = Do not count executed instructions.
    1 = Precise instruction counting.
    2 = Adaptive rate instruction counting.  */
 int use_icount = 0;
-/* Current instruction counter.  While executing translated code this may
-   include some instructions that have not yet been executed.  */
-int64_t qemu_icount;
 
 typedef struct PageDesc {
     /* list of TBs intersecting this ram page */
@@ -178,7 +183,6 @@ typedef struct PageDesc {
 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
 
 unsigned long qemu_real_host_page_size;
-unsigned long qemu_host_page_bits;
 unsigned long qemu_host_page_size;
 unsigned long qemu_host_page_mask;
 
@@ -198,6 +202,7 @@ typedef struct PhysPageDesc {
 static void *l1_phys_map[P_L1_SIZE];
 
 static void io_mem_init(void);
+static void memory_map_init(void);
 
 /* io memory support */
 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
@@ -268,9 +273,6 @@ static void page_init(void)
         qemu_host_page_size = qemu_real_host_page_size;
     if (qemu_host_page_size < TARGET_PAGE_SIZE)
         qemu_host_page_size = TARGET_PAGE_SIZE;
-    qemu_host_page_bits = 0;
-    while ((1 << qemu_host_page_bits) < qemu_host_page_size)
-        qemu_host_page_bits++;
     qemu_host_page_mask = ~(qemu_host_page_size - 1);
 
 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
@@ -346,7 +348,7 @@ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
     int i;
 
 #if defined(CONFIG_USER_ONLY)
-    /* We can't use qemu_malloc because it may recurse into a locked mutex. */
+    /* We can't use g_malloc because it may recurse into a locked mutex. */
 # define ALLOC(P, SIZE)                                 \
     do {                                                \
         P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
@@ -354,7 +356,7 @@ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
     } while (0)
 #else
 # define ALLOC(P, SIZE) \
-    do { P = qemu_mallocz(SIZE); } while (0)
+    do { P = g_malloc0(SIZE); } while (0)
 #endif
 
     /* Level 1.  Always allocated.  */
@@ -411,7 +413,7 @@ static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
             if (!alloc) {
                 return NULL;
             }
-            *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
+            *lp = p = g_malloc0(sizeof(void *) * L2_SIZE);
         }
         lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
     }
@@ -419,16 +421,17 @@ static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
     pd = *lp;
     if (pd == NULL) {
         int i;
+        int first_index = index & ~(L2_SIZE - 1);
 
         if (!alloc) {
             return NULL;
         }
 
-        *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
+        *lp = pd = g_malloc(sizeof(PhysPageDesc) * L2_SIZE);
 
         for (i = 0; i < L2_SIZE; i++) {
             pd[i].phys_offset = IO_MEM_UNASSIGNED;
-            pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
+            pd[i].region_offset = (first_index + i) << TARGET_PAGE_BITS;
         }
     }
 
@@ -470,7 +473,6 @@ static void code_gen_alloc(unsigned long tb_size)
     code_gen_buffer_size = tb_size;
     if (code_gen_buffer_size == 0) {
 #if defined(CONFIG_USER_ONLY)
-        /* in user mode, phys_ram_size is not meaningful */
         code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
 #else
         /* XXX: needs adjustments */
@@ -499,9 +501,7 @@ static void code_gen_alloc(unsigned long tb_size)
         if (code_gen_buffer_size > (512 * 1024 * 1024))
             code_gen_buffer_size = (512 * 1024 * 1024);
 #elif defined(__arm__)
-        /* Map the buffer below 32M, so we can use direct calls and branches */
-        flags |= MAP_FIXED;
-        start = (void *) 0x01000000UL;
+        /* Keep the buffer no bigger than 16GB to branch between blocks */
         if (code_gen_buffer_size > 16 * 1024 * 1024)
             code_gen_buffer_size = 16 * 1024 * 1024;
 #elif defined(__s390x__)
@@ -521,7 +521,8 @@ static void code_gen_alloc(unsigned long tb_size)
         }
     }
 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__)
+    || defined(__DragonFly__) || defined(__OpenBSD__) \
+    || defined(__NetBSD__)
     {
         int flags;
         void *addr = NULL;
@@ -551,29 +552,26 @@ static void code_gen_alloc(unsigned long tb_size)
         }
     }
 #else
-    code_gen_buffer = qemu_malloc(code_gen_buffer_size);
+    code_gen_buffer = g_malloc(code_gen_buffer_size);
     map_exec(code_gen_buffer, code_gen_buffer_size);
 #endif
 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
     map_exec(code_gen_prologue, sizeof(code_gen_prologue));
-    code_gen_buffer_max_size = code_gen_buffer_size - 
-        (TCG_MAX_OP_SIZE * OPC_MAX_SIZE);
+    code_gen_buffer_max_size = code_gen_buffer_size -
+        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
     code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
-    tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
+    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
 }
 
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
    (in bytes) allocated to the translation buffer. Zero means default
    size. */
-void cpu_exec_init_all(unsigned long tb_size)
+void tcg_exec_init(unsigned long tb_size)
 {
     cpu_gen_init();
     code_gen_alloc(tb_size);
     code_gen_ptr = code_gen_buffer;
     page_init();
-#if !defined(CONFIG_USER_ONLY)
-    io_mem_init();
-#endif
 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
     /* There's no guest base to take into account, so go ahead and
        initialize the prologue now.  */
@@ -581,6 +579,19 @@ void cpu_exec_init_all(unsigned long tb_size)
 #endif
 }
 
+bool tcg_enabled(void)
+{
+    return code_gen_buffer != NULL;
+}
+
+void cpu_exec_init_all(void)
+{
+#if !defined(CONFIG_USER_ONLY)
+    memory_map_init();
+    io_mem_init();
+#endif
+}
+
 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 
 static int cpu_common_post_load(void *opaque, int version_id)
@@ -684,7 +695,7 @@ void tb_free(TranslationBlock *tb)
 static inline void invalidate_page_bitmap(PageDesc *p)
 {
     if (p->code_bitmap) {
-        qemu_free(p->code_bitmap);
+        g_free(p->code_bitmap);
         p->code_bitmap = NULL;
     }
     p->code_write_count = 0;
@@ -944,7 +955,7 @@ static void build_page_bitmap(PageDesc *p)
     int n, tb_start, tb_end;
     TranslationBlock *tb;
 
-    p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
+    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
 
     tb = p->first_tb;
     while (tb != NULL) {
@@ -1208,12 +1219,16 @@ static inline void tb_alloc_page(TranslationBlock *tb,
                                  unsigned int n, tb_page_addr_t page_addr)
 {
     PageDesc *p;
-    TranslationBlock *last_first_tb;
+#ifndef CONFIG_USER_ONLY
+    bool page_already_protected;
+#endif
 
     tb->page_addr[n] = page_addr;
     p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
     tb->page_next[n] = p->first_tb;
-    last_first_tb = p->first_tb;
+#ifndef CONFIG_USER_ONLY
+    page_already_protected = p->first_tb != NULL;
+#endif
     p->first_tb = (TranslationBlock *)((long)tb | n);
     invalidate_page_bitmap(p);
 
@@ -1249,7 +1264,7 @@ static inline void tb_alloc_page(TranslationBlock *tb,
     /* if some code is already present, then the pages are already
        protected. So we handle the case where only the first TB is
        allocated in a physical page */
-    if (!last_first_tb) {
+    if (!page_already_protected) {
         tlb_protect_code(page_addr);
     }
 #endif
@@ -1427,7 +1442,7 @@ int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
                 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
         return -EINVAL;
     }
-    wp = qemu_malloc(sizeof(*wp));
+    wp = g_malloc(sizeof(*wp));
 
     wp->vaddr = addr;
     wp->len_mask = len_mask;
@@ -1470,7 +1485,7 @@ void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
 
     tlb_flush_page(env, watchpoint->vaddr);
 
-    qemu_free(watchpoint);
+    g_free(watchpoint);
 }
 
 /* Remove all matching watchpoints.  */
@@ -1492,7 +1507,7 @@ int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
 #if defined(TARGET_HAS_ICE)
     CPUBreakpoint *bp;
 
-    bp = qemu_malloc(sizeof(*bp));
+    bp = g_malloc(sizeof(*bp));
 
     bp->pc = pc;
     bp->flags = flags;
@@ -1539,7 +1554,7 @@ void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
 
     breakpoint_invalidate(env, breakpoint->pc);
 
-    qemu_free(breakpoint);
+    g_free(breakpoint);
 #endif
 }
 
@@ -1590,8 +1605,10 @@ void cpu_set_log(int log_flags)
             static char logfile_buf[4096];
             setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
         }
-#elif !defined(_WIN32)
-        /* Win32 doesn't support line-buffering and requires size >= 2 */
+#elif defined(_WIN32)
+        /* Win32 doesn't support line-buffering, so use unbuffered output. */
+        setvbuf(logfile, NULL, _IONBF, 0);
+#else
         setvbuf(logfile, NULL, _IOLBF, 0);
 #endif
         log_append = 1;
@@ -1715,97 +1732,6 @@ const CPULogItem cpu_log_items[] = {
     { 0, NULL, NULL },
 };
 
-#ifndef CONFIG_USER_ONLY
-static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
-    = QLIST_HEAD_INITIALIZER(memory_client_list);
-
-static void cpu_notify_set_memory(target_phys_addr_t start_addr,
-                                  ram_addr_t size,
-                                  ram_addr_t phys_offset,
-                                  bool log_dirty)
-{
-    CPUPhysMemoryClient *client;
-    QLIST_FOREACH(client, &memory_client_list, list) {
-        client->set_memory(client, start_addr, size, phys_offset, log_dirty);
-    }
-}
-
-static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
-                                        target_phys_addr_t end)
-{
-    CPUPhysMemoryClient *client;
-    QLIST_FOREACH(client, &memory_client_list, list) {
-        int r = client->sync_dirty_bitmap(client, start, end);
-        if (r < 0)
-            return r;
-    }
-    return 0;
-}
-
-static int cpu_notify_migration_log(int enable)
-{
-    CPUPhysMemoryClient *client;
-    QLIST_FOREACH(client, &memory_client_list, list) {
-        int r = client->migration_log(client, enable);
-        if (r < 0)
-            return r;
-    }
-    return 0;
-}
-
-/* The l1_phys_map provides the upper P_L1_BITs of the guest physical
- * address.  Each intermediate table provides the next L2_BITs of guest
- * physical address space.  The number of levels vary based on host and
- * guest configuration, making it efficient to build the final guest
- * physical address by seeding the L1 offset and shifting and adding in
- * each L2 offset as we recurse through them. */
-static void phys_page_for_each_1(CPUPhysMemoryClient *client,
-                                 int level, void **lp, target_phys_addr_t addr)
-{
-    int i;
-
-    if (*lp == NULL) {
-        return;
-    }
-    if (level == 0) {
-        PhysPageDesc *pd = *lp;
-        addr <<= L2_BITS + TARGET_PAGE_BITS;
-        for (i = 0; i < L2_SIZE; ++i) {
-            if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
-                client->set_memory(client, addr | i << TARGET_PAGE_BITS,
-                                   TARGET_PAGE_SIZE, pd[i].phys_offset, false);
-            }
-        }
-    } else {
-        void **pp = *lp;
-        for (i = 0; i < L2_SIZE; ++i) {
-            phys_page_for_each_1(client, level - 1, pp + i,
-                                 (addr << L2_BITS) | i);
-        }
-    }
-}
-
-static void phys_page_for_each(CPUPhysMemoryClient *client)
-{
-    int i;
-    for (i = 0; i < P_L1_SIZE; ++i) {
-        phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
-                             l1_phys_map + i, i);
-    }
-}
-
-void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
-{
-    QLIST_INSERT_HEAD(&memory_client_list, client, list);
-    phys_page_for_each(client);
-}
-
-void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
-{
-    QLIST_REMOVE(client, list);
-}
-#endif
-
 static int cmp1(const char *s1, int n, const char *s2)
 {
     if (strlen(s2) != n)
@@ -2082,54 +2008,9 @@ int cpu_physical_memory_set_dirty_tracking(int enable)
 {
     int ret = 0;
     in_migration = enable;
-    ret = cpu_notify_migration_log(!!enable);
     return ret;
 }
 
-int cpu_physical_memory_get_dirty_tracking(void)
-{
-    return in_migration;
-}
-
-int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
-                                   target_phys_addr_t end_addr)
-{
-    int ret;
-
-    ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
-    return ret;
-}
-
-int cpu_physical_log_start(target_phys_addr_t start_addr,
-                           ram_addr_t size)
-{
-    CPUPhysMemoryClient *client;
-    QLIST_FOREACH(client, &memory_client_list, list) {
-        if (client->log_start) {
-            int r = client->log_start(client, start_addr, size);
-            if (r < 0) {
-                return r;
-            }
-        }
-    }
-    return 0;
-}
-
-int cpu_physical_log_stop(target_phys_addr_t start_addr,
-                          ram_addr_t size)
-{
-    CPUPhysMemoryClient *client;
-    QLIST_FOREACH(client, &memory_client_list, list) {
-        if (client->log_stop) {
-            int r = client->log_stop(client, start_addr, size);
-            if (r < 0) {
-                return r;
-            }
-        }
-    }
-    return 0;
-}
-
 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
 {
     ram_addr_t ram_addr;
@@ -2632,7 +2513,6 @@ void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
     subpage_t *subpage;
 
     assert(size);
-    cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
 
     if (phys_offset == IO_MEM_UNASSIGNED) {
         region_offset = start_addr;
@@ -2705,17 +2585,6 @@ void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
     }
 }
 
-/* XXX: temporary until new memory mapping API */
-ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
-{
-    PhysPageDesc *p;
-
-    p = phys_page_find(addr >> TARGET_PAGE_BITS);
-    if (!p)
-        return IO_MEM_UNASSIGNED;
-    return p->phys_offset;
-}
-
 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
 {
     if (kvm_enabled())
@@ -2833,13 +2702,13 @@ static void *file_ram_alloc(RAMBlock *block,
 static ram_addr_t find_ram_offset(ram_addr_t size)
 {
     RAMBlock *block, *next_block;
-    ram_addr_t offset = 0, mingap = ULONG_MAX;
+    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
 
     if (QLIST_EMPTY(&ram_list.blocks))
         return 0;
 
     QLIST_FOREACH(block, &ram_list.blocks, next) {
-        ram_addr_t end, next = ULONG_MAX;
+        ram_addr_t end, next = RAM_ADDR_MAX;
 
         end = block->offset + block->length;
 
@@ -2849,10 +2718,17 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
             }
         }
         if (next - end >= size && next - end < mingap) {
-            offset =  end;
+            offset = end;
             mingap = next - end;
         }
     }
+
+    if (offset == RAM_ADDR_MAX) {
+        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
+                (uint64_t)size);
+        abort();
+    }
+
     return offset;
 }
 
@@ -2867,31 +2743,47 @@ static ram_addr_t last_ram_offset(void)
     return last;
 }
 
-ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
-                                   ram_addr_t size, void *host)
+void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
 {
     RAMBlock *new_block, *block;
 
-    size = TARGET_PAGE_ALIGN(size);
-    new_block = qemu_mallocz(sizeof(*new_block));
+    new_block = NULL;
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        if (block->offset == addr) {
+            new_block = block;
+            break;
+        }
+    }
+    assert(new_block);
+    assert(!new_block->idstr[0]);
 
     if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
         char *id = dev->parent_bus->info->get_dev_path(dev);
         if (id) {
             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
-            qemu_free(id);
+            g_free(id);
         }
     }
     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
 
     QLIST_FOREACH(block, &ram_list.blocks, next) {
-        if (!strcmp(block->idstr, new_block->idstr)) {
+        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                     new_block->idstr);
             abort();
         }
     }
+}
+
+ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
+                                   MemoryRegion *mr)
+{
+    RAMBlock *new_block;
 
+    size = TARGET_PAGE_ALIGN(size);
+    new_block = g_malloc0(sizeof(*new_block));
+
+    new_block->mr = mr;
     new_block->offset = find_ram_offset(size);
     if (host) {
         new_block->host = host;
@@ -2923,8 +2815,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
                 abort();
             }
 #else
-            if (xen_mapcache_enabled()) {
-                xen_ram_alloc(new_block->offset, size);
+            if (xen_enabled()) {
+                xen_ram_alloc(new_block->offset, size, mr);
             } else {
                 new_block->host = qemu_vmalloc(size);
             }
@@ -2936,7 +2828,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
 
     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
 
-    ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
+    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                        last_ram_offset() >> TARGET_PAGE_BITS);
     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
            0xff, size >> TARGET_PAGE_BITS);
@@ -2947,9 +2839,9 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
     return new_block->offset;
 }
 
-ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
+ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
 {
-    return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
+    return qemu_ram_alloc_from_ptr(size, NULL, mr);
 }
 
 void qemu_ram_free_from_ptr(ram_addr_t addr)
@@ -2959,7 +2851,7 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
             QLIST_REMOVE(block, next);
-            qemu_free(block);
+            g_free(block);
             return;
         }
     }
@@ -2989,14 +2881,14 @@ void qemu_ram_free(ram_addr_t addr)
 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
                 munmap(block->host, block->length);
 #else
-                if (xen_mapcache_enabled()) {
-                    qemu_invalidate_entry(block->host);
+                if (xen_enabled()) {
+                    xen_invalidate_map_cache_entry(block->host);
                 } else {
                     qemu_vfree(block->host);
                 }
 #endif
             }
-            qemu_free(block);
+            g_free(block);
             return;
         }
     }
@@ -3051,7 +2943,8 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
 #endif
                 }
                 if (area != vaddr) {
-                    fprintf(stderr, "Could not remap addr: %lx@%lx\n",
+                    fprintf(stderr, "Could not remap addr: "
+                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                             length, addr);
                     exit(1);
                 }
@@ -3082,14 +2975,16 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
                 QLIST_REMOVE(block, next);
                 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
             }
-            if (xen_mapcache_enabled()) {
+            if (xen_enabled()) {
                 /* We need to check if the requested address is in the RAM
                  * because we don't want to map the entire memory in QEMU.
+                 * In that case just map until the end of the page.
                  */
                 if (block->offset == 0) {
-                    return qemu_map_cache(addr, 0, 1);
+                    return xen_map_cache(addr, 0, 0);
                 } else if (block->host == NULL) {
-                    block->host = qemu_map_cache(block->offset, block->length, 1);
+                    block->host =
+                        xen_map_cache(block->offset, block->length, 1);
                 }
             }
             return block->host + (addr - block->offset);
@@ -3111,14 +3006,16 @@ void *qemu_safe_ram_ptr(ram_addr_t addr)
 
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr - block->offset < block->length) {
-            if (xen_mapcache_enabled()) {
+            if (xen_enabled()) {
                 /* We need to check if the requested address is in the RAM
                  * because we don't want to map the entire memory in QEMU.
+                 * In that case just map until the end of the page.
                  */
                 if (block->offset == 0) {
-                    return qemu_map_cache(addr, 0, 1);
+                    return xen_map_cache(addr, 0, 0);
                 } else if (block->host == NULL) {
-                    block->host = qemu_map_cache(block->offset, block->length, 1);
+                    block->host =
+                        xen_map_cache(block->offset, block->length, 1);
                 }
             }
             return block->host + (addr - block->offset);
@@ -3133,11 +3030,14 @@ void *qemu_safe_ram_ptr(ram_addr_t addr)
 
 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
  * but takes a size argument */
-void *qemu_ram_ptr_length(target_phys_addr_t addr, target_phys_addr_t *size)
+void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
 {
-    if (xen_mapcache_enabled())
-        return qemu_map_cache(addr, *size, 1);
-    else {
+    if (*size == 0) {
+        return NULL;
+    }
+    if (xen_enabled()) {
+        return xen_map_cache(addr, *size, 1);
+    } else {
         RAMBlock *block;
 
         QLIST_FOREACH(block, &ram_list.blocks, next) {
@@ -3150,19 +3050,12 @@ void *qemu_ram_ptr_length(target_phys_addr_t addr, target_phys_addr_t *size)
 
         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
         abort();
-
-        *size = 0;
-        return NULL;
     }
 }
 
 void qemu_put_ram_ptr(void *addr)
 {
     trace_qemu_put_ram_ptr(addr);
-
-    if (xen_mapcache_enabled()) {
-            qemu_invalidate_entry(block->host);
-    }
 }
 
 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
@@ -3170,6 +3063,11 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
     RAMBlock *block;
     uint8_t *host = ptr;
 
+    if (xen_enabled()) {
+        *ram_addr = xen_ram_addr_from_mapcache(ptr);
+        return 0;
+    }
+
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         /* This case append when the block is not mapped. */
         if (block->host == NULL) {
@@ -3181,11 +3079,6 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
         }
     }
 
-    if (xen_mapcache_enabled()) {
-        *ram_addr = qemu_ram_addr_from_mapcache(ptr);
-        return 0;
-    }
-
     return -1;
 }
 
@@ -3208,7 +3101,7 @@ static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
 #endif
 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
-    do_unassigned_access(addr, 0, 0, 0, 1);
+    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
 #endif
     return 0;
 }
@@ -3219,7 +3112,7 @@ static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
 #endif
 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
-    do_unassigned_access(addr, 0, 0, 0, 2);
+    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
 #endif
     return 0;
 }
@@ -3230,7 +3123,7 @@ static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
 #endif
 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
-    do_unassigned_access(addr, 0, 0, 0, 4);
+    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
 #endif
     return 0;
 }
@@ -3241,7 +3134,7 @@ static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_
     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
 #endif
 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
-    do_unassigned_access(addr, 1, 0, 0, 1);
+    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
 #endif
 }
 
@@ -3251,7 +3144,7 @@ static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_
     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
 #endif
 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
-    do_unassigned_access(addr, 1, 0, 0, 2);
+    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
 #endif
 }
 
@@ -3261,7 +3154,7 @@ static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_
     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
 #endif
 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
-    do_unassigned_access(addr, 1, 0, 0, 4);
+    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
 #endif
 }
 
@@ -3522,6 +3415,63 @@ static CPUWriteMemoryFunc * const subpage_write[] = {
     &subpage_writel,
 };
 
+static uint32_t subpage_ram_readb(void *opaque, target_phys_addr_t addr)
+{
+    ram_addr_t raddr = addr;
+    void *ptr = qemu_get_ram_ptr(raddr);
+    return ldub_p(ptr);
+}
+
+static void subpage_ram_writeb(void *opaque, target_phys_addr_t addr,
+                               uint32_t value)
+{
+    ram_addr_t raddr = addr;
+    void *ptr = qemu_get_ram_ptr(raddr);
+    stb_p(ptr, value);
+}
+
+static uint32_t subpage_ram_readw(void *opaque, target_phys_addr_t addr)
+{
+    ram_addr_t raddr = addr;
+    void *ptr = qemu_get_ram_ptr(raddr);
+    return lduw_p(ptr);
+}
+
+static void subpage_ram_writew(void *opaque, target_phys_addr_t addr,
+                               uint32_t value)
+{
+    ram_addr_t raddr = addr;
+    void *ptr = qemu_get_ram_ptr(raddr);
+    stw_p(ptr, value);
+}
+
+static uint32_t subpage_ram_readl(void *opaque, target_phys_addr_t addr)
+{
+    ram_addr_t raddr = addr;
+    void *ptr = qemu_get_ram_ptr(raddr);
+    return ldl_p(ptr);
+}
+
+static void subpage_ram_writel(void *opaque, target_phys_addr_t addr,
+                               uint32_t value)
+{
+    ram_addr_t raddr = addr;
+    void *ptr = qemu_get_ram_ptr(raddr);
+    stl_p(ptr, value);
+}
+
+static CPUReadMemoryFunc * const subpage_ram_read[] = {
+    &subpage_ram_readb,
+    &subpage_ram_readw,
+    &subpage_ram_readl,
+};
+
+static CPUWriteMemoryFunc * const subpage_ram_write[] = {
+    &subpage_ram_writeb,
+    &subpage_ram_writew,
+    &subpage_ram_writel,
+};
+
 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                              ram_addr_t memory, ram_addr_t region_offset)
 {
@@ -3535,8 +3485,9 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
     printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
            mmio, start, end, idx, eidx, memory);
 #endif
-    if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
-        memory = IO_MEM_UNASSIGNED;
+    if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
+        memory = IO_MEM_SUBPAGE_RAM;
+    }
     memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
     for (; idx <= eidx; idx++) {
         mmio->sub_io_index[idx] = memory;
@@ -3553,11 +3504,10 @@ static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
     subpage_t *mmio;
     int subpage_memory;
 
-    mmio = qemu_mallocz(sizeof(subpage_t));
+    mmio = g_malloc0(sizeof(subpage_t));
 
     mmio->base = base;
-    subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
-                                            DEVICE_NATIVE_ENDIAN);
+    subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio);
 #if defined(DEBUG_SUBPAGE)
     printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
            mmio, base, TARGET_PAGE_SIZE, subpage_memory);
@@ -3581,106 +3531,6 @@ static int get_free_io_mem_idx(void)
     return -1;
 }
 
-/*
- * Usually, devices operate in little endian mode. There are devices out
- * there that operate in big endian too. Each device gets byte swapped
- * mmio if plugged onto a CPU that does the other endianness.
- *
- * CPU          Device           swap?
- *
- * little       little           no
- * little       big              yes
- * big          little           yes
- * big          big              no
- */
-
-typedef struct SwapEndianContainer {
-    CPUReadMemoryFunc *read[3];
-    CPUWriteMemoryFunc *write[3];
-    void *opaque;
-} SwapEndianContainer;
-
-static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
-{
-    uint32_t val;
-    SwapEndianContainer *c = opaque;
-    val = c->read[0](c->opaque, addr);
-    return val;
-}
-
-static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
-{
-    uint32_t val;
-    SwapEndianContainer *c = opaque;
-    val = bswap16(c->read[1](c->opaque, addr));
-    return val;
-}
-
-static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
-{
-    uint32_t val;
-    SwapEndianContainer *c = opaque;
-    val = bswap32(c->read[2](c->opaque, addr));
-    return val;
-}
-
-static CPUReadMemoryFunc * const swapendian_readfn[3]={
-    swapendian_mem_readb,
-    swapendian_mem_readw,
-    swapendian_mem_readl
-};
-
-static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
-                                  uint32_t val)
-{
-    SwapEndianContainer *c = opaque;
-    c->write[0](c->opaque, addr, val);
-}
-
-static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
-                                  uint32_t val)
-{
-    SwapEndianContainer *c = opaque;
-    c->write[1](c->opaque, addr, bswap16(val));
-}
-
-static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
-                                  uint32_t val)
-{
-    SwapEndianContainer *c = opaque;
-    c->write[2](c->opaque, addr, bswap32(val));
-}
-
-static CPUWriteMemoryFunc * const swapendian_writefn[3]={
-    swapendian_mem_writeb,
-    swapendian_mem_writew,
-    swapendian_mem_writel
-};
-
-static void swapendian_init(int io_index)
-{
-    SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
-    int i;
-
-    /* Swap mmio for big endian targets */
-    c->opaque = io_mem_opaque[io_index];
-    for (i = 0; i < 3; i++) {
-        c->read[i] = io_mem_read[io_index][i];
-        c->write[i] = io_mem_write[io_index][i];
-
-        io_mem_read[io_index][i] = swapendian_readfn[i];
-        io_mem_write[io_index][i] = swapendian_writefn[i];
-    }
-    io_mem_opaque[io_index] = c;
-}
-
-static void swapendian_del(int io_index)
-{
-    if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
-        qemu_free(io_mem_opaque[io_index]);
-    }
-}
-
 /* mem_read and mem_write are arrays of functions containing the
    function to access byte (index 0), word (index 1) and dword (index
    2). Functions can be omitted with a NULL function pointer.
@@ -3691,7 +3541,7 @@ static void swapendian_del(int io_index)
 static int cpu_register_io_memory_fixed(int io_index,
                                         CPUReadMemoryFunc * const *mem_read,
                                         CPUWriteMemoryFunc * const *mem_write,
-                                        void *opaque, enum device_endian endian)
+                                        void *opaque)
 {
     int i;
 
@@ -3715,30 +3565,14 @@ static int cpu_register_io_memory_fixed(int io_index,
     }
     io_mem_opaque[io_index] = opaque;
 
-    switch (endian) {
-    case DEVICE_BIG_ENDIAN:
-#ifndef TARGET_WORDS_BIGENDIAN
-        swapendian_init(io_index);
-#endif
-        break;
-    case DEVICE_LITTLE_ENDIAN:
-#ifdef TARGET_WORDS_BIGENDIAN
-        swapendian_init(io_index);
-#endif
-        break;
-    case DEVICE_NATIVE_ENDIAN:
-    default:
-        break;
-    }
-
     return (io_index << IO_MEM_SHIFT);
 }
 
 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
                            CPUWriteMemoryFunc * const *mem_write,
-                           void *opaque, enum device_endian endian)
+                           void *opaque)
 {
-    return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
+    return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque);
 }
 
 void cpu_unregister_io_memory(int io_table_address)
@@ -3746,8 +3580,6 @@ void cpu_unregister_io_memory(int io_table_address)
     int i;
     int io_index = io_table_address >> IO_MEM_SHIFT;
 
-    swapendian_del(io_index);
-
     for (i=0;i < 3; i++) {
         io_mem_read[io_index][i] = unassigned_mem_read[i];
         io_mem_write[io_index][i] = unassigned_mem_write[i];
@@ -3761,20 +3593,39 @@ static void io_mem_init(void)
     int i;
 
     cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
-                                 unassigned_mem_write, NULL,
-                                 DEVICE_NATIVE_ENDIAN);
+                                 unassigned_mem_write, NULL);
     cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
-                                 unassigned_mem_write, NULL,
-                                 DEVICE_NATIVE_ENDIAN);
+                                 unassigned_mem_write, NULL);
     cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
-                                 notdirty_mem_write, NULL,
-                                 DEVICE_NATIVE_ENDIAN);
+                                 notdirty_mem_write, NULL);
+    cpu_register_io_memory_fixed(IO_MEM_SUBPAGE_RAM, subpage_ram_read,
+                                 subpage_ram_write, NULL);
     for (i=0; i<5; i++)
         io_mem_used[i] = 1;
 
     io_mem_watch = cpu_register_io_memory(watch_mem_read,
-                                          watch_mem_write, NULL,
-                                          DEVICE_NATIVE_ENDIAN);
+                                          watch_mem_write, NULL);
+}
+
+static void memory_map_init(void)
+{
+    system_memory = g_malloc(sizeof(*system_memory));
+    memory_region_init(system_memory, "system", INT64_MAX);
+    set_system_memory_map(system_memory);
+
+    system_io = g_malloc(sizeof(*system_io));
+    memory_region_init(system_io, "io", 65536);
+    set_system_io_map(system_io);
+}
+
+MemoryRegion *get_system_memory(void)
+{
+    return system_memory;
+}
+
+MemoryRegion *get_system_io(void)
+{
+    return system_io;
 }
 
 #endif /* !defined(CONFIG_USER_ONLY) */
@@ -3828,7 +3679,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
     uint8_t *ptr;
     uint32_t val;
     target_phys_addr_t page;
-    unsigned long pd;
+    ram_addr_t pd;
     PhysPageDesc *p;
 
     while (len > 0) {
@@ -3868,7 +3719,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                     l = 1;
                 }
             } else {
-                unsigned long addr1;
+                ram_addr_t addr1;
                 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
                 /* RAM case */
                 ptr = qemu_get_ram_ptr(addr1);
@@ -3978,7 +3829,7 @@ static QLIST_HEAD(map_client_list, MapClient) map_client_list
 
 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
 {
-    MapClient *client = qemu_malloc(sizeof(*client));
+    MapClient *client = g_malloc(sizeof(*client));
 
     client->opaque = opaque;
     client->callback = callback;
@@ -3991,7 +3842,7 @@ void cpu_unregister_map_client(void *_client)
     MapClient *client = (MapClient *)_client;
 
     QLIST_REMOVE(client, link);
-    qemu_free(client);
+    g_free(client);
 }
 
 static void cpu_notify_map_clients(void)
@@ -4022,7 +3873,9 @@ void *cpu_physical_memory_map(target_phys_addr_t addr,
     target_phys_addr_t page;
     unsigned long pd;
     PhysPageDesc *p;
-    target_phys_addr_t addr1 = addr;
+    ram_addr_t raddr = RAM_ADDR_MAX;
+    ram_addr_t rlen;
+    void *ret;
 
     while (len > 0) {
         page = addr & TARGET_PAGE_MASK;
@@ -4050,13 +3903,18 @@ void *cpu_physical_memory_map(target_phys_addr_t addr,
             *plen = l;
             return bounce.buffer;
         }
+        if (!todo) {
+            raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+        }
 
         len -= l;
         addr += l;
         todo += l;
     }
-    *plen = todo;
-    return qemu_ram_ptr_length(addr1, plen);
+    rlen = todo;
+    ret = qemu_ram_ptr_length(raddr, &rlen);
+    *plen = rlen;
+    return ret;
 }
 
 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
@@ -4085,14 +3943,8 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
                 access_len -= l;
             }
         }
-        if (xen_mapcache_enabled()) {
-            uint8_t *buffer1 = buffer;
-            uint8_t *end_buffer = buffer + len;
-
-            while (buffer1 < end_buffer) {
-                qemu_put_ram_ptr(buffer1);
-                buffer1 += TARGET_PAGE_SIZE;
-            }
+        if (xen_enabled()) {
+            xen_invalidate_map_cache_entry(buffer);
         }
         return;
     }
@@ -4105,7 +3957,8 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
 }
 
 /* warning: addr must be aligned */
-uint32_t ldl_phys(target_phys_addr_t addr)
+static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
+                                         enum device_endian endian)
 {
     int io_index;
     uint8_t *ptr;
@@ -4127,17 +3980,52 @@ uint32_t ldl_phys(target_phys_addr_t addr)
         if (p)
             addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
         val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap32(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap32(val);
+        }
+#endif
     } else {
         /* RAM case */
         ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
             (addr & ~TARGET_PAGE_MASK);
-        val = ldl_p(ptr);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            val = ldl_le_p(ptr);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            val = ldl_be_p(ptr);
+            break;
+        default:
+            val = ldl_p(ptr);
+            break;
+        }
     }
     return val;
 }
 
+uint32_t ldl_phys(target_phys_addr_t addr)
+{
+    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
+}
+
+uint32_t ldl_le_phys(target_phys_addr_t addr)
+{
+    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
+}
+
+uint32_t ldl_be_phys(target_phys_addr_t addr)
+{
+    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
+}
+
 /* warning: addr must be aligned */
-uint64_t ldq_phys(target_phys_addr_t addr)
+static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
+                                         enum device_endian endian)
 {
     int io_index;
     uint8_t *ptr;
@@ -4158,6 +4046,9 @@ uint64_t ldq_phys(target_phys_addr_t addr)
         io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
         if (p)
             addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+
+        /* XXX This is broken when device endian != cpu endian.
+               Fix and add "endian" variable check */
 #ifdef TARGET_WORDS_BIGENDIAN
         val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
         val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
@@ -4169,11 +4060,36 @@ uint64_t ldq_phys(target_phys_addr_t addr)
         /* RAM case */
         ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
             (addr & ~TARGET_PAGE_MASK);
-        val = ldq_p(ptr);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            val = ldq_le_p(ptr);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            val = ldq_be_p(ptr);
+            break;
+        default:
+            val = ldq_p(ptr);
+            break;
+        }
     }
     return val;
 }
 
+uint64_t ldq_phys(target_phys_addr_t addr)
+{
+    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
+}
+
+uint64_t ldq_le_phys(target_phys_addr_t addr)
+{
+    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
+}
+
+uint64_t ldq_be_phys(target_phys_addr_t addr)
+{
+    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
+}
+
 /* XXX: optimize */
 uint32_t ldub_phys(target_phys_addr_t addr)
 {
@@ -4183,7 +4099,8 @@ uint32_t ldub_phys(target_phys_addr_t addr)
 }
 
 /* warning: addr must be aligned */
-uint32_t lduw_phys(target_phys_addr_t addr)
+static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
+                                          enum device_endian endian)
 {
     int io_index;
     uint8_t *ptr;
@@ -4205,15 +4122,49 @@ uint32_t lduw_phys(target_phys_addr_t addr)
         if (p)
             addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
         val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap16(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap16(val);
+        }
+#endif
     } else {
         /* RAM case */
         ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
             (addr & ~TARGET_PAGE_MASK);
-        val = lduw_p(ptr);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            val = lduw_le_p(ptr);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            val = lduw_be_p(ptr);
+            break;
+        default:
+            val = lduw_p(ptr);
+            break;
+        }
     }
     return val;
 }
 
+uint32_t lduw_phys(target_phys_addr_t addr)
+{
+    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
+}
+
+uint32_t lduw_le_phys(target_phys_addr_t addr)
+{
+    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
+}
+
+uint32_t lduw_be_phys(target_phys_addr_t addr)
+{
+    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
+}
+
 /* warning: addr must be aligned. The ram page is not masked as dirty
    and the code inside is not invalidated. It is useful if the dirty
    bits are used to track modified PTEs */
@@ -4286,7 +4237,8 @@ void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
 }
 
 /* warning: addr must be aligned */
-void stl_phys(target_phys_addr_t addr, uint32_t val)
+static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
+                                     enum device_endian endian)
 {
     int io_index;
     uint8_t *ptr;
@@ -4304,13 +4256,32 @@ void stl_phys(target_phys_addr_t addr, uint32_t val)
         io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
         if (p)
             addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap32(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap32(val);
+        }
+#endif
         io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
     } else {
         unsigned long addr1;
         addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
         /* RAM case */
         ptr = qemu_get_ram_ptr(addr1);
-        stl_p(ptr, val);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            stl_le_p(ptr, val);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            stl_be_p(ptr, val);
+            break;
+        default:
+            stl_p(ptr, val);
+            break;
+        }
         if (!cpu_physical_memory_is_dirty(addr1)) {
             /* invalidate code */
             tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
@@ -4321,6 +4292,21 @@ void stl_phys(target_phys_addr_t addr, uint32_t val)
     }
 }
 
+void stl_phys(target_phys_addr_t addr, uint32_t val)
+{
+    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
+}
+
+void stl_le_phys(target_phys_addr_t addr, uint32_t val)
+{
+    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
+}
+
+void stl_be_phys(target_phys_addr_t addr, uint32_t val)
+{
+    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
+}
+
 /* XXX: optimize */
 void stb_phys(target_phys_addr_t addr, uint32_t val)
 {
@@ -4329,7 +4315,8 @@ void stb_phys(target_phys_addr_t addr, uint32_t val)
 }
 
 /* warning: addr must be aligned */
-void stw_phys(target_phys_addr_t addr, uint32_t val)
+static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
+                                     enum device_endian endian)
 {
     int io_index;
     uint8_t *ptr;
@@ -4347,13 +4334,32 @@ void stw_phys(target_phys_addr_t addr, uint32_t val)
         io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
         if (p)
             addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap16(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap16(val);
+        }
+#endif
         io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
     } else {
         unsigned long addr1;
         addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
         /* RAM case */
         ptr = qemu_get_ram_ptr(addr1);
-        stw_p(ptr, val);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            stw_le_p(ptr, val);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            stw_be_p(ptr, val);
+            break;
+        default:
+            stw_p(ptr, val);
+            break;
+        }
         if (!cpu_physical_memory_is_dirty(addr1)) {
             /* invalidate code */
             tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
@@ -4364,6 +4370,21 @@ void stw_phys(target_phys_addr_t addr, uint32_t val)
     }
 }
 
+void stw_phys(target_phys_addr_t addr, uint32_t val)
+{
+    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
+}
+
+void stw_le_phys(target_phys_addr_t addr, uint32_t val)
+{
+    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
+}
+
+void stw_be_phys(target_phys_addr_t addr, uint32_t val)
+{
+    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
+}
+
 /* XXX: optimize */
 void stq_phys(target_phys_addr_t addr, uint64_t val)
 {
@@ -4371,6 +4392,18 @@ void stq_phys(target_phys_addr_t addr, uint64_t val)
     cpu_physical_memory_write(addr, &val, 8);
 }
 
+void stq_le_phys(target_phys_addr_t addr, uint64_t val)
+{
+    val = cpu_to_le64(val);
+    cpu_physical_memory_write(addr, &val, 8);
+}
+
+void stq_be_phys(target_phys_addr_t addr, uint64_t val)
+{
+    val = cpu_to_be64(val);
+    cpu_physical_memory_write(addr, &val, 8);
+}
+
 /* virtual memory access for debug (includes writing to ROM) */
 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
                         uint8_t *buf, int len, int is_write)
@@ -4515,6 +4548,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
 }
 
 #define MMUSUFFIX _cmmu
+#undef GETPC
 #define GETPC() NULL
 #define env cpu_single_env
 #define SOFTMMU_CODE_ACCESS