include/hw/core: Create struct CPUJumpCache

[mirror_qemu.git] / accel / tcg / cputlb.c
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c

index b69a9534473ef9d629ce15451efaaf15bd8d0bce..6f1c00682b9ae55f4b091a834cd4d7405cde8b15 100644 (file)
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -100,21 +100,14 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
  
  /*
   * Reset the jump cache slots that belong to @page_addr: starting at the
   * index returned by tb_jmp_cache_hash_page(), TB_JMP_PAGE_SIZE consecutive
   * entries are set to NULL.  In the added lines the cache is reached
   * through a CPUJumpCache pointer and the cleared member is array[].tb.
   */
  static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
  {
-    unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
+    int i, i0 = tb_jmp_cache_hash_page(page_addr);
+    CPUJumpCache *jc = cpu->tb_jmp_cache;
  
      for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
          /*
           * Stored with qatomic_set(); NOTE(review): presumably because
           * other threads may read these slots concurrently - confirm.
           */
-        qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
+        qatomic_set(&jc->array[i0 + i].tb, NULL);
      }
  }
  
-static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
-{
-    /* Discard jump cache entries for any tb which might potentially
-       overlap the flushed page.  */
-    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
-    tb_jmp_cache_clear_page(cpu, addr);
-}
-
  /**
   * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
   * @desc: The CPUTLBDesc portion of the TLB
@@ -200,13 +193,13 @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
      }
  
      g_free(fast->table);
-    g_free(desc->iotlb);
+    g_free(desc->fulltlb);
  
      tlb_window_reset(desc, now, 0);
      /* desc->n_used_entries is cleared by the caller */
      fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
      fast->table = g_try_new(CPUTLBEntry, new_size);
-    desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
+    desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
  
      /*
       * If the allocations fail, try smaller sizes. We just freed some
@@ -215,7 +208,7 @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
       * allocations to fail though, so we progressively reduce the allocation
       * size, aborting if we cannot even allocate the smallest TLB we support.
       */
-    while (fast->table == NULL || desc->iotlb == NULL) {
+    while (fast->table == NULL || desc->fulltlb == NULL) {
          if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
              error_report("%s: %s", __func__, strerror(errno));
              abort();
@@ -224,9 +217,9 @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
          fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
  
          g_free(fast->table);
-        g_free(desc->iotlb);
+        g_free(desc->fulltlb);
          fast->table = g_try_new(CPUTLBEntry, new_size);
-        desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
+        desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
      }
  }
  
@@ -258,7 +251,7 @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
      desc->n_used_entries = 0;
      fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
      fast->table = g_new(CPUTLBEntry, n_entries);
-    desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
+    desc->fulltlb = g_new(CPUTLBEntryFull, n_entries);
      tlb_mmu_flush_locked(desc, fast);
  }
  
@@ -299,7 +292,7 @@ void tlb_destroy(CPUState *cpu)
          CPUTLBDescFast *fast = &env_tlb(env)->f[i];
  
          g_free(fast->table);
-        g_free(desc->iotlb);
+        g_free(desc->fulltlb);
      }
  }
  
@@ -364,7 +357,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
  
      qemu_spin_unlock(&env_tlb(env)->c.lock);
  
-    cpu_tb_jmp_cache_clear(cpu);
+    tcg_flush_jmp_cache(cpu);
  
      if (to_clean == ALL_MMUIDX_BITS) {
          qatomic_set(&env_tlb(env)->c.full_flush_count,
@@ -541,7 +534,12 @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
      }
      qemu_spin_unlock(&env_tlb(env)->c.lock);
  
-    tb_flush_jmp_cache(cpu, addr);
+    /*
+     * Discard jump cache entries for any tb which might potentially
+     * overlap the flushed page, which includes tbs on the previous page.
+     */
+    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
+    tb_jmp_cache_clear_page(cpu, addr);
  }
  
  /**
@@ -783,8 +781,23 @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
      }
      qemu_spin_unlock(&env_tlb(env)->c.lock);
  
-    for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) {
-        tb_flush_jmp_cache(cpu, d.addr + i);
+    /*
+     * If the length is larger than the jump cache size, then it will take
+     * longer to clear each entry individually than it will to clear it all.
+     */
+    if (d.len >= (TARGET_PAGE_SIZE * TB_JMP_CACHE_SIZE)) {
+        tcg_flush_jmp_cache(cpu);
+        return;
+    }
+
+    /*
+     * Discard jump cache entries for any tb which might potentially
+     * overlap the flushed pages, which includes tbs on the previous page.
+     */
+    d.addr -= TARGET_PAGE_SIZE;
+    for (target_ulong i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) {
+        tb_jmp_cache_clear_page(cpu, d.addr);
+        d.addr += TARGET_PAGE_SIZE;
      }
  }
  
@@ -942,7 +955,8 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
     can be detected */
  void tlb_protect_code(ram_addr_t ram_addr)
  {
      /*
       * Test-and-clear the DIRTY_MEMORY_CODE state for a span of
       * TARGET_PAGE_SIZE bytes.  The added lines mask @ram_addr with
       * TARGET_PAGE_MASK first, so the span begins at the start of the
       * page containing @ram_addr rather than at @ram_addr itself.
       */
-    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
+    cpu_physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK,
+                                             TARGET_PAGE_SIZE,
                                               DIRTY_MEMORY_CODE);
  }
  
@@ -1086,16 +1100,16 @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
      env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
  }
  
-/* Add a new TLB entry. At most one entry for a given virtual address
+/*
+ * Add a new TLB entry. At most one entry for a given virtual address
   * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
   * supplied size is only used by tlb_flush_page.
   *
   * Called from TCG-generated code, which is under an RCU read-side
   * critical section.
   */
-void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
-                             hwaddr paddr, MemTxAttrs attrs, int prot,
-                             int mmu_idx, target_ulong size)
+void tlb_set_page_full(CPUState *cpu, int mmu_idx,
+                       target_ulong vaddr, CPUTLBEntryFull *full)
  {
      CPUArchState *env = cpu->env_ptr;
      CPUTLB *tlb = env_tlb(env);
@@ -1108,35 +1122,36 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
      CPUTLBEntry *te, tn;
      hwaddr iotlb, xlat, sz, paddr_page;
      target_ulong vaddr_page;
-    int asidx = cpu_asidx_from_attrs(cpu, attrs);
-    int wp_flags;
+    int asidx, wp_flags, prot;
      bool is_ram, is_romd;
  
      assert_cpu_is_self(cpu);
  
-    if (size <= TARGET_PAGE_SIZE) {
+    if (full->lg_page_size <= TARGET_PAGE_BITS) {
          sz = TARGET_PAGE_SIZE;
      } else {
-        tlb_add_large_page(env, mmu_idx, vaddr, size);
-        sz = size;
+        sz = (hwaddr)1 << full->lg_page_size;
+        tlb_add_large_page(env, mmu_idx, vaddr, sz);
      }
      vaddr_page = vaddr & TARGET_PAGE_MASK;
-    paddr_page = paddr & TARGET_PAGE_MASK;
+    paddr_page = full->phys_addr & TARGET_PAGE_MASK;
  
+    prot = full->prot;
+    asidx = cpu_asidx_from_attrs(cpu, full->attrs);
      section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
-                                                &xlat, &sz, attrs, &prot);
+                                                &xlat, &sz, full->attrs, &prot);
      assert(sz >= TARGET_PAGE_SIZE);
  
      tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
                " prot=%x idx=%d\n",
-              vaddr, paddr, prot, mmu_idx);
+              vaddr, full->phys_addr, prot, mmu_idx);
  
      address = vaddr_page;
-    if (size < TARGET_PAGE_SIZE) {
+    if (full->lg_page_size < TARGET_PAGE_BITS) {
          /* Repeat the MMU check and TLB fill on every access.  */
          address |= TLB_INVALID_MASK;
      }
-    if (attrs.byte_swap) {
+    if (full->attrs.byte_swap) {
          address |= TLB_BSWAP;
      }
  
@@ -1210,7 +1225,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
  
          /* Evict the old entry into the victim tlb.  */
          copy_tlb_helper_locked(tv, te);
-        desc->viotlb[vidx] = desc->iotlb[index];
+        desc->vfulltlb[vidx] = desc->fulltlb[index];
          tlb_n_used_entries_dec(env, mmu_idx);
      }
  
@@ -1227,8 +1242,10 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
       * subtract here is that of the page base, and not the same as the
       * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
       */
-    desc->iotlb[index].addr = iotlb - vaddr_page;
-    desc->iotlb[index].attrs = attrs;
+    desc->fulltlb[index] = *full;
+    desc->fulltlb[index].xlat_section = iotlb - vaddr_page;
+    desc->fulltlb[index].phys_addr = paddr_page;
+    desc->fulltlb[index].prot = prot;
  
      /* Now calculate the new entry */
      tn.addend = addend - vaddr_page;
@@ -1263,9 +1280,21 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
      qemu_spin_unlock(&tlb->c.lock);
  }
  
-/* Add a new TLB entry, but without specifying the memory
- * transaction attributes to be used.
- */
+void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
+                             hwaddr paddr, MemTxAttrs attrs, int prot,
+                             int mmu_idx, target_ulong size)
+{
      /*
       * Wrapper that keeps the discrete-argument signature: the physical
       * address, attributes and protection are packed into a local
       * CPUTLBEntryFull and handed to tlb_set_page_full().  @size is
       * stored as lg_page_size, i.e. converted with ctz64().
       */
+    CPUTLBEntryFull full = {
+        .phys_addr = paddr,
+        .attrs = attrs,
+        .prot = prot,
+        .lg_page_size = ctz64(size)
+    };
+
      /*
       * tlb_set_page_full() rebuilds the size as 1 << lg_page_size, which
       * only round-trips when @size is a power of two.
       */
+    assert(is_power_of_2(size));
+    tlb_set_page_full(cpu, mmu_idx, vaddr, &full);
+}
+
  void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                    hwaddr paddr, int prot,
                    int mmu_idx, target_ulong size)
@@ -1274,18 +1303,6 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                              prot, mmu_idx, size);
  }
  
-static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
-{
-    ram_addr_t ram_addr;
-
-    ram_addr = qemu_ram_addr_from_host(ptr);
-    if (ram_addr == RAM_ADDR_INVALID) {
-        error_report("Bad ram pointer %p", ptr);
-        abort();
-    }
-    return ram_addr;
-}
-
  /*
   * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
   * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
@@ -1294,15 +1311,14 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
  static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
                       MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
  {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
      bool ok;
  
      /*
       * This is not a probe, so only valid return is success; failure
       * should result in exception + longjmp to the cpu loop.
       */
      /*
       * The hook is reached through cpu->cc in the added lines; the removed
       * lines looked the class up with CPU_GET_CLASS().  The literal 'false'
       * sits in the position where probe_access_internal() passes its
       * nonfault flag.
       */
-    ok = cc->tcg_ops->tlb_fill(cpu, addr, size,
-                               access_type, mmu_idx, false, retaddr);
+    ok = cpu->cc->tcg_ops->tlb_fill(cpu, addr, size,
+                                    access_type, mmu_idx, false, retaddr);
      assert(ok);
  }
  
@@ -1310,9 +1326,8 @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
                                          MMUAccessType access_type,
                                          int mmu_idx, uintptr_t retaddr)
  {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);
+    cpu->cc->tcg_ops->do_unaligned_access(cpu, addr, access_type,
+                                          mmu_idx, retaddr);
  }
  
  static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
@@ -1332,7 +1347,7 @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
      }
  }
  
-static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
+static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
                           int mmu_idx, target_ulong addr, uintptr_t retaddr,
                           MMUAccessType access_type, MemOp op)
  {
@@ -1344,9 +1359,9 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
      bool locked = false;
      MemTxResult r;
  
-    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+    section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
      mr = section->mr;
-    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
      cpu->mem_io_pc = retaddr;
      if (!cpu->can_do_io) {
          cpu_io_recompile(cpu, retaddr);
@@ -1356,14 +1371,14 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
          qemu_mutex_lock_iothread();
          locked = true;
      }
-    r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
+    r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
      if (r != MEMTX_OK) {
          hwaddr physaddr = mr_offset +
              section->offset_within_address_space -
              section->offset_within_region;
  
          cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
-                               mmu_idx, iotlbentry->attrs, r, retaddr);
+                               mmu_idx, full->attrs, r, retaddr);
      }
      if (locked) {
          qemu_mutex_unlock_iothread();
@@ -1373,22 +1388,21 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
  }
  
  /*
- * Save a potentially trashed IOTLB entry for later lookup by plugin.
- * This is read by tlb_plugin_lookup if the iotlb entry doesn't match
+ * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin.
+ * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
    * because of the side effect of io_writex changing memory layout.
    *
    * Only @section and @mr_offset are recorded, in cs->saved_iotlb; the
    * removed lines also stored an address.  When CONFIG_PLUGIN is not
    * defined the function body is empty.
    */
-static void save_iotlb_data(CPUState *cs, hwaddr addr,
-                            MemoryRegionSection *section, hwaddr mr_offset)
+static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section,
+                            hwaddr mr_offset)
  {
  #ifdef CONFIG_PLUGIN
      SavedIOTLB *saved = &cs->saved_iotlb;
-    saved->addr = addr;
      saved->section = section;
      saved->mr_offset = mr_offset;
  #endif
  }
  
-static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
+static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
                        int mmu_idx, uint64_t val, target_ulong addr,
                        uintptr_t retaddr, MemOp op)
  {
@@ -1399,9 +1413,9 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
      bool locked = false;
      MemTxResult r;
  
-    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+    section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
      mr = section->mr;
-    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
      if (!cpu->can_do_io) {
          cpu_io_recompile(cpu, retaddr);
      }
@@ -1411,20 +1425,20 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
       * The memory_region_dispatch may trigger a flush/resize
       * so for plugins we save the iotlb_data just in case.
       */
-    save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);
+    save_iotlb_data(cpu, section, mr_offset);
  
      if (!qemu_mutex_iothread_locked()) {
          qemu_mutex_lock_iothread();
          locked = true;
      }
-    r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
+    r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs);
      if (r != MEMTX_OK) {
          hwaddr physaddr = mr_offset +
              section->offset_within_address_space -
              section->offset_within_region;
  
          cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
-                               MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
+                               MMU_DATA_STORE, mmu_idx, full->attrs, r,
                                 retaddr);
      }
      if (locked) {
@@ -1471,9 +1485,10 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
              copy_tlb_helper_locked(vtlb, &tmptlb);
              qemu_spin_unlock(&env_tlb(env)->c.lock);
  
-            CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
-            CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
-            tmpio = *io; *io = *vio; *vio = tmpio;
+            CPUTLBEntryFull *f1 = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+            CPUTLBEntryFull *f2 = &env_tlb(env)->d[mmu_idx].vfulltlb[vidx];
+            CPUTLBEntryFull tmpf;
+            tmpf = *f1; *f1 = *f2; *f2 = tmpf;
              return true;
          }
      }
@@ -1485,65 +1500,10 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
    victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                   (ADDR) & TARGET_PAGE_MASK)
  
-/*
- * Return a ram_addr_t for the virtual address for execution.
- *
- * Return -1 if we can't translate and execute from an entire page
- * of RAM.  This will force us to execute by loading and translating
- * one insn at a time, without caching.
- *
- * NOTE: This function will trigger an exception if the page is
- * not executable.
- */
-tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
-                                        void **hostp)
-{
-    uintptr_t mmu_idx = cpu_mmu_index(env, true);
-    uintptr_t index = tlb_index(env, mmu_idx, addr);
-    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
-    void *p;
-
-    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
-        if (!VICTIM_TLB_HIT(addr_code, addr)) {
-            tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
-            index = tlb_index(env, mmu_idx, addr);
-            entry = tlb_entry(env, mmu_idx, addr);
-
-            if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
-                /*
-                 * The MMU protection covers a smaller range than a target
-                 * page, so we must redo the MMU check for every insn.
-                 */
-                return -1;
-            }
-        }
-        assert(tlb_hit(entry->addr_code, addr));
-    }
-
-    if (unlikely(entry->addr_code & TLB_MMIO)) {
-        /* The region is not backed by RAM.  */
-        if (hostp) {
-            *hostp = NULL;
-        }
-        return -1;
-    }
-
-    p = (void *)((uintptr_t)addr + entry->addend);
-    if (hostp) {
-        *hostp = p;
-    }
-    return qemu_ram_addr_from_host_nofail(p);
-}
-
-tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
-{
-    return get_page_addr_code_hostp(env, addr, NULL);
-}
-
  static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
-                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
+                           CPUTLBEntryFull *full, uintptr_t retaddr)
  {
-    ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
+    ram_addr_t ram_addr = mem_vaddr + full->xlat_section;
  
      trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
  
@@ -1570,7 +1530,8 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
  static int probe_access_internal(CPUArchState *env, target_ulong addr,
                                   int fault_size, MMUAccessType access_type,
                                   int mmu_idx, bool nonfault,
-                                 void **phost, uintptr_t retaddr)
+                                 void **phost, CPUTLBEntryFull **pfull,
+                                 uintptr_t retaddr)
  {
      uintptr_t index = tlb_index(env, mmu_idx, addr);
      CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
@@ -1593,25 +1554,36 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
      }
      tlb_addr = tlb_read_ofs(entry, elt_ofs);
  
+    flags = TLB_FLAGS_MASK;
      page_addr = addr & TARGET_PAGE_MASK;
      if (!tlb_hit_page(tlb_addr, page_addr)) {
          if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
              CPUState *cs = env_cpu(env);
-            CPUClass *cc = CPU_GET_CLASS(cs);
  
-            if (!cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
-                                       mmu_idx, nonfault, retaddr)) {
+            if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
+                                           mmu_idx, nonfault, retaddr)) {
                  /* Non-faulting page table read failed.  */
                  *phost = NULL;
+                *pfull = NULL;
                  return TLB_INVALID_MASK;
              }
  
              /* TLB resize via tlb_fill may have moved the entry.  */
+            index = tlb_index(env, mmu_idx, addr);
              entry = tlb_entry(env, mmu_idx, addr);
+
+            /*
+             * With PAGE_WRITE_INV, we set TLB_INVALID_MASK immediately,
+             * to force the next access through tlb_fill.  We've just
+             * called tlb_fill, so we know that this entry *is* valid.
+             */
+            flags &= ~TLB_INVALID_MASK;
          }
          tlb_addr = tlb_read_ofs(entry, elt_ofs);
      }
-    flags = tlb_addr & TLB_FLAGS_MASK;
+    flags &= tlb_addr;
+
+    *pfull = &env_tlb(env)->d[mmu_idx].fulltlb[index];
  
      /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
      if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
@@ -1624,37 +1596,44 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
      return flags;
  }
  
-int probe_access_flags(CPUArchState *env, target_ulong addr,
-                       MemOpIdx oi, uintptr_t retaddr)
-                       MMUAccessType access_type, int mmu_idx,
-                       bool nonfault, void **phost, uintptr_t retaddr)
+int probe_access_full(CPUArchState *env, target_ulong addr,
+                      MMUAccessType access_type, int mmu_idx,
+                      bool nonfault, void **phost, CPUTLBEntryFull **pfull,
+                      uintptr_t retaddr)
  {
      /*
       * Probe @addr through probe_access_internal() with a fault size of 0
       * and return the resulting flags; *phost and *pfull are filled in by
       * that call.  If the flags contain TLB_NOTDIRTY, notdirty_write() is
       * invoked for one byte using *pfull and the flag is removed from the
       * returned value.
       */
-    int flags;
-
-    flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
-                                  nonfault, phost, retaddr);
+    int flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
+                                      nonfault, phost, pfull, retaddr);
  
      /* Handle clean RAM pages.  */
      if (unlikely(flags & TLB_NOTDIRTY)) {
-        uintptr_t index = tlb_index(env, mmu_idx, addr);
-        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
-
-        notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
+        notdirty_write(env_cpu(env), addr, 1, *pfull, retaddr);
          flags &= ~TLB_NOTDIRTY;
      }
  
      return flags;
  }
  
+int probe_access_flags(CPUArchState *env, target_ulong addr,
+                       MMUAccessType access_type, int mmu_idx,
+                       bool nonfault, void **phost, uintptr_t retaddr)
+{
      /*
       * Same probe as probe_access_full() for callers that do not want the
       * CPUTLBEntryFull pointer: it is received in a local and discarded,
       * and the flags from probe_access_full() are returned unchanged.
       */
+    CPUTLBEntryFull *full;
+
+    return probe_access_full(env, addr, access_type, mmu_idx,
+                             nonfault, phost, &full, retaddr);
+}
+
  void *probe_access(CPUArchState *env, target_ulong addr, int size,
                     MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
  {
+    CPUTLBEntryFull *full;
      void *host;
      int flags;
  
      g_assert(-(addr | TARGET_PAGE_MASK) >= size);
  
      flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
-                                  false, &host, retaddr);
+                                  false, &host, &full, retaddr);
  
      /* Per the interface, size == 0 merely faults the access. */
      if (size == 0) {
@@ -1662,20 +1641,17 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
      }
  
      if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
-        uintptr_t index = tlb_index(env, mmu_idx, addr);
-        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
-
          /* Handle watchpoints.  */
          if (flags & TLB_WATCHPOINT) {
              int wp_access = (access_type == MMU_DATA_STORE
                               ? BP_MEM_WRITE : BP_MEM_READ);
              cpu_check_watchpoint(env_cpu(env), addr, size,
-                                 iotlbentry->attrs, wp_access, retaddr);
+                                 full->attrs, wp_access, retaddr);
          }
  
          /* Handle clean RAM pages.  */
          if (flags & TLB_NOTDIRTY) {
-            notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
+            notdirty_write(env_cpu(env), addr, 1, full, retaddr);
          }
      }
  
@@ -1685,16 +1661,44 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
  /*
   * Look up the host pointer for guest address @addr with a non-faulting
   * probe (nonfault is passed as true, fault size and return address as 0).
   * Returns the host pointer only when probe_access_internal() reports no
   * flags at all, and NULL otherwise.
   */
  void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
                          MMUAccessType access_type, int mmu_idx)
  {
      /* Receives the pfull output of the probe; not used afterwards. */
+    CPUTLBEntryFull *full;
      void *host;
      int flags;
  
      flags = probe_access_internal(env, addr, 0, access_type,
-                                  mmu_idx, true, &host, 0);
+                                  mmu_idx, true, &host, &full, 0);
  
      /* No combination of flags is expected by the caller. */
      return flags ? NULL : host;
  }
  
+/*
+ * Return a ram_addr_t for the virtual address for execution.
+ *
+ * Return -1 if we can't translate and execute from an entire page
+ * of RAM.  This will force us to execute by loading and translating
+ * one insn at a time, without caching.
+ *
+ * NOTE: This function will trigger an exception if the page is
+ * not executable.
+ */
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
+                                        void **hostp)
+{
+    CPUTLBEntryFull *full;
+    void *p;
+
      /*
       * Instruction-fetch probe with nonfault == false.  The returned flags
       * are discarded; only the host pointer is examined.  @full receives
       * the pfull output of the probe and is not used afterwards.
       */
+    (void)probe_access_internal(env, addr, 1, MMU_INST_FETCH,
+                                cpu_mmu_index(env, true), false, &p, &full, 0);
+    if (p == NULL) {
          /*
           * NOTE(review): *hostp is not written on this path, whereas the
           * removed implementation stored NULL into *hostp for TLB_MMIO
           * entries - confirm no caller reads *hostp after a -1 return.
           */
+        return -1;
+    }
      /* @hostp is optional; the host pointer is only reported when set. */
+    if (hostp) {
+        *hostp = p;
+    }
+    return qemu_ram_addr_from_host_nofail(p);
+}
+
  #ifdef CONFIG_PLUGIN
  /*
   * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
@@ -1706,7 +1710,7 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
   * should have just filled the TLB. The one corner case is io_writex
   * which can cause TLB flushes and potential resizing of the TLBs
   * losing the information we need. In those cases we need to recover
- * data from a copy of the iotlbentry. As long as this always occurs
+ * data from a copy of the CPUTLBEntryFull. As long as this always occurs
   * from the same thread (which a mem callback will be) this is safe.
   */
  
@@ -1721,11 +1725,12 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
      if (likely(tlb_hit(tlb_addr, addr))) {
          /* We must have an iotlb entry for MMIO */
          if (tlb_addr & TLB_MMIO) {
-            CPUIOTLBEntry *iotlbentry;
-            iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+            CPUTLBEntryFull *full;
+            full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
              data->is_io = true;
-            data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
-            data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+            data->v.io.section =
+                iotlb_to_section(cpu, full->xlat_section, full->attrs);
+            data->v.io.offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
          } else {
              data->is_io = false;
              data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
@@ -1752,7 +1757,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                                 MemOpIdx oi, int size, int prot,
                                 uintptr_t retaddr)
  {
-    size_t mmu_idx = get_mmuidx(oi);
+    uintptr_t mmu_idx = get_mmuidx(oi);
      MemOp mop = get_memop(oi);
      int a_bits = get_alignment_bits(mop);
      uintptr_t index;
@@ -1760,6 +1765,8 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
      target_ulong tlb_addr;
      void *hostaddr;
  
+    tcg_debug_assert(mmu_idx < NB_MMU_MODES);
+
      /* Adjust the given return address.  */
      retaddr -= GETPC_ADJ;
  
@@ -1831,7 +1838,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
  
      if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
          notdirty_write(env_cpu(env), addr, size,
-                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
+                       &env_tlb(env)->d[mmu_idx].fulltlb[index], retaddr);
      }
  
      return hostaddr;
@@ -1885,9 +1892,9 @@ load_memop(const void *haddr, MemOp op)
          return (uint32_t)ldl_be_p(haddr);
      case MO_LEUL:
          return (uint32_t)ldl_le_p(haddr);
-    case MO_BEQ:
+    case MO_BEUQ:
          return ldq_be_p(haddr);
-    case MO_LEQ:
+    case MO_LEUQ:
          return ldq_le_p(haddr);
      default:
          qemu_build_not_reached();
@@ -1899,18 +1906,20 @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
              uintptr_t retaddr, MemOp op, bool code_read,
              FullLoadHelper *full_load)
  {
-    uintptr_t mmu_idx = get_mmuidx(oi);
-    uintptr_t index = tlb_index(env, mmu_idx, addr);
-    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
-    target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
      const size_t tlb_off = code_read ?
          offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
      const MMUAccessType access_type =
          code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
-    unsigned a_bits = get_alignment_bits(get_memop(oi));
+    const unsigned a_bits = get_alignment_bits(get_memop(oi));
+    const size_t size = memop_size(op);
+    uintptr_t mmu_idx = get_mmuidx(oi);
+    uintptr_t index;
+    CPUTLBEntry *entry;
+    target_ulong tlb_addr;
      void *haddr;
      uint64_t res;
-    size_t size = memop_size(op);
+
+    tcg_debug_assert(mmu_idx < NB_MMU_MODES);
  
      /* Handle CPU specific unaligned behaviour */
      if (addr & ((1 << a_bits) - 1)) {
@@ -1918,6 +1927,10 @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
                               mmu_idx, retaddr);
      }
  
+    index = tlb_index(env, mmu_idx, addr);
+    entry = tlb_entry(env, mmu_idx, addr);
+    tlb_addr = code_read ? entry->addr_code : entry->addr_read;
+
      /* If the TLB entry is for a different page, reload and try again.  */
      if (!tlb_hit(tlb_addr, addr)) {
          if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
@@ -1933,7 +1946,7 @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
  
      /* Handle anything that isn't just a straight memory access.  */
      if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        CPUIOTLBEntry *iotlbentry;
+        CPUTLBEntryFull *full;
          bool need_swap;
  
          /* For anything that is unaligned, recurse through full_load.  */
@@ -1941,20 +1954,20 @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
              goto do_unaligned_access;
          }
  
-        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+        full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
  
          /* Handle watchpoints.  */
          if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
              /* On watchpoint hit, this will longjmp out.  */
              cpu_check_watchpoint(env_cpu(env), addr, size,
-                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
+                                 full->attrs, BP_MEM_READ, retaddr);
          }
  
          need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
  
          /* Handle I/O access.  */
          if (likely(tlb_addr & TLB_MMIO)) {
-            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
+            return io_readx(env, full, mmu_idx, addr, retaddr,
                              access_type, op ^ (need_swap * MO_BSWAP));
          }
  
@@ -2081,16 +2094,16 @@ tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
  uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
                             MemOpIdx oi, uintptr_t retaddr)
  {
-    validate_memop(oi, MO_LEQ);
-    return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
+    validate_memop(oi, MO_LEUQ);
+    return load_helper(env, addr, oi, retaddr, MO_LEUQ, false,
                         helper_le_ldq_mmu);
  }
  
  uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
                             MemOpIdx oi, uintptr_t retaddr)
  {
-    validate_memop(oi, MO_BEQ);
-    return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
+    validate_memop(oi, MO_BEUQ);
+    return load_helper(env, addr, oi, retaddr, MO_BEUQ, false,
                         helper_be_ldq_mmu);
  }
  
@@ -2140,7 +2153,6 @@ static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
  {
      uint64_t ret;
  
-    trace_guest_ld_before_exec(env_cpu(env), addr, oi);
      ret = full_load(env, addr, oi, retaddr);
      qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
      return ret;
@@ -2166,7 +2178,7 @@ uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
  uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
                          MemOpIdx oi, uintptr_t ra)
  {
-    return cpu_load_helper(env, addr, oi, MO_BEQ, helper_be_ldq_mmu);
+    return cpu_load_helper(env, addr, oi, ra, helper_be_ldq_mmu);
  }
  
  uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
@@ -2210,10 +2222,10 @@ store_memop(void *haddr, uint64_t val, MemOp op)
      case MO_LEUL:
          stl_le_p(haddr, val);
          break;
-    case MO_BEQ:
+    case MO_BEUQ:
          stq_be_p(haddr, val);
          break;
-    case MO_LEQ:
+    case MO_LEUQ:
          stq_le_p(haddr, val);
          break;
      default:
@@ -2232,7 +2244,7 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
      const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
      uintptr_t index, index2;
      CPUTLBEntry *entry, *entry2;
-    target_ulong page2, tlb_addr, tlb_addr2;
+    target_ulong page1, page2, tlb_addr, tlb_addr2;
      MemOpIdx oi;
      size_t size2;
      int i;
@@ -2240,15 +2252,17 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
      /*
       * Ensure the second page is in the TLB.  Note that the first page
       * is already guaranteed to be filled, and that the second page
-     * cannot evict the first.
+     * cannot evict the first.  An exception to this rule is PAGE_WRITE_INV
+     * handling: the first page could have evicted itself.
       */
+    page1 = addr & TARGET_PAGE_MASK;
      page2 = (addr + size) & TARGET_PAGE_MASK;
      size2 = (addr + size) & ~TARGET_PAGE_MASK;
      index2 = tlb_index(env, mmu_idx, page2);
      entry2 = tlb_entry(env, mmu_idx, page2);
  
      tlb_addr2 = tlb_addr_write(entry2);
-    if (!tlb_hit_page(tlb_addr2, page2)) {
+    if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) {
          if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
              tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                       mmu_idx, retaddr);
@@ -2268,12 +2282,12 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
       */
      if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
          cpu_check_watchpoint(env_cpu(env), addr, size - size2,
-                             env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
+                             env_tlb(env)->d[mmu_idx].fulltlb[index].attrs,
                               BP_MEM_WRITE, retaddr);
      }
      if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
          cpu_check_watchpoint(env_cpu(env), page2, size2,
-                             env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
+                             env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs,
                               BP_MEM_WRITE, retaddr);
      }
  
@@ -2302,14 +2316,16 @@ static inline void QEMU_ALWAYS_INLINE
  store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
               MemOpIdx oi, uintptr_t retaddr, MemOp op)
  {
-    uintptr_t mmu_idx = get_mmuidx(oi);
-    uintptr_t index = tlb_index(env, mmu_idx, addr);
-    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
-    target_ulong tlb_addr = tlb_addr_write(entry);
      const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
-    unsigned a_bits = get_alignment_bits(get_memop(oi));
+    const unsigned a_bits = get_alignment_bits(get_memop(oi));
+    const size_t size = memop_size(op);
+    uintptr_t mmu_idx = get_mmuidx(oi);
+    uintptr_t index;
+    CPUTLBEntry *entry;
+    target_ulong tlb_addr;
      void *haddr;
-    size_t size = memop_size(op);
+
+    tcg_debug_assert(mmu_idx < NB_MMU_MODES);
  
      /* Handle CPU specific unaligned behaviour */
      if (addr & ((1 << a_bits) - 1)) {
@@ -2317,6 +2333,10 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
                               mmu_idx, retaddr);
      }
  
+    index = tlb_index(env, mmu_idx, addr);
+    entry = tlb_entry(env, mmu_idx, addr);
+    tlb_addr = tlb_addr_write(entry);
+
      /* If the TLB entry is for a different page, reload and try again.  */
      if (!tlb_hit(tlb_addr, addr)) {
          if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
@@ -2331,7 +2351,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
  
      /* Handle anything that isn't just a straight memory access.  */
      if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        CPUIOTLBEntry *iotlbentry;
+        CPUTLBEntryFull *full;
          bool need_swap;
  
          /* For anything that is unaligned, recurse through byte stores.  */
@@ -2339,20 +2359,20 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
              goto do_unaligned_access;
          }
  
-        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+        full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
  
          /* Handle watchpoints.  */
          if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
              /* On watchpoint hit, this will longjmp out.  */
              cpu_check_watchpoint(env_cpu(env), addr, size,
-                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
+                                 full->attrs, BP_MEM_WRITE, retaddr);
          }
  
          need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
  
          /* Handle I/O access.  */
          if (tlb_addr & TLB_MMIO) {
-            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
+            io_writex(env, full, mmu_idx, val, addr, retaddr,
                        op ^ (need_swap * MO_BSWAP));
              return;
          }
@@ -2364,7 +2384,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
  
          /* Handle clean RAM pages.  */
          if (tlb_addr & TLB_NOTDIRTY) {
-            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
+            notdirty_write(env_cpu(env), addr, size, full, retaddr);
          }
  
          haddr = (void *)((uintptr_t)addr + entry->addend);
@@ -2465,15 +2485,15 @@ void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
  void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                         MemOpIdx oi, uintptr_t retaddr)
  {
-    validate_memop(oi, MO_LEQ);
-    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
+    validate_memop(oi, MO_LEUQ);
+    store_helper(env, addr, val, oi, retaddr, MO_LEUQ);
  }
  
  void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                         MemOpIdx oi, uintptr_t retaddr)
  {
-    validate_memop(oi, MO_BEQ);
-    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
+    validate_memop(oi, MO_BEUQ);
+    store_helper(env, addr, val, oi, retaddr, MO_BEUQ);
  }
  
  /*
@@ -2487,7 +2507,6 @@ static inline void cpu_store_helper(CPUArchState *env, target_ulong addr,
                                      uint64_t val, MemOpIdx oi, uintptr_t ra,
                                      FullStoreHelper *full_store)
  {
-    trace_guest_st_before_exec(env_cpu(env), addr, oi);
      full_store(env, addr, val, oi, ra);
      qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
  }
@@ -2545,7 +2564,6 @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
      glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
  
  #define ATOMIC_MMU_CLEANUP
-#define ATOMIC_MMU_IDX   get_mmuidx(oi)
  
  #include "atomic_common.c.inc"
  
@@ -2609,11 +2627,11 @@ uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
  static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
                                MemOpIdx oi, uintptr_t retaddr)
  {
-    return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
+    return load_helper(env, addr, oi, retaddr, MO_TEUQ, true, full_ldq_code);
  }
  
  uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
  {
-    MemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
+    MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
      return full_ldq_code(env, addr, oi, 0);
  }