accel/tcg/cputlb.c

   1 /*
   2  *  Common CPU TLB handling
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "qemu/main-loop.h"
  22 #include "cpu.h"
  23 #include "exec/exec-all.h"
  24 #include "exec/memory.h"
  25 #include "exec/address-spaces.h"
  26 #include "exec/cpu_ldst.h"
  27 #include "exec/cputlb.h"
  28 #include "exec/memory-internal.h"
  29 #include "exec/ram_addr.h"
  30 #include "tcg/tcg.h"
  31 #include "qemu/error-report.h"
  32 #include "exec/log.h"
  33 #include "exec/helper-proto.h"
  34 #include "qemu/atomic.h"
  35 #include "qemu/atomic128.h"
  36
/*
 * Debug controls.
 *
 * Define DEBUG_TLB to switch on tlb_debug() output and the
 * assert_cpu_is_self() checks.  DEBUG_TLB_LOG only has an effect when
 * DEBUG_TLB is also defined; it routes tlb_debug() output to the
 * CPU_LOG_MMU log target instead of stderr.
 */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

/*
 * Turn the optional defines above into constants that are always defined
 * (0 or 1), so they can be tested with a plain C "if".
 */
#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

/*
 * tlb_debug: printf-style debug message prefixed with the calling
 * function's name.
 *
 * The message goes to qemu_log_mask(CPU_LOG_MMU, ...) when
 * DEBUG_TLB_LOG_GATE is set, otherwise to stderr when DEBUG_TLB_GATE is
 * set, and nowhere when both gates are 0.  The gates are tested with a C
 * "if" rather than #if, so the format string and arguments are still seen
 * by the compiler when debugging is disabled.
 */
#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)
  61
/*
 * assert_cpu_is_self: debug-only check (active when DEBUG_TLB_GATE is 1)
 * that @cpu either has not been created yet or is the CPU the calling
 * thread belongs to, as reported by qemu_cpu_is_self().
 */
#define assert_cpu_is_self(cpu) do {                              \
        if (DEBUG_TLB_GATE) {                                     \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
        }                                                         \
    } while (0)
  67
/*
 * Guest addresses are passed to async work items through a
 * run_on_cpu_data value, so it must be large enough to hold a
 * target_ulong even on 32 bit builds.
 */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/*
 * We currently can't handle more than 16 bits in the MMUIDX bitmask
 * (the idxmap parameters in this file are uint16_t).
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
/* Bitmask with one bit set for each of the NB_MMU_MODES MMU indexes. */
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
  76
  77 void tlb_init(CPUState *cpu)
  78 {
  79     CPUArchState *env = cpu->env_ptr;
  80
  81     qemu_spin_init(&env->tlb_c.lock);
  82
  83     /* Ensure that cpu_reset performs a full flush.  */
  84     env->tlb_c.dirty = ALL_MMUIDX_BITS;
  85 }
  86
  87 /* flush_all_helper: run fn across all cpus
  88  *
  89  * If the wait flag is set then the src cpu's helper will be queued as
  90  * "safe" work and the loop exited creating a synchronisation point
  91  * where all queued work will be finished before execution starts
  92  * again.
  93  */
  94 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
  95                              run_on_cpu_data d)
  96 {
  97     CPUState *cpu;
  98
  99     CPU_FOREACH(cpu) {
 100         if (cpu != src) {
 101             async_run_on_cpu(cpu, fn, d);
 102         }
 103     }
 104 }
 105
 106 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
 107 {
 108     CPUState *cpu;
 109     size_t full = 0, part = 0, elide = 0;
 110
 111     CPU_FOREACH(cpu) {
 112         CPUArchState *env = cpu->env_ptr;
 113
 114         full += atomic_read(&env->tlb_c.full_flush_count);
 115         part += atomic_read(&env->tlb_c.part_flush_count);
 116         elide += atomic_read(&env->tlb_c.elide_flush_count);
 117     }
 118     *pfull = full;
 119     *ppart = part;
 120     *pelide = elide;
 121 }
 122
 123 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
 124 {
 125     memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
 126     memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
 127     env->tlb_d[mmu_idx].large_page_addr = -1;
 128     env->tlb_d[mmu_idx].large_page_mask = -1;
 129     env->tlb_d[mmu_idx].vindex = 0;
 130 }
 131
 132 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
 133 {
 134     CPUArchState *env = cpu->env_ptr;
 135     uint16_t asked = data.host_int;
 136     uint16_t all_dirty, work, to_clean;
 137
 138     assert_cpu_is_self(cpu);
 139
 140     tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
 141
 142     qemu_spin_lock(&env->tlb_c.lock);
 143
 144     all_dirty = env->tlb_c.dirty;
 145     to_clean = asked & all_dirty;
 146     all_dirty &= ~to_clean;
 147     env->tlb_c.dirty = all_dirty;
 148
 149     for (work = to_clean; work != 0; work &= work - 1) {
 150         int mmu_idx = ctz32(work);
 151         tlb_flush_one_mmuidx_locked(env, mmu_idx);
 152     }
 153
 154     qemu_spin_unlock(&env->tlb_c.lock);
 155
 156     cpu_tb_jmp_cache_clear(cpu);
 157
 158     if (to_clean == ALL_MMUIDX_BITS) {
 159         atomic_set(&env->tlb_c.full_flush_count,
 160                    env->tlb_c.full_flush_count + 1);
 161     } else {
 162         atomic_set(&env->tlb_c.part_flush_count,
 163                    env->tlb_c.part_flush_count + ctpop16(to_clean));
 164         if (to_clean != asked) {
 165             atomic_set(&env->tlb_c.elide_flush_count,
 166                        env->tlb_c.elide_flush_count +
 167                        ctpop16(asked & ~to_clean));
 168         }
 169     }
 170 }
 171
 172 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
 173 {
 174     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
 175
 176     if (cpu->created && !qemu_cpu_is_self(cpu)) {
 177         async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
 178                          RUN_ON_CPU_HOST_INT(idxmap));
 179     } else {
 180         tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
 181     }
 182 }
 183
 184 void tlb_flush(CPUState *cpu)
 185 {
 186     tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
 187 }
 188
 189 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
 190 {
 191     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
 192
 193     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
 194
 195     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
 196     fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
 197 }
 198
 199 void tlb_flush_all_cpus(CPUState *src_cpu)
 200 {
 201     tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
 202 }
 203
 204 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
 205 {
 206     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
 207
 208     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
 209
 210     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
 211     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
 212 }
 213
 214 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
 215 {
 216     tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
 217 }
 218
 219 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
 220                                         target_ulong page)
 221 {
 222     return tlb_hit_page(tlb_entry->addr_read, page) ||
 223            tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
 224            tlb_hit_page(tlb_entry->addr_code, page);
 225 }
 226
 227 /* Called with tlb_c.lock held */
 228 static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
 229                                           target_ulong page)
 230 {
 231     if (tlb_hit_page_anyprot(tlb_entry, page)) {
 232         memset(tlb_entry, -1, sizeof(*tlb_entry));
 233     }
 234 }
 235
 236 /* Called with tlb_c.lock held */
 237 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
 238                                               target_ulong page)
 239 {
 240     int k;
 241
 242     assert_cpu_is_self(ENV_GET_CPU(env));
 243     for (k = 0; k < CPU_VTLB_SIZE; k++) {
 244         tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page);
 245     }
 246 }
 247
 248 static void tlb_flush_page_locked(CPUArchState *env, int midx,
 249                                   target_ulong page)
 250 {
 251     target_ulong lp_addr = env->tlb_d[midx].large_page_addr;
 252     target_ulong lp_mask = env->tlb_d[midx].large_page_mask;
 253
 254     /* Check if we need to flush due to large pages.  */
 255     if ((page & lp_mask) == lp_addr) {
 256         tlb_debug("forcing full flush midx %d ("
 257                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
 258                   midx, lp_addr, lp_mask);
 259         tlb_flush_one_mmuidx_locked(env, midx);
 260     } else {
 261         tlb_flush_entry_locked(tlb_entry(env, midx, page), page);
 262         tlb_flush_vtlb_page_locked(env, midx, page);
 263     }
 264 }
 265
/*
 * The page flush helpers below pack the mmuidx bit mask into the bottom
 * bits of the page-aligned address, so that both fit in one
 * run_on_cpu_data value.  Fail the build if the mask could need more
 * bits than the smallest supported page size leaves free.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
 270
 271 static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
 272                                                 run_on_cpu_data data)
 273 {
 274     CPUArchState *env = cpu->env_ptr;
 275     target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
 276     target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
 277     unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
 278     int mmu_idx;
 279
 280     assert_cpu_is_self(cpu);
 281
 282     tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
 283               addr, mmu_idx_bitmap);
 284
 285     qemu_spin_lock(&env->tlb_c.lock);
 286     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 287         if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
 288             tlb_flush_page_locked(env, mmu_idx, addr);
 289         }
 290     }
 291     qemu_spin_unlock(&env->tlb_c.lock);
 292
 293     tb_flush_jmp_cache(cpu, addr);
 294 }
 295
 296 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
 297 {
 298     target_ulong addr_and_mmu_idx;
 299
 300     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
 301
 302     /* This should already be page aligned */
 303     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
 304     addr_and_mmu_idx |= idxmap;
 305
 306     if (!qemu_cpu_is_self(cpu)) {
 307         async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
 308                          RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
 309     } else {
 310         tlb_flush_page_by_mmuidx_async_work(
 311             cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
 312     }
 313 }
 314
 315 void tlb_flush_page(CPUState *cpu, target_ulong addr)
 316 {
 317     tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
 318 }
 319
 320 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
 321                                        uint16_t idxmap)
 322 {
 323     const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
 324     target_ulong addr_and_mmu_idx;
 325
 326     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
 327
 328     /* This should already be page aligned */
 329     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
 330     addr_and_mmu_idx |= idxmap;
 331
 332     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
 333     fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
 334 }
 335
 336 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
 337 {
 338     tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
 339 }
 340
 341 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
 342                                               target_ulong addr,
 343                                               uint16_t idxmap)
 344 {
 345     const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
 346     target_ulong addr_and_mmu_idx;
 347
 348     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
 349
 350     /* This should already be page aligned */
 351     addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
 352     addr_and_mmu_idx |= idxmap;
 353
 354     flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
 355     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
 356 }
 357
 358 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
 359 {
 360     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
 361 }
 362
 363 /* update the TLBs so that writes to code in the virtual page 'addr'
 364    can be detected */
 365 void tlb_protect_code(ram_addr_t ram_addr)
 366 {
 367     cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
 368                                              DIRTY_MEMORY_CODE);
 369 }
 370
 371 /* update the TLB so that writes in physical page 'phys_addr' are no longer
 372    tested for self modifying code */
 373 void tlb_unprotect_code(ram_addr_t ram_addr)
 374 {
 375     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
 376 }
 377
 378
 379 /*
 380  * Dirty write flag handling
 381  *
 382  * When the TCG code writes to a location it looks up the address in
 383  * the TLB and uses that data to compute the final address. If any of
 384  * the lower bits of the address are set then the slow path is forced.
 385  * There are a number of reasons to do this but for normal RAM the
 386  * most usual is detecting writes to code regions which may invalidate
 387  * generated code.
 388  *
 389  * Other vCPUs might be reading their TLBs during guest execution, so we update
 390  * te->addr_write with atomic_set. We don't need to worry about this for
 391  * oversized guests as MTTCG is disabled for them.
 392  *
 393  * Called with tlb_c.lock held.
 394  */
 395 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
 396                                          uintptr_t start, uintptr_t length)
 397 {
 398     uintptr_t addr = tlb_entry->addr_write;
 399
 400     if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
 401         addr &= TARGET_PAGE_MASK;
 402         addr += tlb_entry->addend;
 403         if ((addr - start) < length) {
 404 #if TCG_OVERSIZED_GUEST
 405             tlb_entry->addr_write |= TLB_NOTDIRTY;
 406 #else
 407             atomic_set(&tlb_entry->addr_write,
 408                        tlb_entry->addr_write | TLB_NOTDIRTY);
 409 #endif
 410         }
 411     }
 412 }
 413
 414 /*
 415  * Called with tlb_c.lock held.
 416  * Called only from the vCPU context, i.e. the TLB's owner thread.
 417  */
 418 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
 419 {
 420     *d = *s;
 421 }
 422
 423 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
 424  * the target vCPU).
 425  * We must take tlb_c.lock to avoid racing with another vCPU update. The only
 426  * thing actually updated is the target TLB entry ->addr_write flags.
 427  */
 428 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
 429 {
 430     CPUArchState *env;
 431
 432     int mmu_idx;
 433
 434     env = cpu->env_ptr;
 435     qemu_spin_lock(&env->tlb_c.lock);
 436     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 437         unsigned int i;
 438
 439         for (i = 0; i < CPU_TLB_SIZE; i++) {
 440             tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1,
 441                                          length);
 442         }
 443
 444         for (i = 0; i < CPU_VTLB_SIZE; i++) {
 445             tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1,
 446                                          length);
 447         }
 448     }
 449     qemu_spin_unlock(&env->tlb_c.lock);
 450 }
 451
 452 /* Called with tlb_c.lock held */
 453 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
 454                                          target_ulong vaddr)
 455 {
 456     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
 457         tlb_entry->addr_write = vaddr;
 458     }
 459 }
 460
 461 /* update the TLB corresponding to virtual page vaddr
 462    so that it is no longer dirty */
 463 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
 464 {
 465     CPUArchState *env = cpu->env_ptr;
 466     int mmu_idx;
 467
 468     assert_cpu_is_self(cpu);
 469
 470     vaddr &= TARGET_PAGE_MASK;
 471     qemu_spin_lock(&env->tlb_c.lock);
 472     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 473         tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
 474     }
 475
 476     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 477         int k;
 478         for (k = 0; k < CPU_VTLB_SIZE; k++) {
 479             tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr);
 480         }
 481     }
 482     qemu_spin_unlock(&env->tlb_c.lock);
 483 }
 484
 485 /* Our TLB does not support large pages, so remember the area covered by
 486    large pages and trigger a full TLB flush if these are invalidated.  */
 487 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
 488                                target_ulong vaddr, target_ulong size)
 489 {
 490     target_ulong lp_addr = env->tlb_d[mmu_idx].large_page_addr;
 491     target_ulong lp_mask = ~(size - 1);
 492
 493     if (lp_addr == (target_ulong)-1) {
 494         /* No previous large page.  */
 495         lp_addr = vaddr;
 496     } else {
 497         /* Extend the existing region to include the new page.
 498            This is a compromise between unnecessary flushes and
 499            the cost of maintaining a full variable size TLB.  */
 500         lp_mask &= env->tlb_d[mmu_idx].large_page_mask;
 501         while (((lp_addr ^ vaddr) & lp_mask) != 0) {
 502             lp_mask <<= 1;
 503         }
 504     }
 505     env->tlb_d[mmu_idx].large_page_addr = lp_addr & lp_mask;
 506     env->tlb_d[mmu_idx].large_page_mask = lp_mask;
 507 }
 508
/*
 * tlb_set_page_with_attrs: add a new TLB entry for @vaddr in @mmu_idx
 * @cpu: the vCPU whose TLB is filled; expected to be the calling vCPU
 * @vaddr: guest virtual address of the mapping
 * @paddr: guest physical address of the mapping
 * @attrs: memory transaction attributes, stored with the iotlb entry
 * @prot: PAGE_READ / PAGE_WRITE / PAGE_EXEC (and PAGE_WRITE_INV) bits
 * @mmu_idx: MMU index to install the entry in
 * @size: size of the guest mapping
 *
 * At most one entry for a given virtual address is permitted.  Only a
 * single TARGET_PAGE_SIZE region is mapped.  The supplied @size is used
 * in two ways: a size above TARGET_PAGE_SIZE is recorded with
 * tlb_add_large_page() so that a later page flush inside it flushes the
 * whole MMU index, and a size below TARGET_PAGE_SIZE marks the entry
 * TLB_RECHECK so that every access takes the slow path.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
 */
void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                             hwaddr paddr, MemTxAttrs attrs, int prot,
                             int mmu_idx, target_ulong size)
{
    CPUArchState *env = cpu->env_ptr;
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;
    target_ulong code_address;
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    target_ulong vaddr_page;
    int asidx = cpu_asidx_from_attrs(cpu, attrs);

    assert_cpu_is_self(cpu);

    /* Never translate less than one page; track mappings larger than one. */
    if (size <= TARGET_PAGE_SIZE) {
        sz = TARGET_PAGE_SIZE;
    } else {
        tlb_add_large_page(env, mmu_idx, vaddr, size);
        sz = size;
    }
    vaddr_page = vaddr & TARGET_PAGE_MASK;
    paddr_page = paddr & TARGET_PAGE_MASK;

    /* Note that prot is passed by pointer, so the callee may modify it. */
    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
              " prot=%x idx=%d\n",
              vaddr, paddr, prot, mmu_idx);

    /* "address" is the comparator value; flag bits live in its low bits. */
    address = vaddr_page;
    if (size < TARGET_PAGE_SIZE) {
        /*
         * Slow-path the TLB entries; we will repeat the MMU check and TLB
         * fill on every access.
         */
        address |= TLB_RECHECK;
    }
    if (!memory_region_is_ram(section->mr) &&
        !memory_region_is_romd(section->mr)) {
        /* IO memory case */
        address |= TLB_MMIO;
        addend = 0;
    } else {
        /* TLB_MMIO for rom/romd handled below */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    }

    /*
     * Snapshot the comparator for code fetches before the call below,
     * which receives &address and may therefore change it.
     */
    code_address = address;
    iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
                                            paddr_page, xlat, prot, &address);

    index = tlb_index(env, mmu_idx, vaddr_page);
    te = tlb_entry(env, mmu_idx, vaddr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&env->tlb_c.lock);

    /* Note that the tlb is no longer clean.  */
    env->tlb_c.dirty |= 1 << mmu_idx;

    /* Make sure there's no cached translation for the new page.  */
    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
     */
    if (!tlb_hit_page_anyprot(te, vaddr_page)) {
        /* Victim slots are reused round-robin via the vindex counter. */
        unsigned vidx = env->tlb_d[mmu_idx].vindex++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx];

        /* Evict the old entry into the victim tlb.  */
        copy_tlb_helper_locked(tv, te);
        env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
    }

    /* refill the tlb */
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
     *  + the offset within section->mr of the page base (otherwise)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
    env->iotlb[mmu_idx][index].addr = iotlb - vaddr_page;
    env->iotlb[mmu_idx][index].attrs = attrs;

    /* Now calculate the new entry */
    /* The addend turns a guest virtual address into a host address. */
    tn.addend = addend - vaddr_page;
    /* A comparator of -1 is used for access types that are not permitted. */
    if (prot & PAGE_READ) {
        tn.addr_read = address;
    } else {
        tn.addr_read = -1;
    }

    if (prot & PAGE_EXEC) {
        tn.addr_code = code_address;
    } else {
        tn.addr_code = -1;
    }

    tn.addr_write = -1;
    if (prot & PAGE_WRITE) {
        if ((memory_region_is_ram(section->mr) && section->readonly)
            || memory_region_is_romd(section->mr)) {
            /* Write access calls the I/O callback.  */
            tn.addr_write = address | TLB_MMIO;
        } else if (memory_region_is_ram(section->mr)
                   && cpu_physical_memory_is_clean(
                       memory_region_get_ram_addr(section->mr) + xlat)) {
            /* Clean RAM page: force stores onto the slow path. */
            tn.addr_write = address | TLB_NOTDIRTY;
        } else {
            tn.addr_write = address;
        }
        if (prot & PAGE_WRITE_INV) {
            tn.addr_write |= TLB_INVALID_MASK;
        }
    }

    /* Publish the new entry in one copy, then drop the lock. */
    copy_tlb_helper_locked(te, &tn);
    qemu_spin_unlock(&env->tlb_c.lock);
}
 654
 655 /* Add a new TLB entry, but without specifying the memory
 656  * transaction attributes to be used.
 657  */
 658 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
 659                   hwaddr paddr, int prot,
 660                   int mmu_idx, target_ulong size)
 661 {
 662     tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
 663                             prot, mmu_idx, size);
 664 }
 665
 666 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
 667 {
 668     ram_addr_t ram_addr;
 669
 670     ram_addr = qemu_ram_addr_from_host(ptr);
 671     if (ram_addr == RAM_ADDR_INVALID) {
 672         error_report("Bad ram pointer %p", ptr);
 673         abort();
 674     }
 675     return ram_addr;
 676 }
 677
 678 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
 679                          int mmu_idx,
 680                          target_ulong addr, uintptr_t retaddr,
 681                          bool recheck, MMUAccessType access_type, int size)
 682 {
 683     CPUState *cpu = ENV_GET_CPU(env);
 684     hwaddr mr_offset;
 685     MemoryRegionSection *section;
 686     MemoryRegion *mr;
 687     uint64_t val;
 688     bool locked = false;
 689     MemTxResult r;
 690
 691     if (recheck) {
 692         /*
 693          * This is a TLB_RECHECK access, where the MMU protection
 694          * covers a smaller range than a target page, and we must
 695          * repeat the MMU check here. This tlb_fill() call might
 696          * longjump out if this access should cause a guest exception.
 697          */
 698         CPUTLBEntry *entry;
 699         target_ulong tlb_addr;
 700
 701         tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
 702
 703         entry = tlb_entry(env, mmu_idx, addr);
 704         tlb_addr = entry->addr_read;
 705         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
 706             /* RAM access */
 707             uintptr_t haddr = addr + entry->addend;
 708
 709             return ldn_p((void *)haddr, size);
 710         }
 711         /* Fall through for handling IO accesses */
 712     }
 713
 714     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
 715     mr = section->mr;
 716     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
 717     cpu->mem_io_pc = retaddr;
 718     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
 719         cpu_io_recompile(cpu, retaddr);
 720     }
 721
 722     cpu->mem_io_vaddr = addr;
 723     cpu->mem_io_access_type = access_type;
 724
 725     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
 726         qemu_mutex_lock_iothread();
 727         locked = true;
 728     }
 729     r = memory_region_dispatch_read(mr, mr_offset,
 730                                     &val, size, iotlbentry->attrs);
 731     if (r != MEMTX_OK) {
 732         hwaddr physaddr = mr_offset +
 733             section->offset_within_address_space -
 734             section->offset_within_region;
 735
 736         cpu_transaction_failed(cpu, physaddr, addr, size, access_type,
 737                                mmu_idx, iotlbentry->attrs, r, retaddr);
 738     }
 739     if (locked) {
 740         qemu_mutex_unlock_iothread();
 741     }
 742
 743     return val;
 744 }
 745
 746 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
 747                       int mmu_idx,
 748                       uint64_t val, target_ulong addr,
 749                       uintptr_t retaddr, bool recheck, int size)
 750 {
 751     CPUState *cpu = ENV_GET_CPU(env);
 752     hwaddr mr_offset;
 753     MemoryRegionSection *section;
 754     MemoryRegion *mr;
 755     bool locked = false;
 756     MemTxResult r;
 757
 758     if (recheck) {
 759         /*
 760          * This is a TLB_RECHECK access, where the MMU protection
 761          * covers a smaller range than a target page, and we must
 762          * repeat the MMU check here. This tlb_fill() call might
 763          * longjump out if this access should cause a guest exception.
 764          */
 765         CPUTLBEntry *entry;
 766         target_ulong tlb_addr;
 767
 768         tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
 769
 770         entry = tlb_entry(env, mmu_idx, addr);
 771         tlb_addr = tlb_addr_write(entry);
 772         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
 773             /* RAM access */
 774             uintptr_t haddr = addr + entry->addend;
 775
 776             stn_p((void *)haddr, size, val);
 777             return;
 778         }
 779         /* Fall through for handling IO accesses */
 780     }
 781
 782     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
 783     mr = section->mr;
 784     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
 785     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
 786         cpu_io_recompile(cpu, retaddr);
 787     }
 788     cpu->mem_io_vaddr = addr;
 789     cpu->mem_io_pc = retaddr;
 790
 791     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
 792         qemu_mutex_lock_iothread();
 793         locked = true;
 794     }
 795     r = memory_region_dispatch_write(mr, mr_offset,
 796                                      val, size, iotlbentry->attrs);
 797     if (r != MEMTX_OK) {
 798         hwaddr physaddr = mr_offset +
 799             section->offset_within_address_space -
 800             section->offset_within_region;
 801
 802         cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_STORE,
 803                                mmu_idx, iotlbentry->attrs, r, retaddr);
 804     }
 805     if (locked) {
 806         qemu_mutex_unlock_iothread();
 807     }
 808 }
 809
 810 /* Return true if ADDR is present in the victim tlb, and has been copied
 811    back to the main tlb.  */
 812 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
 813                            size_t elt_ofs, target_ulong page)
 814 {
 815     size_t vidx;
 816
 817     assert_cpu_is_self(ENV_GET_CPU(env));
 818     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
 819         CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
 820         target_ulong cmp;
 821
 822         /* elt_ofs might correspond to .addr_write, so use atomic_read */
 823 #if TCG_OVERSIZED_GUEST
 824         cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
 825 #else
 826         cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
 827 #endif
 828
 829         if (cmp == page) {
 830             /* Found entry in victim tlb, swap tlb and iotlb.  */
 831             CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
 832
 833             qemu_spin_lock(&env->tlb_c.lock);
 834             copy_tlb_helper_locked(&tmptlb, tlb);
 835             copy_tlb_helper_locked(tlb, vtlb);
 836             copy_tlb_helper_locked(vtlb, &tmptlb);
 837             qemu_spin_unlock(&env->tlb_c.lock);
 838
 839             CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
 840             CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
 841             tmpio = *io; *io = *vio; *vio = tmpio;
 842             return true;
 843         }
 844     }
 845     return false;
 846 }
 847
/*
 * Macro to call victim_tlb_hit() with local variables from the use
 * context: the caller must have "env", "mmu_idx" and "index" in scope.
 * TY names the CPUTLBEntry comparator field to test (e.g. addr_code);
 * ADDR is masked down to its page address before the lookup.
 */
#define VICTIM_TLB_HIT(TY, ADDR) \
  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                 (ADDR) & TARGET_PAGE_MASK)
 852
 853 /* NOTE: this function can trigger an exception */
 854 /* NOTE2: the returned address is not exactly the physical address: it
 855  * is actually a ram_addr_t (in system mode; the user mode emulation
 856  * version of this function returns a guest virtual address).
 857  */
 858 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
 859 {
 860     uintptr_t mmu_idx = cpu_mmu_index(env, true);
 861     uintptr_t index = tlb_index(env, mmu_idx, addr);
 862     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
 863     void *p;
 864
 865     if (unlikely(!tlb_hit(entry->addr_code, addr))) {
 866         if (!VICTIM_TLB_HIT(addr_code, addr)) {
 867             tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
 868         }
 869         assert(tlb_hit(entry->addr_code, addr));
 870     }
 871
 872     if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) {
 873         /*
 874          * Return -1 if we can't translate and execute from an entire
 875          * page of RAM here, which will cause us to execute by loading
 876          * and translating one insn at a time, without caching:
 877          *  - TLB_RECHECK: means the MMU protection covers a smaller range
 878          *    than a target page, so we must redo the MMU check every insn
 879          *  - TLB_MMIO: region is not backed by RAM
 880          */
 881         return -1;
 882     }
 883
 884     p = (void *)((uintptr_t)addr + entry->addend);
 885     return qemu_ram_addr_from_host_nofail(p);
 886 }
 887
 888 /* Probe for whether the specified guest write access is permitted.
 889  * If it is not permitted then an exception will be taken in the same
 890  * way as if this were a real write access (and we will not return).
 891  * Otherwise the function will return, and there will be a valid
 892  * entry in the TLB for this access.
 893  */
 894 void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
 895                  uintptr_t retaddr)
 896 {
 897     uintptr_t index = tlb_index(env, mmu_idx, addr);
 898     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
 899
 900     if (!tlb_hit(tlb_addr_write(entry), addr)) {
 901         /* TLB entry is for a different page */
 902         if (!VICTIM_TLB_HIT(addr_write, addr)) {
 903             tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
 904                      mmu_idx, retaddr);
 905         }
 906     }
 907 }
 908
 909 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
 910  * operations, or io operations to proceed.  Return the host address.  */
 911 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
 912                                TCGMemOpIdx oi, uintptr_t retaddr,
 913                                NotDirtyInfo *ndi)
 914 {
 915     size_t mmu_idx = get_mmuidx(oi);
 916     uintptr_t index = tlb_index(env, mmu_idx, addr);
 917     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
 918     target_ulong tlb_addr = tlb_addr_write(tlbe);
 919     TCGMemOp mop = get_memop(oi);
 920     int a_bits = get_alignment_bits(mop);
 921     int s_bits = mop & MO_SIZE;
 922     void *hostaddr;
 923
 924     /* Adjust the given return address.  */
 925     retaddr -= GETPC_ADJ;
 926
 927     /* Enforce guest required alignment.  */
 928     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
 929         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
 930         cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
 931                              mmu_idx, retaddr);
 932     }
 933
 934     /* Enforce qemu required alignment.  */
 935     if (unlikely(addr & ((1 << s_bits) - 1))) {
 936         /* We get here if guest alignment was not requested,
 937            or was not enforced by cpu_unaligned_access above.
 938            We might widen the access and emulate, but for now
 939            mark an exception and exit the cpu loop.  */
 940         goto stop_the_world;
 941     }
 942
 943     /* Check TLB entry and enforce page permissions.  */
 944     if (!tlb_hit(tlb_addr, addr)) {
 945         if (!VICTIM_TLB_HIT(addr_write, addr)) {
 946             tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE,
 947                      mmu_idx, retaddr);
 948         }
 949         tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
 950     }
 951
 952     /* Notice an IO access or a needs-MMU-lookup access */
 953     if (unlikely(tlb_addr & (TLB_MMIO | TLB_RECHECK))) {
 954         /* There's really nothing that can be done to
 955            support this apart from stop-the-world.  */
 956         goto stop_the_world;
 957     }
 958
 959     /* Let the guest notice RMW on a write-only page.  */
 960     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
 961         tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_LOAD,
 962                  mmu_idx, retaddr);
 963         /* Since we don't support reads and writes to different addresses,
 964            and we do have the proper page loaded for write, this shouldn't
 965            ever return.  But just in case, handle via stop-the-world.  */
 966         goto stop_the_world;
 967     }
 968
 969     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
 970
 971     ndi->active = false;
 972     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
 973         ndi->active = true;
 974         memory_notdirty_write_prepare(ndi, ENV_GET_CPU(env), addr,
 975                                       qemu_ram_addr_from_host_nofail(hostaddr),
 976                                       1 << s_bits);
 977     }
 978
 979     return hostaddr;
 980
 981  stop_the_world:
 982     cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
 983 }
 984
 985 #ifdef TARGET_WORDS_BIGENDIAN
 986 # define TGT_BE(X)  (X)
 987 # define TGT_LE(X)  BSWAP(X)
 988 #else
 989 # define TGT_BE(X)  BSWAP(X)
 990 # define TGT_LE(X)  (X)
 991 #endif
 992
 993 #define MMUSUFFIX _mmu
 994
     /*
      * Include softmmu_template.h once for each DATA_SIZE of 1, 2, 4 and 8,
      * with MMUSUFFIX set to _mmu.
      * NOTE(review): DATA_SIZE presumably selects the access width in bytes,
      * and since it is redefined before every include without an #undef
      * here, the template presumably #undefs it at its end - confirm in
      * softmmu_template.h.
      */
 995 #define DATA_SIZE 1
 996 #include "softmmu_template.h"
 997
 998 #define DATA_SIZE 2
 999 #include "softmmu_template.h"
1000
1001 #define DATA_SIZE 4
1002 #include "softmmu_template.h"
1003
1004 #define DATA_SIZE 8
1005 #include "softmmu_template.h"
1006
1007 /* First set of atomic helpers: OI and RETADDR are passed in explicitly,
1008    which makes these helpers callable from other helpers.  The macros
        below parameterize atomic_template.h, which is then included once per
        DATA_SIZE.  */
1009
     /* Extra parameters (note the leading comma): the memop index and the
        return address.  */
1010 #define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
     /* Helper name: atomic_<X> glued to SUFFIX, END and _mmu, wrapped in
        HELPER().  SUFFIX and END are not defined in this file; presumably
        atomic_template.h provides them - confirm there.  */
1011 #define ATOMIC_NAME(X) \
1012     HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
     /* Local state consumed by the LOOKUP and CLEANUP macros.  */
1013 #define ATOMIC_MMU_DECLS NotDirtyInfo ndi
     /* Resolve the host address via atomic_mmu_lookup(), using the explicit
        retaddr argument and filling in ndi.  */
1014 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi)
     /* Complete the not-dirty write, but only if ndi was marked active.  */
1015 #define ATOMIC_MMU_CLEANUP                              \
1016     do {                                                \
1017         if (unlikely(ndi.active)) {                     \
1018             memory_notdirty_write_complete(&ndi);       \
1019         }                                               \
1020     } while (0)
1021
1022 #define DATA_SIZE 1
1023 #include "atomic_template.h"
1024
1025 #define DATA_SIZE 2
1026 #include "atomic_template.h"
1027
1028 #define DATA_SIZE 4
1029 #include "atomic_template.h"
1030
     /* The DATA_SIZE 8 variant is only built when CONFIG_ATOMIC64 is set.  */
1031 #ifdef CONFIG_ATOMIC64
1032 #define DATA_SIZE 8
1033 #include "atomic_template.h"
1034 #endif
1035
     /* The DATA_SIZE 16 variant needs HAVE_CMPXCHG128 or HAVE_ATOMIC128.  */
1036 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
1037 #define DATA_SIZE 16
1038 #include "atomic_template.h"
1039 #endif
1040
1041 /* Second set of helpers are directly callable from TCG as helpers.
        Compared with the first set, no return address is passed in: the
        lookup uses GETPC() instead, and the helper name loses its _mmu
        suffix.  */
1042
     /* Only these three macros are replaced.  ATOMIC_MMU_DECLS and
        ATOMIC_MMU_CLEANUP are not touched here, so their earlier definitions
        stay in effect unless atomic_template.h itself undefines them (not
        visible from this file).  */
1043 #undef EXTRA_ARGS
1044 #undef ATOMIC_NAME
1045 #undef ATOMIC_MMU_LOOKUP
1046 #define EXTRA_ARGS         , TCGMemOpIdx oi
1047 #define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
1048 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi)
1049
1050 #define DATA_SIZE 1
1051 #include "atomic_template.h"
1052
1053 #define DATA_SIZE 2
1054 #include "atomic_template.h"
1055
1056 #define DATA_SIZE 4
1057 #include "atomic_template.h"
1058
     /* The DATA_SIZE 8 variant is only built when CONFIG_ATOMIC64 is set.
        There is no DATA_SIZE 16 instantiation in this second set.  */
1059 #ifdef CONFIG_ATOMIC64
1060 #define DATA_SIZE 8
1061 #include "atomic_template.h"
1062 #endif
1063
1064 /* Code access functions: softmmu_template.h is included again for
        DATA_SIZE 1, 2, 4 and 8, this time with MMUSUFFIX set to _cmmu and
        SOFTMMU_CODE_ACCESS defined.  */
1065
1066 #undef MMUSUFFIX
1067 #define MMUSUFFIX _cmmu
     /* From here on GETPC() expands to the constant 0, so any use of it in
        the template instantiations below yields a zero return address.  */
1068 #undef GETPC
1069 #define GETPC() ((uintptr_t)0)
     /* NOTE(review): presumably switches the template to its code-fetch
        variants - confirm in softmmu_template.h.  */
1070 #define SOFTMMU_CODE_ACCESS
1071
1072 #define DATA_SIZE 1
1073 #include "softmmu_template.h"
1074
1075 #define DATA_SIZE 2
1076 #include "softmmu_template.h"
1077
1078 #define DATA_SIZE 4
1079 #include "softmmu_template.h"
1080
1081 #define DATA_SIZE 8
1082 #include "softmmu_template.h"