1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
23
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include "qemu.h"
40 #else /* !CONFIG_USER_ONLY */
41 #include "hw/hw.h"
42 #include "exec/memory.h"
43 #include "exec/ioport.h"
44 #include "sysemu/dma.h"
45 #include "exec/address-spaces.h"
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
54
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57 #include "exec/log.h"
58
59 #include "migration/vmstate.h"
60
61 #include "qemu/range.h"
62 #ifndef _WIN32
63 #include "qemu/mmap-alloc.h"
64 #endif
65
66 //#define DEBUG_SUBPAGE
67
68 #if !defined(CONFIG_USER_ONLY)
69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
70 * are protected by the ramlist lock.
71 */
72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
73
74 static MemoryRegion *system_memory;
75 static MemoryRegion *system_io;
76
77 AddressSpace address_space_io;
78 AddressSpace address_space_memory;
79
80 MemoryRegion io_mem_rom, io_mem_notdirty;
81 static MemoryRegion io_mem_unassigned;
82
83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
84 #define RAM_PREALLOC (1 << 0)
85
86 /* RAM is mmap-ed with MAP_SHARED */
87 #define RAM_SHARED (1 << 1)
88
89 /* Only a portion of RAM (used_length) is actually used, and migrated.
90 * This used_length size can change across reboots.
91 */
92 #define RAM_RESIZEABLE (1 << 2)
93
94 #endif
95
96 #ifdef TARGET_PAGE_BITS_VARY
97 int target_page_bits;
98 bool target_page_bits_decided;
99 #endif
100
101 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
102 /* current CPU in the current thread. It is only valid inside
103 cpu_exec() */
104 __thread CPUState *current_cpu;
105 /* 0 = Do not count executed instructions.
106 1 = Precise instruction counting.
107 2 = Adaptive rate instruction counting. */
108 int use_icount;
109
110 bool set_preferred_target_page_bits(int bits)
111 {
112 /* The target page size is the lowest common denominator for all
113 * the CPUs in the system, so we can only make it smaller, never
114 * larger. And we can't make it smaller once we've committed to
115 * a particular size.
116 */
117 #ifdef TARGET_PAGE_BITS_VARY
118 assert(bits >= TARGET_PAGE_BITS_MIN);
119 if (target_page_bits == 0 || target_page_bits > bits) {
120 if (target_page_bits_decided) {
121 return false;
122 }
123 target_page_bits = bits;
124 }
125 #endif
126 return true;
127 }
128
129 #if !defined(CONFIG_USER_ONLY)
130
131 static void finalize_target_page_bits(void)
132 {
133 #ifdef TARGET_PAGE_BITS_VARY
134 if (target_page_bits == 0) {
135 target_page_bits = TARGET_PAGE_BITS_MIN;
136 }
137 target_page_bits_decided = true;
138 #endif
139 }
140
141 typedef struct PhysPageEntry PhysPageEntry;
142
143 struct PhysPageEntry {
144     /* How many levels to skip to reach the next node (each level covers P_L2_BITS index bits). 0 for a leaf. */
145 uint32_t skip : 6;
146 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
147 uint32_t ptr : 26;
148 };
149
150 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
151
152 /* Size of the L2 (and L3, etc) page tables. */
153 #define ADDR_SPACE_BITS 64
154
155 #define P_L2_BITS 9
156 #define P_L2_SIZE (1 << P_L2_BITS)
157
158 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
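/* For example, with TARGET_PAGE_BITS == 12 (4 KiB target pages) this gives
 * ((64 - 12 - 1) / 9) + 1 = 6 levels of 512-entry tables, enough to cover
 * the 52 bits of page index in a 64-bit address space.
 */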
159
160 typedef PhysPageEntry Node[P_L2_SIZE];
161
162 typedef struct PhysPageMap {
163 struct rcu_head rcu;
164
165 unsigned sections_nb;
166 unsigned sections_nb_alloc;
167 unsigned nodes_nb;
168 unsigned nodes_nb_alloc;
169 Node *nodes;
170 MemoryRegionSection *sections;
171 } PhysPageMap;
172
173 struct AddressSpaceDispatch {
174 struct rcu_head rcu;
175
176 MemoryRegionSection *mru_section;
177 /* This is a multi-level map on the physical address space.
178 * The bottom level has pointers to MemoryRegionSections.
179 */
180 PhysPageEntry phys_map;
181 PhysPageMap map;
182 AddressSpace *as;
183 };
184
185 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
186 typedef struct subpage_t {
187 MemoryRegion iomem;
188 AddressSpace *as;
189 hwaddr base;
190 uint16_t sub_section[];
191 } subpage_t;
192
193 #define PHYS_SECTION_UNASSIGNED 0
194 #define PHYS_SECTION_NOTDIRTY 1
195 #define PHYS_SECTION_ROM 2
196 #define PHYS_SECTION_WATCH 3
197
198 static void io_mem_init(void);
199 static void memory_map_init(void);
200 static void tcg_commit(MemoryListener *listener);
201
202 static MemoryRegion io_mem_watch;
203
204 /**
205 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
206 * @cpu: the CPU whose AddressSpace this is
207 * @as: the AddressSpace itself
208 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
209 * @tcg_as_listener: listener for tracking changes to the AddressSpace
210 */
211 struct CPUAddressSpace {
212 CPUState *cpu;
213 AddressSpace *as;
214 struct AddressSpaceDispatch *memory_dispatch;
215 MemoryListener tcg_as_listener;
216 };
217
218 #endif
219
220 #if !defined(CONFIG_USER_ONLY)
221
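/* Make sure the node array can hold at least @nodes more entries, growing
 * it geometrically; the static hint lets later maps start from the size the
 * previous map ended up needing.
 */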
222 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
223 {
224 static unsigned alloc_hint = 16;
225 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
226 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
227 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
228 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
229 alloc_hint = map->nodes_nb_alloc;
230 }
231 }
232
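/* Allocate one radix tree node and initialize all of its entries: leaves
 * point at PHYS_SECTION_UNASSIGNED, interior entries start out with no
 * child (PHYS_MAP_NODE_NIL). Returns the node's index in map->nodes.
 */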
233 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
234 {
235 unsigned i;
236 uint32_t ret;
237 PhysPageEntry e;
238 PhysPageEntry *p;
239
240 ret = map->nodes_nb++;
241 p = map->nodes[ret];
242 assert(ret != PHYS_MAP_NODE_NIL);
243 assert(ret != map->nodes_nb_alloc);
244
245 e.skip = leaf ? 0 : 1;
246 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
247 for (i = 0; i < P_L2_SIZE; ++i) {
248 memcpy(&p[i], &e, sizeof(e));
249 }
250 return ret;
251 }
252
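/* Recursively fill in the radix tree so that the page range [*index,
 * *index + *nb) resolves to section @leaf, descending a level whenever the
 * remaining range does not cover a whole, aligned block at this level.
 */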
253 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
254 hwaddr *index, hwaddr *nb, uint16_t leaf,
255 int level)
256 {
257 PhysPageEntry *p;
258 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
259
260 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
261 lp->ptr = phys_map_node_alloc(map, level == 0);
262 }
263 p = map->nodes[lp->ptr];
264 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
265
266 while (*nb && lp < &p[P_L2_SIZE]) {
267 if ((*index & (step - 1)) == 0 && *nb >= step) {
268 lp->skip = 0;
269 lp->ptr = leaf;
270 *index += step;
271 *nb -= step;
272 } else {
273 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
274 }
275 ++lp;
276 }
277 }
278
279 static void phys_page_set(AddressSpaceDispatch *d,
280 hwaddr index, hwaddr nb,
281 uint16_t leaf)
282 {
283 /* Wildly overreserve - it doesn't matter much. */
284 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
285
286 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
287 }
288
289 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
290 * and update our entry so we can skip it and go directly to the destination.
291 */
292 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
293 {
294 unsigned valid_ptr = P_L2_SIZE;
295 int valid = 0;
296 PhysPageEntry *p;
297 int i;
298
299 if (lp->ptr == PHYS_MAP_NODE_NIL) {
300 return;
301 }
302
303 p = nodes[lp->ptr];
304 for (i = 0; i < P_L2_SIZE; i++) {
305 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
306 continue;
307 }
308
309 valid_ptr = i;
310 valid++;
311 if (p[i].skip) {
312 phys_page_compact(&p[i], nodes);
313 }
314 }
315
316 /* We can only compress if there's only one child. */
317 if (valid != 1) {
318 return;
319 }
320
321 assert(valid_ptr < P_L2_SIZE);
322
323 /* Don't compress if it won't fit in the # of bits we have. */
324 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
325 return;
326 }
327
328 lp->ptr = p[valid_ptr].ptr;
329 if (!p[valid_ptr].skip) {
330 /* If our only child is a leaf, make this a leaf. */
331 /* By design, we should have made this node a leaf to begin with so we
332 * should never reach here.
333 * But since it's so simple to handle this, let's do it just in case we
334 * change this rule.
335 */
336 lp->skip = 0;
337 } else {
338 lp->skip += p[valid_ptr].skip;
339 }
340 }
341
342 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
343 {
344 if (d->phys_map.skip) {
345 phys_page_compact(&d->phys_map, d->map.nodes);
346 }
347 }
348
349 static inline bool section_covers_addr(const MemoryRegionSection *section,
350 hwaddr addr)
351 {
352 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
353 * the section must cover the entire address space.
354 */
355 return int128_gethi(section->size) ||
356 range_covers_byte(section->offset_within_address_space,
357 int128_getlo(section->size), addr);
358 }
359
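/* Walk the radix tree for @addr and return the MemoryRegionSection covering
 * it, or the unassigned section if no mapping exists there.
 */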
360 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
361 Node *nodes, MemoryRegionSection *sections)
362 {
363 PhysPageEntry *p;
364 hwaddr index = addr >> TARGET_PAGE_BITS;
365 int i;
366
367 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
368 if (lp.ptr == PHYS_MAP_NODE_NIL) {
369 return &sections[PHYS_SECTION_UNASSIGNED];
370 }
371 p = nodes[lp.ptr];
372 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
373 }
374
375 if (section_covers_addr(&sections[lp.ptr], addr)) {
376 return &sections[lp.ptr];
377 } else {
378 return &sections[PHYS_SECTION_UNASSIGNED];
379 }
380 }
381
382 bool memory_region_is_unassigned(MemoryRegion *mr)
383 {
384 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
385 && mr != &io_mem_watch;
386 }
387
388 /* Called from RCU critical section */
389 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
390 hwaddr addr,
391 bool resolve_subpage)
392 {
393 MemoryRegionSection *section = atomic_read(&d->mru_section);
394 subpage_t *subpage;
395 bool update;
396
397 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
398 section_covers_addr(section, addr)) {
399 update = false;
400 } else {
401 section = phys_page_find(d->phys_map, addr, d->map.nodes,
402 d->map.sections);
403 update = true;
404 }
405 if (resolve_subpage && section->mr->subpage) {
406 subpage = container_of(section->mr, subpage_t, iomem);
407 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
408 }
409 if (update) {
410 atomic_set(&d->mru_section, section);
411 }
412 return section;
413 }
414
415 /* Called from RCU critical section */
416 static MemoryRegionSection *
417 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
418 hwaddr *plen, bool resolve_subpage)
419 {
420 MemoryRegionSection *section;
421 MemoryRegion *mr;
422 Int128 diff;
423
424 section = address_space_lookup_region(d, addr, resolve_subpage);
425 /* Compute offset within MemoryRegionSection */
426 addr -= section->offset_within_address_space;
427
428 /* Compute offset within MemoryRegion */
429 *xlat = addr + section->offset_within_region;
430
431 mr = section->mr;
432
433 /* MMIO registers can be expected to perform full-width accesses based only
434 * on their address, without considering adjacent registers that could
435 * decode to completely different MemoryRegions. When such registers
436 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
437 * regions overlap wildly. For this reason we cannot clamp the accesses
438 * here.
439 *
440 * If the length is small (as is the case for address_space_ldl/stl),
441 * everything works fine. If the incoming length is large, however,
442 * the caller really has to do the clamping through memory_access_size.
443 */
444 if (memory_region_is_ram(mr)) {
445 diff = int128_sub(section->size, int128_make64(addr));
446 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
447 }
448 return section;
449 }
450
451 /* Called from RCU critical section */
452 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
453 hwaddr *xlat, hwaddr *plen,
454 bool is_write)
455 {
456 IOMMUTLBEntry iotlb;
457 MemoryRegionSection *section;
458 MemoryRegion *mr;
459
460 for (;;) {
461 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
462 section = address_space_translate_internal(d, addr, &addr, plen, true);
463 mr = section->mr;
464
465 if (!mr->iommu_ops) {
466 break;
467 }
468
469 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
470 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
471 | (addr & iotlb.addr_mask));
472 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
473 if (!(iotlb.perm & (1 << is_write))) {
474 mr = &io_mem_unassigned;
475 break;
476 }
477
478 as = iotlb.target_as;
479 }
480
481 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
482 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
483 *plen = MIN(page, *plen);
484 }
485
486 *xlat = addr;
487 return mr;
488 }
489
490 /* Called from RCU critical section */
491 MemoryRegionSection *
492 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
493 hwaddr *xlat, hwaddr *plen)
494 {
495 MemoryRegionSection *section;
496 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
497
498 section = address_space_translate_internal(d, addr, xlat, plen, false);
499
500 assert(!section->mr->iommu_ops);
501 return section;
502 }
503 #endif
504
505 #if !defined(CONFIG_USER_ONLY)
506
507 static int cpu_common_post_load(void *opaque, int version_id)
508 {
509 CPUState *cpu = opaque;
510
511 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
512 version_id is increased. */
513 cpu->interrupt_request &= ~0x01;
514 tlb_flush(cpu, 1);
515
516 return 0;
517 }
518
519 static int cpu_common_pre_load(void *opaque)
520 {
521 CPUState *cpu = opaque;
522
523 cpu->exception_index = -1;
524
525 return 0;
526 }
527
528 static bool cpu_common_exception_index_needed(void *opaque)
529 {
530 CPUState *cpu = opaque;
531
532 return tcg_enabled() && cpu->exception_index != -1;
533 }
534
535 static const VMStateDescription vmstate_cpu_common_exception_index = {
536 .name = "cpu_common/exception_index",
537 .version_id = 1,
538 .minimum_version_id = 1,
539 .needed = cpu_common_exception_index_needed,
540 .fields = (VMStateField[]) {
541 VMSTATE_INT32(exception_index, CPUState),
542 VMSTATE_END_OF_LIST()
543 }
544 };
545
546 static bool cpu_common_crash_occurred_needed(void *opaque)
547 {
548 CPUState *cpu = opaque;
549
550 return cpu->crash_occurred;
551 }
552
553 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
554 .name = "cpu_common/crash_occurred",
555 .version_id = 1,
556 .minimum_version_id = 1,
557 .needed = cpu_common_crash_occurred_needed,
558 .fields = (VMStateField[]) {
559 VMSTATE_BOOL(crash_occurred, CPUState),
560 VMSTATE_END_OF_LIST()
561 }
562 };
563
564 const VMStateDescription vmstate_cpu_common = {
565 .name = "cpu_common",
566 .version_id = 1,
567 .minimum_version_id = 1,
568 .pre_load = cpu_common_pre_load,
569 .post_load = cpu_common_post_load,
570 .fields = (VMStateField[]) {
571 VMSTATE_UINT32(halted, CPUState),
572 VMSTATE_UINT32(interrupt_request, CPUState),
573 VMSTATE_END_OF_LIST()
574 },
575 .subsections = (const VMStateDescription*[]) {
576 &vmstate_cpu_common_exception_index,
577 &vmstate_cpu_common_crash_occurred,
578 NULL
579 }
580 };
581
582 #endif
583
584 CPUState *qemu_get_cpu(int index)
585 {
586 CPUState *cpu;
587
588 CPU_FOREACH(cpu) {
589 if (cpu->cpu_index == index) {
590 return cpu;
591 }
592 }
593
594 return NULL;
595 }
596
597 #if !defined(CONFIG_USER_ONLY)
598 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
599 {
600 CPUAddressSpace *newas;
601
602 /* Target code should have set num_ases before calling us */
603 assert(asidx < cpu->num_ases);
604
605 if (asidx == 0) {
606 /* address space 0 gets the convenience alias */
607 cpu->as = as;
608 }
609
610 /* KVM cannot currently support multiple address spaces. */
611 assert(asidx == 0 || !kvm_enabled());
612
613 if (!cpu->cpu_ases) {
614 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
615 }
616
617 newas = &cpu->cpu_ases[asidx];
618 newas->cpu = cpu;
619 newas->as = as;
620 if (tcg_enabled()) {
621 newas->tcg_as_listener.commit = tcg_commit;
622 memory_listener_register(&newas->tcg_as_listener, as);
623 }
624 }
625
626 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
627 {
628 /* Return the AddressSpace corresponding to the specified index */
629 return cpu->cpu_ases[asidx].as;
630 }
631 #endif
632
633 void cpu_exec_unrealizefn(CPUState *cpu)
634 {
635 CPUClass *cc = CPU_GET_CLASS(cpu);
636
637 cpu_list_remove(cpu);
638
639 if (cc->vmsd != NULL) {
640 vmstate_unregister(NULL, cc->vmsd, cpu);
641 }
642 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
643 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
644 }
645 }
646
647 void cpu_exec_initfn(CPUState *cpu)
648 {
649 cpu->as = NULL;
650 cpu->num_ases = 0;
651
652 #ifndef CONFIG_USER_ONLY
653 cpu->thread_id = qemu_get_thread_id();
654
655 /* This is a softmmu CPU object, so create a property for it
656 * so users can wire up its memory. (This can't go in qom/cpu.c
657 * because that file is compiled only once for both user-mode
658 * and system builds.) The default if no link is set up is to use
659 * the system address space.
660 */
661 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
662 (Object **)&cpu->memory,
663 qdev_prop_allow_set_link_before_realize,
664 OBJ_PROP_LINK_UNREF_ON_RELEASE,
665 &error_abort);
666 cpu->memory = system_memory;
667 object_ref(OBJECT(cpu->memory));
668 #endif
669 }
670
671 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
672 {
673 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
674
675 cpu_list_add(cpu);
676
677 #ifndef CONFIG_USER_ONLY
678 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
679 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
680 }
681 if (cc->vmsd != NULL) {
682 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
683 }
684 #endif
685 }
686
687 #if defined(CONFIG_USER_ONLY)
688 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
689 {
690 mmap_lock();
691 tb_lock();
692 tb_invalidate_phys_page_range(pc, pc + 1, 0);
693 tb_unlock();
694 mmap_unlock();
695 }
696 #else
697 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
698 {
699 MemTxAttrs attrs;
700 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
701 int asidx = cpu_asidx_from_attrs(cpu, attrs);
702 if (phys != -1) {
703 /* Locks grabbed by tb_invalidate_phys_addr */
704 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
705 phys | (pc & ~TARGET_PAGE_MASK));
706 }
707 }
708 #endif
709
710 #if defined(CONFIG_USER_ONLY)
711 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
712
713 {
714 }
715
716 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
717 int flags)
718 {
719 return -ENOSYS;
720 }
721
722 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
723 {
724 }
725
726 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
727 int flags, CPUWatchpoint **watchpoint)
728 {
729 return -ENOSYS;
730 }
731 #else
732 /* Add a watchpoint. */
733 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
734 int flags, CPUWatchpoint **watchpoint)
735 {
736 CPUWatchpoint *wp;
737
738 /* forbid ranges which are empty or run off the end of the address space */
739 if (len == 0 || (addr + len - 1) < addr) {
740 error_report("tried to set invalid watchpoint at %"
741 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
742 return -EINVAL;
743 }
744 wp = g_malloc(sizeof(*wp));
745
746 wp->vaddr = addr;
747 wp->len = len;
748 wp->flags = flags;
749
750 /* keep all GDB-injected watchpoints in front */
751 if (flags & BP_GDB) {
752 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
753 } else {
754 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
755 }
756
757 tlb_flush_page(cpu, addr);
758
759 if (watchpoint)
760 *watchpoint = wp;
761 return 0;
762 }
763
764 /* Remove a specific watchpoint. */
765 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
766 int flags)
767 {
768 CPUWatchpoint *wp;
769
770 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
771 if (addr == wp->vaddr && len == wp->len
772 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
773 cpu_watchpoint_remove_by_ref(cpu, wp);
774 return 0;
775 }
776 }
777 return -ENOENT;
778 }
779
780 /* Remove a specific watchpoint by reference. */
781 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
782 {
783 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
784
785 tlb_flush_page(cpu, watchpoint->vaddr);
786
787 g_free(watchpoint);
788 }
789
790 /* Remove all matching watchpoints. */
791 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
792 {
793 CPUWatchpoint *wp, *next;
794
795 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
796 if (wp->flags & mask) {
797 cpu_watchpoint_remove_by_ref(cpu, wp);
798 }
799 }
800 }
801
802 /* Return true if this watchpoint address matches the specified
803 * access (ie the address range covered by the watchpoint overlaps
804 * partially or completely with the address range covered by the
805 * access).
806 */
807 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
808 vaddr addr,
809 vaddr len)
810 {
811 /* We know the lengths are non-zero, but a little caution is
812 * required to avoid errors in the case where the range ends
813 * exactly at the top of the address space and so addr + len
814 * wraps round to zero.
815 */
816 vaddr wpend = wp->vaddr + wp->len - 1;
817 vaddr addrend = addr + len - 1;
818
819 return !(addr > wpend || wp->vaddr > addrend);
820 }
821
822 #endif
823
824 /* Add a breakpoint. */
825 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
826 CPUBreakpoint **breakpoint)
827 {
828 CPUBreakpoint *bp;
829
830 bp = g_malloc(sizeof(*bp));
831
832 bp->pc = pc;
833 bp->flags = flags;
834
835 /* keep all GDB-injected breakpoints in front */
836 if (flags & BP_GDB) {
837 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
838 } else {
839 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
840 }
841
842 breakpoint_invalidate(cpu, pc);
843
844 if (breakpoint) {
845 *breakpoint = bp;
846 }
847 return 0;
848 }
849
850 /* Remove a specific breakpoint. */
851 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
852 {
853 CPUBreakpoint *bp;
854
855 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
856 if (bp->pc == pc && bp->flags == flags) {
857 cpu_breakpoint_remove_by_ref(cpu, bp);
858 return 0;
859 }
860 }
861 return -ENOENT;
862 }
863
864 /* Remove a specific breakpoint by reference. */
865 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
866 {
867 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
868
869 breakpoint_invalidate(cpu, breakpoint->pc);
870
871 g_free(breakpoint);
872 }
873
874 /* Remove all matching breakpoints. */
875 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
876 {
877 CPUBreakpoint *bp, *next;
878
879 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
880 if (bp->flags & mask) {
881 cpu_breakpoint_remove_by_ref(cpu, bp);
882 }
883 }
884 }
885
886 /* enable or disable single step mode. EXCP_DEBUG is returned by the
887 CPU loop after each instruction */
888 void cpu_single_step(CPUState *cpu, int enabled)
889 {
890 if (cpu->singlestep_enabled != enabled) {
891 cpu->singlestep_enabled = enabled;
892 if (kvm_enabled()) {
893 kvm_update_guest_debug(cpu, 0);
894 } else {
895 /* must flush all the translated code to avoid inconsistencies */
896 /* XXX: only flush what is necessary */
897 tb_flush(cpu);
898 }
899 }
900 }
901
902 void cpu_abort(CPUState *cpu, const char *fmt, ...)
903 {
904 va_list ap;
905 va_list ap2;
906
907 va_start(ap, fmt);
908 va_copy(ap2, ap);
909 fprintf(stderr, "qemu: fatal: ");
910 vfprintf(stderr, fmt, ap);
911 fprintf(stderr, "\n");
912 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
913 if (qemu_log_separate()) {
914 qemu_log("qemu: fatal: ");
915 qemu_log_vprintf(fmt, ap2);
916 qemu_log("\n");
917 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
918 qemu_log_flush();
919 qemu_log_close();
920 }
921 va_end(ap2);
922 va_end(ap);
923 replay_finish();
924 #if defined(CONFIG_USER_ONLY)
925 {
926 struct sigaction act;
927 sigfillset(&act.sa_mask);
928 act.sa_handler = SIG_DFL;
929 sigaction(SIGABRT, &act, NULL);
930 }
931 #endif
932 abort();
933 }
934
935 #if !defined(CONFIG_USER_ONLY)
936 /* Called from RCU critical section */
937 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
938 {
939 RAMBlock *block;
940
941 block = atomic_rcu_read(&ram_list.mru_block);
942 if (block && addr - block->offset < block->max_length) {
943 return block;
944 }
945 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
946 if (addr - block->offset < block->max_length) {
947 goto found;
948 }
949 }
950
951 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
952 abort();
953
954 found:
955 /* It is safe to write mru_block outside the iothread lock. This
956 * is what happens:
957 *
958 * mru_block = xxx
959 * rcu_read_unlock()
960 * xxx removed from list
961 * rcu_read_lock()
962 * read mru_block
963 * mru_block = NULL;
964 * call_rcu(reclaim_ramblock, xxx);
965 * rcu_read_unlock()
966 *
967 * atomic_rcu_set is not needed here. The block was already published
968 * when it was placed into the list. Here we're just making an extra
969 * copy of the pointer.
970 */
971 ram_list.mru_block = block;
972 return block;
973 }
974
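/* Drop the cached "already dirtied" state from every CPU's TLB for the
 * given RAM range, so that the next write to it is trapped and the dirty
 * bitmaps are updated again.
 */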
975 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
976 {
977 CPUState *cpu;
978 ram_addr_t start1;
979 RAMBlock *block;
980 ram_addr_t end;
981
982 end = TARGET_PAGE_ALIGN(start + length);
983 start &= TARGET_PAGE_MASK;
984
985 rcu_read_lock();
986 block = qemu_get_ram_block(start);
987 assert(block == qemu_get_ram_block(end - 1));
988 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
989 CPU_FOREACH(cpu) {
990 tlb_reset_dirty(cpu, start1, length);
991 }
992 rcu_read_unlock();
993 }
994
995 /* Note: start and end must be within the same ram block. */
996 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
997 ram_addr_t length,
998 unsigned client)
999 {
1000 DirtyMemoryBlocks *blocks;
1001 unsigned long end, page;
1002 bool dirty = false;
1003
1004 if (length == 0) {
1005 return false;
1006 }
1007
1008 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1009 page = start >> TARGET_PAGE_BITS;
1010
1011 rcu_read_lock();
1012
1013 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1014
1015 while (page < end) {
1016 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1017 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1018 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1019
1020 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1021 offset, num);
1022 page += num;
1023 }
1024
1025 rcu_read_unlock();
1026
1027 if (dirty && tcg_enabled()) {
1028 tlb_reset_dirty_range_all(start, length);
1029 }
1030
1031 return dirty;
1032 }
1033
1034 /* Called from RCU critical section */
1035 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1036 MemoryRegionSection *section,
1037 target_ulong vaddr,
1038 hwaddr paddr, hwaddr xlat,
1039 int prot,
1040 target_ulong *address)
1041 {
1042 hwaddr iotlb;
1043 CPUWatchpoint *wp;
1044
1045 if (memory_region_is_ram(section->mr)) {
1046 /* Normal RAM. */
1047 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1048 if (!section->readonly) {
1049 iotlb |= PHYS_SECTION_NOTDIRTY;
1050 } else {
1051 iotlb |= PHYS_SECTION_ROM;
1052 }
1053 } else {
1054 AddressSpaceDispatch *d;
1055
1056 d = atomic_rcu_read(&section->address_space->dispatch);
1057 iotlb = section - d->map.sections;
1058 iotlb += xlat;
1059 }
1060
1061 /* Make accesses to pages with watchpoints go via the
1062 watchpoint trap routines. */
1063 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1064 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1065 /* Avoid trapping reads of pages with a write breakpoint. */
1066 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1067 iotlb = PHYS_SECTION_WATCH + paddr;
1068 *address |= TLB_MMIO;
1069 break;
1070 }
1071 }
1072 }
1073
1074 return iotlb;
1075 }
1076 #endif /* defined(CONFIG_USER_ONLY) */
1077
1078 #if !defined(CONFIG_USER_ONLY)
1079
1080 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1081 uint16_t section);
1082 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1083
1084 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1085 qemu_anon_ram_alloc;
1086
1087 /*
1088  * Set a custom physical guest memory allocator.
1089 * Accelerators with unusual needs may need this. Hopefully, we can
1090 * get rid of it eventually.
1091 */
1092 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1093 {
1094 phys_mem_alloc = alloc;
1095 }
1096
1097 static uint16_t phys_section_add(PhysPageMap *map,
1098 MemoryRegionSection *section)
1099 {
1100 /* The physical section number is ORed with a page-aligned
1101 * pointer to produce the iotlb entries. Thus it should
1102 * never overflow into the page-aligned value.
1103 */
1104 assert(map->sections_nb < TARGET_PAGE_SIZE);
1105
1106 if (map->sections_nb == map->sections_nb_alloc) {
1107 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1108 map->sections = g_renew(MemoryRegionSection, map->sections,
1109 map->sections_nb_alloc);
1110 }
1111 map->sections[map->sections_nb] = *section;
1112 memory_region_ref(section->mr);
1113 return map->sections_nb++;
1114 }
1115
1116 static void phys_section_destroy(MemoryRegion *mr)
1117 {
1118 bool have_sub_page = mr->subpage;
1119
1120 memory_region_unref(mr);
1121
1122 if (have_sub_page) {
1123 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1124 object_unref(OBJECT(&subpage->iomem));
1125 g_free(subpage);
1126 }
1127 }
1128
1129 static void phys_sections_free(PhysPageMap *map)
1130 {
1131 while (map->sections_nb > 0) {
1132 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1133 phys_section_destroy(section->mr);
1134 }
1135 g_free(map->sections);
1136 g_free(map->nodes);
1137 }
1138
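/* Register a section that does not cover a whole target page: route it
 * through a subpage_t so that several sections can share one page of the
 * dispatch tree.
 */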
1139 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1140 {
1141 subpage_t *subpage;
1142 hwaddr base = section->offset_within_address_space
1143 & TARGET_PAGE_MASK;
1144 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1145 d->map.nodes, d->map.sections);
1146 MemoryRegionSection subsection = {
1147 .offset_within_address_space = base,
1148 .size = int128_make64(TARGET_PAGE_SIZE),
1149 };
1150 hwaddr start, end;
1151
1152 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1153
1154 if (!(existing->mr->subpage)) {
1155 subpage = subpage_init(d->as, base);
1156 subsection.address_space = d->as;
1157 subsection.mr = &subpage->iomem;
1158 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1159 phys_section_add(&d->map, &subsection));
1160 } else {
1161 subpage = container_of(existing->mr, subpage_t, iomem);
1162 }
1163 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1164 end = start + int128_get64(section->size) - 1;
1165 subpage_register(subpage, start, end,
1166 phys_section_add(&d->map, section));
1167 }
1168
1169
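/* Register a page-aligned section whose size is a multiple of the target
 * page size: one dispatch tree leaf per page, all pointing at the same
 * section index.
 */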
1170 static void register_multipage(AddressSpaceDispatch *d,
1171 MemoryRegionSection *section)
1172 {
1173 hwaddr start_addr = section->offset_within_address_space;
1174 uint16_t section_index = phys_section_add(&d->map, section);
1175 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1176 TARGET_PAGE_BITS));
1177
1178 assert(num_pages);
1179 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1180 }
1181
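/* Memory listener hook for added regions: split the incoming section into
 * an unaligned head, whole pages, and an unaligned tail, registering each
 * piece as a subpage or multipage mapping as appropriate.
 */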
1182 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1183 {
1184 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1185 AddressSpaceDispatch *d = as->next_dispatch;
1186 MemoryRegionSection now = *section, remain = *section;
1187 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1188
1189 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1190 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1191 - now.offset_within_address_space;
1192
1193 now.size = int128_min(int128_make64(left), now.size);
1194 register_subpage(d, &now);
1195 } else {
1196 now.size = int128_zero();
1197 }
1198 while (int128_ne(remain.size, now.size)) {
1199 remain.size = int128_sub(remain.size, now.size);
1200 remain.offset_within_address_space += int128_get64(now.size);
1201 remain.offset_within_region += int128_get64(now.size);
1202 now = remain;
1203 if (int128_lt(remain.size, page_size)) {
1204 register_subpage(d, &now);
1205 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1206 now.size = page_size;
1207 register_subpage(d, &now);
1208 } else {
1209 now.size = int128_and(now.size, int128_neg(page_size));
1210 register_multipage(d, &now);
1211 }
1212 }
1213 }
1214
1215 void qemu_flush_coalesced_mmio_buffer(void)
1216 {
1217 if (kvm_enabled())
1218 kvm_flush_coalesced_mmio_buffer();
1219 }
1220
1221 void qemu_mutex_lock_ramlist(void)
1222 {
1223 qemu_mutex_lock(&ram_list.mutex);
1224 }
1225
1226 void qemu_mutex_unlock_ramlist(void)
1227 {
1228 qemu_mutex_unlock(&ram_list.mutex);
1229 }
1230
1231 #ifdef __linux__
1232 static int64_t get_file_size(int fd)
1233 {
1234 int64_t size = lseek(fd, 0, SEEK_END);
1235 if (size < 0) {
1236 return -errno;
1237 }
1238 return size;
1239 }
1240
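/* Back a RAM block with a file: @path may name an existing file, a file to
 * create, or a directory in which a temporary file is created. Returns the
 * mmap'ed area, or NULL with @errp set on failure.
 */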
1241 static void *file_ram_alloc(RAMBlock *block,
1242 ram_addr_t memory,
1243 const char *path,
1244 Error **errp)
1245 {
1246 bool unlink_on_error = false;
1247 char *filename;
1248 char *sanitized_name;
1249 char *c;
1250 void *area = MAP_FAILED;
1251 int fd = -1;
1252 int64_t file_size;
1253
1254 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1255 error_setg(errp,
1256 "host lacks kvm mmu notifiers, -mem-path unsupported");
1257 return NULL;
1258 }
1259
1260 for (;;) {
1261 fd = open(path, O_RDWR);
1262 if (fd >= 0) {
1263 /* @path names an existing file, use it */
1264 break;
1265 }
1266 if (errno == ENOENT) {
1267 /* @path names a file that doesn't exist, create it */
1268 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1269 if (fd >= 0) {
1270 unlink_on_error = true;
1271 break;
1272 }
1273 } else if (errno == EISDIR) {
1274 /* @path names a directory, create a file there */
1275 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1276 sanitized_name = g_strdup(memory_region_name(block->mr));
1277 for (c = sanitized_name; *c != '\0'; c++) {
1278 if (*c == '/') {
1279 *c = '_';
1280 }
1281 }
1282
1283 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1284 sanitized_name);
1285 g_free(sanitized_name);
1286
1287 fd = mkstemp(filename);
1288 if (fd >= 0) {
1289 unlink(filename);
1290 g_free(filename);
1291 break;
1292 }
1293 g_free(filename);
1294 }
1295 if (errno != EEXIST && errno != EINTR) {
1296 error_setg_errno(errp, errno,
1297 "can't open backing store %s for guest RAM",
1298 path);
1299 goto error;
1300 }
1301 /*
1302 * Try again on EINTR and EEXIST. The latter happens when
1303 * something else creates the file between our two open().
1304 */
1305 }
1306
1307 block->page_size = qemu_fd_getpagesize(fd);
1308 block->mr->align = block->page_size;
1309 #if defined(__s390x__)
1310 if (kvm_enabled()) {
1311 block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1312 }
1313 #endif
1314
1315 file_size = get_file_size(fd);
1316
1317 if (memory < block->page_size) {
1318 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1319 "or larger than page size 0x%zx",
1320 memory, block->page_size);
1321 goto error;
1322 }
1323
1324 memory = ROUND_UP(memory, block->page_size);
1325
1326 /*
1327 * ftruncate is not supported by hugetlbfs in older
1328 * hosts, so don't bother bailing out on errors.
1329 * If anything goes wrong with it under other filesystems,
1330 * mmap will fail.
1331 *
1332 * Do not truncate the non-empty backend file to avoid corrupting
1333 * the existing data in the file. Disabling shrinking is not
1334 * enough. For example, the current vNVDIMM implementation stores
1335 * the guest NVDIMM labels at the end of the backend file. If the
1336 * backend file is later extended, QEMU will not be able to find
1337 * those labels. Therefore, extending the non-empty backend file
1338 * is disabled as well.
1339 */
1340 if (!file_size && ftruncate(fd, memory)) {
1341 perror("ftruncate");
1342 }
1343
1344 area = qemu_ram_mmap(fd, memory, block->mr->align,
1345 block->flags & RAM_SHARED);
1346 if (area == MAP_FAILED) {
1347 error_setg_errno(errp, errno,
1348 "unable to map backing store for guest RAM");
1349 goto error;
1350 }
1351
1352 if (mem_prealloc) {
1353 os_mem_prealloc(fd, area, memory, errp);
1354 if (errp && *errp) {
1355 goto error;
1356 }
1357 }
1358
1359 block->fd = fd;
1360 return area;
1361
1362 error:
1363 if (area != MAP_FAILED) {
1364 qemu_ram_munmap(area, memory);
1365 }
1366 if (unlink_on_error) {
1367 unlink(path);
1368 }
1369 if (fd != -1) {
1370 close(fd);
1371 }
1372 return NULL;
1373 }
1374 #endif
1375
1376 /* Called with the ramlist lock held. */
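/* Pick the smallest gap between existing blocks that still fits @size and
 * return its start as the new block's ram_addr_t offset.
 */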
1377 static ram_addr_t find_ram_offset(ram_addr_t size)
1378 {
1379 RAMBlock *block, *next_block;
1380 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1381
1382     assert(size != 0); /* it would hand out the same offset multiple times */
1383
1384 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1385 return 0;
1386 }
1387
1388 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1389 ram_addr_t end, next = RAM_ADDR_MAX;
1390
1391 end = block->offset + block->max_length;
1392
1393 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1394 if (next_block->offset >= end) {
1395 next = MIN(next, next_block->offset);
1396 }
1397 }
1398 if (next - end >= size && next - end < mingap) {
1399 offset = end;
1400 mingap = next - end;
1401 }
1402 }
1403
1404 if (offset == RAM_ADDR_MAX) {
1405 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1406 (uint64_t)size);
1407 abort();
1408 }
1409
1410 return offset;
1411 }
1412
1413 ram_addr_t last_ram_offset(void)
1414 {
1415 RAMBlock *block;
1416 ram_addr_t last = 0;
1417
1418 rcu_read_lock();
1419 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1420 last = MAX(last, block->offset + block->max_length);
1421 }
1422 rcu_read_unlock();
1423 return last;
1424 }
1425
1426 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1427 {
1428 int ret;
1429
1430 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1431 if (!machine_dump_guest_core(current_machine)) {
1432 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1433 if (ret) {
1434 perror("qemu_madvise");
1435 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1436 "but dump_guest_core=off specified\n");
1437 }
1438 }
1439 }
1440
1441 const char *qemu_ram_get_idstr(RAMBlock *rb)
1442 {
1443 return rb->idstr;
1444 }
1445
1446 /* Called with iothread lock held. */
1447 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1448 {
1449 RAMBlock *block;
1450
1451 assert(new_block);
1452 assert(!new_block->idstr[0]);
1453
1454 if (dev) {
1455 char *id = qdev_get_dev_path(dev);
1456 if (id) {
1457 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1458 g_free(id);
1459 }
1460 }
1461 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1462
1463 rcu_read_lock();
1464 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1465 if (block != new_block &&
1466 !strcmp(block->idstr, new_block->idstr)) {
1467 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1468 new_block->idstr);
1469 abort();
1470 }
1471 }
1472 rcu_read_unlock();
1473 }
1474
1475 /* Called with iothread lock held. */
1476 void qemu_ram_unset_idstr(RAMBlock *block)
1477 {
1478 /* FIXME: arch_init.c assumes that this is not called throughout
1479 * migration. Ignore the problem since hot-unplug during migration
1480 * does not work anyway.
1481 */
1482 if (block) {
1483 memset(block->idstr, 0, sizeof(block->idstr));
1484 }
1485 }
1486
1487 size_t qemu_ram_pagesize(RAMBlock *rb)
1488 {
1489 return rb->page_size;
1490 }
1491
1492 static int memory_try_enable_merging(void *addr, size_t len)
1493 {
1494 if (!machine_mem_merge(current_machine)) {
1495 /* disabled by the user */
1496 return 0;
1497 }
1498
1499 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1500 }
1501
1502 /* Only legal before guest might have detected the memory size: e.g. on
1503 * incoming migration, or right after reset.
1504 *
1505  * As the memory core doesn't know how memory is accessed, it is up to
1506 * resize callback to update device state and/or add assertions to detect
1507 * misuse, if necessary.
1508 */
1509 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1510 {
1511 assert(block);
1512
1513 newsize = HOST_PAGE_ALIGN(newsize);
1514
1515 if (block->used_length == newsize) {
1516 return 0;
1517 }
1518
1519 if (!(block->flags & RAM_RESIZEABLE)) {
1520 error_setg_errno(errp, EINVAL,
1521 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1522 " in != 0x" RAM_ADDR_FMT, block->idstr,
1523 newsize, block->used_length);
1524 return -EINVAL;
1525 }
1526
1527 if (block->max_length < newsize) {
1528 error_setg_errno(errp, EINVAL,
1529 "Length too large: %s: 0x" RAM_ADDR_FMT
1530 " > 0x" RAM_ADDR_FMT, block->idstr,
1531 newsize, block->max_length);
1532 return -EINVAL;
1533 }
1534
1535 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1536 block->used_length = newsize;
1537 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1538 DIRTY_CLIENTS_ALL);
1539 memory_region_set_size(block->mr, newsize);
1540 if (block->resized) {
1541 block->resized(block->idstr, newsize, block->host);
1542 }
1543 return 0;
1544 }
1545
1546 /* Called with ram_list.mutex held */
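/* Grow the RCU-managed dirty memory bitmaps to cover new_ram_size pages:
 * reuse the existing block pointers, allocate bitmaps for the newly covered
 * range, and publish the new arrays with atomic_rcu_set().
 */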
1547 static void dirty_memory_extend(ram_addr_t old_ram_size,
1548 ram_addr_t new_ram_size)
1549 {
1550 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1551 DIRTY_MEMORY_BLOCK_SIZE);
1552 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1553 DIRTY_MEMORY_BLOCK_SIZE);
1554 int i;
1555
1556 /* Only need to extend if block count increased */
1557 if (new_num_blocks <= old_num_blocks) {
1558 return;
1559 }
1560
1561 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1562 DirtyMemoryBlocks *old_blocks;
1563 DirtyMemoryBlocks *new_blocks;
1564 int j;
1565
1566 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1567 new_blocks = g_malloc(sizeof(*new_blocks) +
1568 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1569
1570 if (old_num_blocks) {
1571 memcpy(new_blocks->blocks, old_blocks->blocks,
1572 old_num_blocks * sizeof(old_blocks->blocks[0]));
1573 }
1574
1575 for (j = old_num_blocks; j < new_num_blocks; j++) {
1576 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1577 }
1578
1579 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1580
1581 if (old_blocks) {
1582 g_free_rcu(old_blocks, rcu);
1583 }
1584 }
1585 }
1586
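/* Insert a newly created RAMBlock into ram_list: assign it an offset,
 * allocate host memory unless it was preallocated or Xen provides it,
 * extend the dirty bitmaps, and keep the list sorted from largest to
 * smallest block.
 */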
1587 static void ram_block_add(RAMBlock *new_block, Error **errp)
1588 {
1589 RAMBlock *block;
1590 RAMBlock *last_block = NULL;
1591 ram_addr_t old_ram_size, new_ram_size;
1592 Error *err = NULL;
1593
1594 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1595
1596 qemu_mutex_lock_ramlist();
1597 new_block->offset = find_ram_offset(new_block->max_length);
1598
1599 if (!new_block->host) {
1600 if (xen_enabled()) {
1601 xen_ram_alloc(new_block->offset, new_block->max_length,
1602 new_block->mr, &err);
1603 if (err) {
1604 error_propagate(errp, err);
1605 qemu_mutex_unlock_ramlist();
1606 return;
1607 }
1608 } else {
1609 new_block->host = phys_mem_alloc(new_block->max_length,
1610 &new_block->mr->align);
1611 if (!new_block->host) {
1612 error_setg_errno(errp, errno,
1613 "cannot set up guest memory '%s'",
1614 memory_region_name(new_block->mr));
1615 qemu_mutex_unlock_ramlist();
1616 return;
1617 }
1618 memory_try_enable_merging(new_block->host, new_block->max_length);
1619 }
1620 }
1621
1622 new_ram_size = MAX(old_ram_size,
1623 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1624 if (new_ram_size > old_ram_size) {
1625 migration_bitmap_extend(old_ram_size, new_ram_size);
1626 dirty_memory_extend(old_ram_size, new_ram_size);
1627 }
1628 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1629 * QLIST (which has an RCU-friendly variant) does not have insertion at
1630 * tail, so save the last element in last_block.
1631 */
1632 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1633 last_block = block;
1634 if (block->max_length < new_block->max_length) {
1635 break;
1636 }
1637 }
1638 if (block) {
1639 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1640 } else if (last_block) {
1641 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1642 } else { /* list is empty */
1643 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1644 }
1645 ram_list.mru_block = NULL;
1646
1647 /* Write list before version */
1648 smp_wmb();
1649 ram_list.version++;
1650 qemu_mutex_unlock_ramlist();
1651
1652 cpu_physical_memory_set_dirty_range(new_block->offset,
1653 new_block->used_length,
1654 DIRTY_CLIENTS_ALL);
1655
1656 if (new_block->host) {
1657 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1658 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1659 /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1660 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1661 }
1662 }
1663
1664 #ifdef __linux__
1665 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1666 bool share, const char *mem_path,
1667 Error **errp)
1668 {
1669 RAMBlock *new_block;
1670 Error *local_err = NULL;
1671
1672 if (xen_enabled()) {
1673 error_setg(errp, "-mem-path not supported with Xen");
1674 return NULL;
1675 }
1676
1677 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1678 /*
1679 * file_ram_alloc() needs to allocate just like
1680 * phys_mem_alloc, but we haven't bothered to provide
1681 * a hook there.
1682 */
1683 error_setg(errp,
1684 "-mem-path not supported with this accelerator");
1685 return NULL;
1686 }
1687
1688 size = HOST_PAGE_ALIGN(size);
1689 new_block = g_malloc0(sizeof(*new_block));
1690 new_block->mr = mr;
1691 new_block->used_length = size;
1692 new_block->max_length = size;
1693 new_block->flags = share ? RAM_SHARED : 0;
1694 new_block->host = file_ram_alloc(new_block, size,
1695 mem_path, errp);
1696 if (!new_block->host) {
1697 g_free(new_block);
1698 return NULL;
1699 }
1700
1701 ram_block_add(new_block, &local_err);
1702 if (local_err) {
1703 g_free(new_block);
1704 error_propagate(errp, local_err);
1705 return NULL;
1706 }
1707 return new_block;
1708 }
1709 #endif
1710
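/* Common back end for the qemu_ram_alloc_* variants: set up the RAMBlock
 * (optionally preallocated host memory, optionally resizeable) and add it
 * to ram_list via ram_block_add().
 */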
1711 static
1712 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1713 void (*resized)(const char*,
1714 uint64_t length,
1715 void *host),
1716 void *host, bool resizeable,
1717 MemoryRegion *mr, Error **errp)
1718 {
1719 RAMBlock *new_block;
1720 Error *local_err = NULL;
1721
1722 size = HOST_PAGE_ALIGN(size);
1723 max_size = HOST_PAGE_ALIGN(max_size);
1724 new_block = g_malloc0(sizeof(*new_block));
1725 new_block->mr = mr;
1726 new_block->resized = resized;
1727 new_block->used_length = size;
1728 new_block->max_length = max_size;
1729 assert(max_size >= size);
1730 new_block->fd = -1;
1731 new_block->page_size = getpagesize();
1732 new_block->host = host;
1733 if (host) {
1734 new_block->flags |= RAM_PREALLOC;
1735 }
1736 if (resizeable) {
1737 new_block->flags |= RAM_RESIZEABLE;
1738 }
1739 ram_block_add(new_block, &local_err);
1740 if (local_err) {
1741 g_free(new_block);
1742 error_propagate(errp, local_err);
1743 return NULL;
1744 }
1745 return new_block;
1746 }
1747
1748 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1749 MemoryRegion *mr, Error **errp)
1750 {
1751 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1752 }
1753
1754 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1755 {
1756 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1757 }
1758
1759 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1760 void (*resized)(const char*,
1761 uint64_t length,
1762 void *host),
1763 MemoryRegion *mr, Error **errp)
1764 {
1765 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1766 }
1767
1768 static void reclaim_ramblock(RAMBlock *block)
1769 {
1770 if (block->flags & RAM_PREALLOC) {
1771 ;
1772 } else if (xen_enabled()) {
1773 xen_invalidate_map_cache_entry(block->host);
1774 #ifndef _WIN32
1775 } else if (block->fd >= 0) {
1776 qemu_ram_munmap(block->host, block->max_length);
1777 close(block->fd);
1778 #endif
1779 } else {
1780 qemu_anon_ram_free(block->host, block->max_length);
1781 }
1782 g_free(block);
1783 }
1784
1785 void qemu_ram_free(RAMBlock *block)
1786 {
1787 if (!block) {
1788 return;
1789 }
1790
1791 qemu_mutex_lock_ramlist();
1792 QLIST_REMOVE_RCU(block, next);
1793 ram_list.mru_block = NULL;
1794 /* Write list before version */
1795 smp_wmb();
1796 ram_list.version++;
1797 call_rcu(block, reclaim_ramblock, rcu);
1798 qemu_mutex_unlock_ramlist();
1799 }
1800
1801 #ifndef _WIN32
1802 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1803 {
1804 RAMBlock *block;
1805 ram_addr_t offset;
1806 int flags;
1807 void *area, *vaddr;
1808
1809 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1810 offset = addr - block->offset;
1811 if (offset < block->max_length) {
1812 vaddr = ramblock_ptr(block, offset);
1813 if (block->flags & RAM_PREALLOC) {
1814 ;
1815 } else if (xen_enabled()) {
1816 abort();
1817 } else {
1818 flags = MAP_FIXED;
1819 if (block->fd >= 0) {
1820 flags |= (block->flags & RAM_SHARED ?
1821 MAP_SHARED : MAP_PRIVATE);
1822 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1823 flags, block->fd, offset);
1824 } else {
1825 /*
1826 * Remap needs to match alloc. Accelerators that
1827 * set phys_mem_alloc never remap. If they did,
1828 * we'd need a remap hook here.
1829 */
1830 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1831
1832 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1833 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1834 flags, -1, 0);
1835 }
1836 if (area != vaddr) {
1837 fprintf(stderr, "Could not remap addr: "
1838 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1839 length, addr);
1840 exit(1);
1841 }
1842 memory_try_enable_merging(vaddr, length);
1843 qemu_ram_setup_dump(vaddr, length);
1844 }
1845 }
1846 }
1847 }
1848 #endif /* !_WIN32 */
1849
1850 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1851 * This should not be used for general purpose DMA. Use address_space_map
1852 * or address_space_rw instead. For local memory (e.g. video ram) that the
1853 * device owns, use memory_region_get_ram_ptr.
1854 *
1855 * Called within RCU critical section.
1856 */
1857 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1858 {
1859 RAMBlock *block = ram_block;
1860
1861 if (block == NULL) {
1862 block = qemu_get_ram_block(addr);
1863 addr -= block->offset;
1864 }
1865
1866 if (xen_enabled() && block->host == NULL) {
1867         /* We need to check whether the requested address is in the RAM
1868          * block, because we don't want to map the entire guest memory in
1869          * QEMU; in that case just map until the end of the page.
1870          */
1871 if (block->offset == 0) {
1872 return xen_map_cache(addr, 0, 0);
1873 }
1874
1875 block->host = xen_map_cache(block->offset, block->max_length, 1);
1876 }
1877 return ramblock_ptr(block, addr);
1878 }
1879
1880 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1881 * but takes a size argument.
1882 *
1883 * Called within RCU critical section.
1884 */
1885 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1886 hwaddr *size)
1887 {
1888 RAMBlock *block = ram_block;
1889 if (*size == 0) {
1890 return NULL;
1891 }
1892
1893 if (block == NULL) {
1894 block = qemu_get_ram_block(addr);
1895 addr -= block->offset;
1896 }
1897 *size = MIN(*size, block->max_length - addr);
1898
1899 if (xen_enabled() && block->host == NULL) {
1900         /* We need to check whether the requested address is in the RAM
1901          * block, because we don't want to map the entire guest memory in
1902          * QEMU; in that case just map the requested area.
1903          */
1904 if (block->offset == 0) {
1905 return xen_map_cache(addr, *size, 1);
1906 }
1907
1908 block->host = xen_map_cache(block->offset, block->max_length, 1);
1909 }
1910
1911 return ramblock_ptr(block, addr);
1912 }
1913
1914 /*
1915  * Translates a host ptr back to a RAMBlock and an offset within that
1916  * RAMBlock.
1917  *
1918  * ptr: Host pointer to look up
1919  * round_offset: If true round the result offset down to a page boundary
1920  * *offset: set to result offset within the RAMBlock (the corresponding
1921  *          ram_addr is block->offset + *offset)
1922 *
1923 * Returns: RAMBlock (or NULL if not found)
1924 *
1925 * By the time this function returns, the returned pointer is not protected
1926 * by RCU anymore. If the caller is not within an RCU critical section and
1927 * does not hold the iothread lock, it must have other means of protecting the
1928 * pointer, such as a reference to the region that includes the incoming
1929 * ram_addr_t.
1930 */
1931 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1932 ram_addr_t *offset)
1933 {
1934 RAMBlock *block;
1935 uint8_t *host = ptr;
1936
1937 if (xen_enabled()) {
1938 ram_addr_t ram_addr;
1939 rcu_read_lock();
1940 ram_addr = xen_ram_addr_from_mapcache(ptr);
1941 block = qemu_get_ram_block(ram_addr);
1942 if (block) {
1943 *offset = ram_addr - block->offset;
1944 }
1945 rcu_read_unlock();
1946 return block;
1947 }
1948
1949 rcu_read_lock();
1950 block = atomic_rcu_read(&ram_list.mru_block);
1951 if (block && block->host && host - block->host < block->max_length) {
1952 goto found;
1953 }
1954
1955 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1956         /* This can happen when the block is not mapped. */
1957 if (block->host == NULL) {
1958 continue;
1959 }
1960 if (host - block->host < block->max_length) {
1961 goto found;
1962 }
1963 }
1964
1965 rcu_read_unlock();
1966 return NULL;
1967
1968 found:
1969 *offset = (host - block->host);
1970 if (round_offset) {
1971 *offset &= TARGET_PAGE_MASK;
1972 }
1973 rcu_read_unlock();
1974 return block;
1975 }
1976
1977 /*
1978 * Finds the named RAMBlock
1979 *
1980 * name: The name of RAMBlock to find
1981 *
1982 * Returns: RAMBlock (or NULL if not found)
1983 */
1984 RAMBlock *qemu_ram_block_by_name(const char *name)
1985 {
1986 RAMBlock *block;
1987
1988 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1989 if (!strcmp(name, block->idstr)) {
1990 return block;
1991 }
1992 }
1993
1994 return NULL;
1995 }
1996
1997 /* Some of the softmmu routines need to translate from a host pointer
1998 (typically a TLB entry) back to a ram offset. */
1999 ram_addr_t qemu_ram_addr_from_host(void *ptr)
2000 {
2001 RAMBlock *block;
2002 ram_addr_t offset;
2003
2004 block = qemu_ram_block_from_host(ptr, false, &offset);
2005 if (!block) {
2006 return RAM_ADDR_INVALID;
2007 }
2008
2009 return block->offset + offset;
2010 }
2011
2012 /* Called within RCU critical section. */
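/* Write handler for RAM pages that still contain translated code: invalidate
 * the affected TBs, perform the store, mark the page dirty, and let later
 * writes skip this slow path once the page is dirty for all clients.
 */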
2013 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2014 uint64_t val, unsigned size)
2015 {
2016 bool locked = false;
2017
2018 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2019 locked = true;
2020 tb_lock();
2021 tb_invalidate_phys_page_fast(ram_addr, size);
2022 }
2023 switch (size) {
2024 case 1:
2025 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2026 break;
2027 case 2:
2028 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2029 break;
2030 case 4:
2031 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2032 break;
2033 default:
2034 abort();
2035 }
2036
2037 if (locked) {
2038 tb_unlock();
2039 }
2040
2041 /* Set both VGA and migration bits for simplicity and to remove
2042 * the notdirty callback faster.
2043 */
2044 cpu_physical_memory_set_dirty_range(ram_addr, size,
2045 DIRTY_CLIENTS_NOCODE);
2046 /* we remove the notdirty callback only if the code has been
2047 flushed */
2048 if (!cpu_physical_memory_is_clean(ram_addr)) {
2049 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2050 }
2051 }
2052
2053 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2054 unsigned size, bool is_write)
2055 {
2056 return is_write;
2057 }
2058
2059 static const MemoryRegionOps notdirty_mem_ops = {
2060 .write = notdirty_mem_write,
2061 .valid.accepts = notdirty_mem_accepts,
2062 .endianness = DEVICE_NATIVE_ENDIAN,
2063 };
2064
2065 /* Generate a debug exception if a watchpoint has been hit. */
2066 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2067 {
2068 CPUState *cpu = current_cpu;
2069 CPUClass *cc = CPU_GET_CLASS(cpu);
2070 CPUArchState *env = cpu->env_ptr;
2071 target_ulong pc, cs_base;
2072 target_ulong vaddr;
2073 CPUWatchpoint *wp;
2074 uint32_t cpu_flags;
2075
2076 if (cpu->watchpoint_hit) {
2077 /* We re-entered the check after replacing the TB. Now raise
2078 * the debug interrupt so that it will trigger after the
2079 * current instruction. */
2080 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2081 return;
2082 }
2083 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2084 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2085 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2086 && (wp->flags & flags)) {
2087 if (flags == BP_MEM_READ) {
2088 wp->flags |= BP_WATCHPOINT_HIT_READ;
2089 } else {
2090 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2091 }
2092 wp->hitaddr = vaddr;
2093 wp->hitattrs = attrs;
2094 if (!cpu->watchpoint_hit) {
2095 if (wp->flags & BP_CPU &&
2096 !cc->debug_check_watchpoint(cpu, wp)) {
2097 wp->flags &= ~BP_WATCHPOINT_HIT;
2098 continue;
2099 }
2100 cpu->watchpoint_hit = wp;
2101
2102 /* The tb_lock will be reset when cpu_loop_exit or
2103 * cpu_loop_exit_noexc longjmp back into the cpu_exec
2104 * main loop.
2105 */
2106 tb_lock();
2107 tb_check_watchpoint(cpu);
2108 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2109 cpu->exception_index = EXCP_DEBUG;
2110 cpu_loop_exit(cpu);
2111 } else {
2112 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2113 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2114 cpu_loop_exit_noexc(cpu);
2115 }
2116 }
2117 } else {
2118 wp->flags &= ~BP_WATCHPOINT_HIT;
2119 }
2120 }
2121 }
2122
2123 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2124 so these check for a hit then pass through to the normal out-of-line
2125 phys routines. */
2126 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2127 unsigned size, MemTxAttrs attrs)
2128 {
2129 MemTxResult res;
2130 uint64_t data;
2131 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2132 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2133
2134 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2135 switch (size) {
2136 case 1:
2137 data = address_space_ldub(as, addr, attrs, &res);
2138 break;
2139 case 2:
2140 data = address_space_lduw(as, addr, attrs, &res);
2141 break;
2142 case 4:
2143 data = address_space_ldl(as, addr, attrs, &res);
2144 break;
2145 default: abort();
2146 }
2147 *pdata = data;
2148 return res;
2149 }
2150
2151 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2152 uint64_t val, unsigned size,
2153 MemTxAttrs attrs)
2154 {
2155 MemTxResult res;
2156 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2157 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2158
2159 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2160 switch (size) {
2161 case 1:
2162 address_space_stb(as, addr, val, attrs, &res);
2163 break;
2164 case 2:
2165 address_space_stw(as, addr, val, attrs, &res);
2166 break;
2167 case 4:
2168 address_space_stl(as, addr, val, attrs, &res);
2169 break;
2170 default: abort();
2171 }
2172 return res;
2173 }
2174
2175 static const MemoryRegionOps watch_mem_ops = {
2176 .read_with_attrs = watch_mem_read,
2177 .write_with_attrs = watch_mem_write,
2178 .endianness = DEVICE_NATIVE_ENDIAN,
2179 };
2180
2181 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2182 unsigned len, MemTxAttrs attrs)
2183 {
2184 subpage_t *subpage = opaque;
2185 uint8_t buf[8];
2186 MemTxResult res;
2187
2188 #if defined(DEBUG_SUBPAGE)
2189 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2190 subpage, len, addr);
2191 #endif
2192 res = address_space_read(subpage->as, addr + subpage->base,
2193 attrs, buf, len);
2194 if (res) {
2195 return res;
2196 }
2197 switch (len) {
2198 case 1:
2199 *data = ldub_p(buf);
2200 return MEMTX_OK;
2201 case 2:
2202 *data = lduw_p(buf);
2203 return MEMTX_OK;
2204 case 4:
2205 *data = ldl_p(buf);
2206 return MEMTX_OK;
2207 case 8:
2208 *data = ldq_p(buf);
2209 return MEMTX_OK;
2210 default:
2211 abort();
2212 }
2213 }
2214
2215 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2216 uint64_t value, unsigned len, MemTxAttrs attrs)
2217 {
2218 subpage_t *subpage = opaque;
2219 uint8_t buf[8];
2220
2221 #if defined(DEBUG_SUBPAGE)
2222 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2223 " value %"PRIx64"\n",
2224 __func__, subpage, len, addr, value);
2225 #endif
2226 switch (len) {
2227 case 1:
2228 stb_p(buf, value);
2229 break;
2230 case 2:
2231 stw_p(buf, value);
2232 break;
2233 case 4:
2234 stl_p(buf, value);
2235 break;
2236 case 8:
2237 stq_p(buf, value);
2238 break;
2239 default:
2240 abort();
2241 }
2242 return address_space_write(subpage->as, addr + subpage->base,
2243 attrs, buf, len);
2244 }
2245
2246 static bool subpage_accepts(void *opaque, hwaddr addr,
2247 unsigned len, bool is_write)
2248 {
2249 subpage_t *subpage = opaque;
2250 #if defined(DEBUG_SUBPAGE)
2251 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2252 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2253 #endif
2254
2255 return address_space_access_valid(subpage->as, addr + subpage->base,
2256 len, is_write);
2257 }
2258
2259 static const MemoryRegionOps subpage_ops = {
2260 .read_with_attrs = subpage_read,
2261 .write_with_attrs = subpage_write,
2262 .impl.min_access_size = 1,
2263 .impl.max_access_size = 8,
2264 .valid.min_access_size = 1,
2265 .valid.max_access_size = 8,
2266 .valid.accepts = subpage_accepts,
2267 .endianness = DEVICE_NATIVE_ENDIAN,
2268 };
2269
2270 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2271 uint16_t section)
2272 {
2273 int idx, eidx;
2274
2275 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2276 return -1;
2277 idx = SUBPAGE_IDX(start);
2278 eidx = SUBPAGE_IDX(end);
2279 #if defined(DEBUG_SUBPAGE)
2280 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2281 __func__, mmio, start, end, idx, eidx, section);
2282 #endif
2283 for (; idx <= eidx; idx++) {
2284 mmio->sub_section[idx] = section;
2285 }
2286
2287 return 0;
2288 }
2289
2290 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2291 {
2292 subpage_t *mmio;
2293
2294 mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2295 mmio->as = as;
2296 mmio->base = base;
2297 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2298 NULL, TARGET_PAGE_SIZE);
2299 mmio->iomem.subpage = true;
2300 #if defined(DEBUG_SUBPAGE)
2301 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2302 mmio, base, TARGET_PAGE_SIZE);
2303 #endif
2304 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2305
2306 return mmio;
2307 }
2308
2309 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2310 MemoryRegion *mr)
2311 {
2312 assert(as);
2313 MemoryRegionSection section = {
2314 .address_space = as,
2315 .mr = mr,
2316 .offset_within_address_space = 0,
2317 .offset_within_region = 0,
2318 .size = int128_2_64(),
2319 };
2320
2321 return phys_section_add(map, &section);
2322 }
2323
2324 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2325 {
2326 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2327 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2328 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2329 MemoryRegionSection *sections = d->map.sections;
2330
2331 return sections[index & ~TARGET_PAGE_MASK].mr;
2332 }
2333
2334 static void io_mem_init(void)
2335 {
2336 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2337 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2338 NULL, UINT64_MAX);
2339 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2340 NULL, UINT64_MAX);
2341 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2342 NULL, UINT64_MAX);
2343 }
2344
2345 static void mem_begin(MemoryListener *listener)
2346 {
2347 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2348 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2349 uint16_t n;
2350
2351 n = dummy_section(&d->map, as, &io_mem_unassigned);
2352 assert(n == PHYS_SECTION_UNASSIGNED);
2353 n = dummy_section(&d->map, as, &io_mem_notdirty);
2354 assert(n == PHYS_SECTION_NOTDIRTY);
2355 n = dummy_section(&d->map, as, &io_mem_rom);
2356 assert(n == PHYS_SECTION_ROM);
2357 n = dummy_section(&d->map, as, &io_mem_watch);
2358 assert(n == PHYS_SECTION_WATCH);
2359
2360 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2361 d->as = as;
2362 as->next_dispatch = d;
2363 }
2364
2365 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2366 {
2367 phys_sections_free(&d->map);
2368 g_free(d);
2369 }
2370
2371 static void mem_commit(MemoryListener *listener)
2372 {
2373 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2374 AddressSpaceDispatch *cur = as->dispatch;
2375 AddressSpaceDispatch *next = as->next_dispatch;
2376
2377 phys_page_compact_all(next, next->map.nodes_nb);
2378
2379 atomic_rcu_set(&as->dispatch, next);
2380 if (cur) {
2381 call_rcu(cur, address_space_dispatch_free, rcu);
2382 }
2383 }
2384
2385 static void tcg_commit(MemoryListener *listener)
2386 {
2387 CPUAddressSpace *cpuas;
2388 AddressSpaceDispatch *d;
2389
2390 /* since each CPU stores ram addresses in its TLB cache, we must
2391 reset the modified entries */
2392 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2393 cpu_reloading_memory_map();
2394 /* The CPU and TLB are protected by the iothread lock.
2395 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2396 * may have split the RCU critical section.
2397 */
2398 d = atomic_rcu_read(&cpuas->as->dispatch);
2399 atomic_rcu_set(&cpuas->memory_dispatch, d);
2400 tlb_flush(cpuas->cpu, 1);
2401 }
2402
2403 void address_space_init_dispatch(AddressSpace *as)
2404 {
2405 as->dispatch = NULL;
2406 as->dispatch_listener = (MemoryListener) {
2407 .begin = mem_begin,
2408 .commit = mem_commit,
2409 .region_add = mem_add,
2410 .region_nop = mem_add,
2411 .priority = 0,
2412 };
2413 memory_listener_register(&as->dispatch_listener, as);
2414 }
2415
2416 void address_space_unregister(AddressSpace *as)
2417 {
2418 memory_listener_unregister(&as->dispatch_listener);
2419 }
2420
2421 void address_space_destroy_dispatch(AddressSpace *as)
2422 {
2423 AddressSpaceDispatch *d = as->dispatch;
2424
2425 atomic_rcu_set(&as->dispatch, NULL);
2426 if (d) {
2427 call_rcu(d, address_space_dispatch_free, rcu);
2428 }
2429 }
2430
2431 static void memory_map_init(void)
2432 {
2433 system_memory = g_malloc(sizeof(*system_memory));
2434
2435 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2436 address_space_init(&address_space_memory, system_memory, "memory");
2437
2438 system_io = g_malloc(sizeof(*system_io));
2439 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2440 65536);
2441 address_space_init(&address_space_io, system_io, "I/O");
2442 }
2443
2444 MemoryRegion *get_system_memory(void)
2445 {
2446 return system_memory;
2447 }
2448
2449 MemoryRegion *get_system_io(void)
2450 {
2451 return system_io;
2452 }
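/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Shows how board code typically maps RAM into the tree returned by
 * get_system_memory(); the name, size and the 2.x-era
 * memory_region_init_ram() signature are assumptions for the example.
 */
#if 0
static void example_map_board_ram(void)
{
    MemoryRegion *ram = g_new(MemoryRegion, 1);

    /* Allocate 128 MiB of RAM and map it at guest physical address 0. */
    memory_region_init_ram(ram, NULL, "example.ram", 128 * 1024 * 1024,
                           &error_fatal);
    memory_region_add_subregion(get_system_memory(), 0, ram);
}
#endif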
2453
2454 #endif /* !defined(CONFIG_USER_ONLY) */
2455
2456 /* physical memory access (slow version, mainly for debug) */
2457 #if defined(CONFIG_USER_ONLY)
2458 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2459 uint8_t *buf, int len, int is_write)
2460 {
2461 int l, flags;
2462 target_ulong page;
2463 void * p;
2464
2465 while (len > 0) {
2466 page = addr & TARGET_PAGE_MASK;
2467 l = (page + TARGET_PAGE_SIZE) - addr;
2468 if (l > len)
2469 l = len;
2470 flags = page_get_flags(page);
2471 if (!(flags & PAGE_VALID))
2472 return -1;
2473 if (is_write) {
2474 if (!(flags & PAGE_WRITE))
2475 return -1;
2476 /* XXX: this code should not depend on lock_user */
2477 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2478 return -1;
2479 memcpy(p, buf, l);
2480 unlock_user(p, addr, l);
2481 } else {
2482 if (!(flags & PAGE_READ))
2483 return -1;
2484 /* XXX: this code should not depend on lock_user */
2485 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2486 return -1;
2487 memcpy(buf, p, l);
2488 unlock_user(p, addr, 0);
2489 }
2490 len -= l;
2491 buf += l;
2492 addr += l;
2493 }
2494 return 0;
2495 }
2496
2497 #else
2498
2499 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2500 hwaddr length)
2501 {
2502 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2503 addr += memory_region_get_ram_addr(mr);
2504
2505 /* No early return if dirty_log_mask is or becomes 0, because
2506 * cpu_physical_memory_set_dirty_range will still call
2507 * xen_modified_memory.
2508 */
2509 if (dirty_log_mask) {
2510 dirty_log_mask =
2511 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2512 }
2513 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2514 tb_lock();
2515 tb_invalidate_phys_range(addr, addr + length);
2516 tb_unlock();
2517 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2518 }
2519 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2520 }
2521
2522 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2523 {
2524 unsigned access_size_max = mr->ops->valid.max_access_size;
2525
2526 /* Regions are assumed to support 1-4 byte accesses unless
2527 otherwise specified. */
2528 if (access_size_max == 0) {
2529 access_size_max = 4;
2530 }
2531
2532 /* Bound the maximum access by the alignment of the address. */
2533 if (!mr->ops->impl.unaligned) {
2534 unsigned align_size_max = addr & -addr;
2535 if (align_size_max != 0 && align_size_max < access_size_max) {
2536 access_size_max = align_size_max;
2537 }
2538 }
2539
2540 /* Don't attempt accesses larger than the maximum. */
2541 if (l > access_size_max) {
2542 l = access_size_max;
2543 }
2544 l = pow2floor(l);
2545
2546 return l;
2547 }
2548
2549 static bool prepare_mmio_access(MemoryRegion *mr)
2550 {
2551 bool unlocked = !qemu_mutex_iothread_locked();
2552 bool release_lock = false;
2553
2554 if (unlocked && mr->global_locking) {
2555 qemu_mutex_lock_iothread();
2556 unlocked = false;
2557 release_lock = true;
2558 }
2559 if (mr->flush_coalesced_mmio) {
2560 if (unlocked) {
2561 qemu_mutex_lock_iothread();
2562 }
2563 qemu_flush_coalesced_mmio_buffer();
2564 if (unlocked) {
2565 qemu_mutex_unlock_iothread();
2566 }
2567 }
2568
2569 return release_lock;
2570 }
2571
2572 /* Called within RCU critical section. */
2573 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2574 MemTxAttrs attrs,
2575 const uint8_t *buf,
2576 int len, hwaddr addr1,
2577 hwaddr l, MemoryRegion *mr)
2578 {
2579 uint8_t *ptr;
2580 uint64_t val;
2581 MemTxResult result = MEMTX_OK;
2582 bool release_lock = false;
2583
2584 for (;;) {
2585 if (!memory_access_is_direct(mr, true)) {
2586 release_lock |= prepare_mmio_access(mr);
2587 l = memory_access_size(mr, l, addr1);
2588 /* XXX: could force current_cpu to NULL to avoid
2589 potential bugs */
2590 switch (l) {
2591 case 8:
2592 /* 64 bit write access */
2593 val = ldq_p(buf);
2594 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2595 attrs);
2596 break;
2597 case 4:
2598 /* 32 bit write access */
2599 val = ldl_p(buf);
2600 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2601 attrs);
2602 break;
2603 case 2:
2604 /* 16 bit write access */
2605 val = lduw_p(buf);
2606 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2607 attrs);
2608 break;
2609 case 1:
2610 /* 8 bit write access */
2611 val = ldub_p(buf);
2612 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2613 attrs);
2614 break;
2615 default:
2616 abort();
2617 }
2618 } else {
2619 /* RAM case */
2620 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2621 memcpy(ptr, buf, l);
2622 invalidate_and_set_dirty(mr, addr1, l);
2623 }
2624
2625 if (release_lock) {
2626 qemu_mutex_unlock_iothread();
2627 release_lock = false;
2628 }
2629
2630 len -= l;
2631 buf += l;
2632 addr += l;
2633
2634 if (!len) {
2635 break;
2636 }
2637
2638 l = len;
2639 mr = address_space_translate(as, addr, &addr1, &l, true);
2640 }
2641
2642 return result;
2643 }
2644
2645 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2646 const uint8_t *buf, int len)
2647 {
2648 hwaddr l;
2649 hwaddr addr1;
2650 MemoryRegion *mr;
2651 MemTxResult result = MEMTX_OK;
2652
2653 if (len > 0) {
2654 rcu_read_lock();
2655 l = len;
2656 mr = address_space_translate(as, addr, &addr1, &l, true);
2657 result = address_space_write_continue(as, addr, attrs, buf, len,
2658 addr1, l, mr);
2659 rcu_read_unlock();
2660 }
2661
2662 return result;
2663 }
2664
2665 /* Called within RCU critical section. */
2666 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2667 MemTxAttrs attrs, uint8_t *buf,
2668 int len, hwaddr addr1, hwaddr l,
2669 MemoryRegion *mr)
2670 {
2671 uint8_t *ptr;
2672 uint64_t val;
2673 MemTxResult result = MEMTX_OK;
2674 bool release_lock = false;
2675
2676 for (;;) {
2677 if (!memory_access_is_direct(mr, false)) {
2678 /* I/O case */
2679 release_lock |= prepare_mmio_access(mr);
2680 l = memory_access_size(mr, l, addr1);
2681 switch (l) {
2682 case 8:
2683 /* 64 bit read access */
2684 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2685 attrs);
2686 stq_p(buf, val);
2687 break;
2688 case 4:
2689 /* 32 bit read access */
2690 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2691 attrs);
2692 stl_p(buf, val);
2693 break;
2694 case 2:
2695 /* 16 bit read access */
2696 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2697 attrs);
2698 stw_p(buf, val);
2699 break;
2700 case 1:
2701 /* 8 bit read access */
2702 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2703 attrs);
2704 stb_p(buf, val);
2705 break;
2706 default:
2707 abort();
2708 }
2709 } else {
2710 /* RAM case */
2711 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2712 memcpy(buf, ptr, l);
2713 }
2714
2715 if (release_lock) {
2716 qemu_mutex_unlock_iothread();
2717 release_lock = false;
2718 }
2719
2720 len -= l;
2721 buf += l;
2722 addr += l;
2723
2724 if (!len) {
2725 break;
2726 }
2727
2728 l = len;
2729 mr = address_space_translate(as, addr, &addr1, &l, false);
2730 }
2731
2732 return result;
2733 }
2734
2735 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2736 MemTxAttrs attrs, uint8_t *buf, int len)
2737 {
2738 hwaddr l;
2739 hwaddr addr1;
2740 MemoryRegion *mr;
2741 MemTxResult result = MEMTX_OK;
2742
2743 if (len > 0) {
2744 rcu_read_lock();
2745 l = len;
2746 mr = address_space_translate(as, addr, &addr1, &l, false);
2747 result = address_space_read_continue(as, addr, attrs, buf, len,
2748 addr1, l, mr);
2749 rcu_read_unlock();
2750 }
2751
2752 return result;
2753 }
2754
2755 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2756 uint8_t *buf, int len, bool is_write)
2757 {
2758 if (is_write) {
2759 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2760 } else {
2761 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2762 }
2763 }
2764
2765 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2766 int len, int is_write)
2767 {
2768 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2769 buf, len, is_write);
2770 }
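/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Shows the common pattern for copying a small buffer to and from guest
 * physical memory; the address and length are arbitrary examples.
 */
#if 0
static void example_guest_copy_roundtrip(void)
{
    uint8_t buf[16] = { 0 };
    hwaddr gpa = 0x1000;

    /* Write the buffer into guest memory... */
    cpu_physical_memory_rw(gpa, buf, sizeof(buf), 1);
    /* ...then read it back, this time checking the transaction result. */
    if (address_space_rw(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED,
                         buf, sizeof(buf), false) != MEMTX_OK) {
        /* handle the failed access */
    }
}
#endif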
2771
2772 enum write_rom_type {
2773 WRITE_DATA,
2774 FLUSH_CACHE,
2775 };
2776
2777 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2778 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2779 {
2780 hwaddr l;
2781 uint8_t *ptr;
2782 hwaddr addr1;
2783 MemoryRegion *mr;
2784
2785 rcu_read_lock();
2786 while (len > 0) {
2787 l = len;
2788 mr = address_space_translate(as, addr, &addr1, &l, true);
2789
2790 if (!(memory_region_is_ram(mr) ||
2791 memory_region_is_romd(mr))) {
2792 l = memory_access_size(mr, l, addr1);
2793 } else {
2794 /* ROM/RAM case */
2795 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2796 switch (type) {
2797 case WRITE_DATA:
2798 memcpy(ptr, buf, l);
2799 invalidate_and_set_dirty(mr, addr1, l);
2800 break;
2801 case FLUSH_CACHE:
2802 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2803 break;
2804 }
2805 }
2806 len -= l;
2807 buf += l;
2808 addr += l;
2809 }
2810 rcu_read_unlock();
2811 }
2812
2813 /* used for ROM loading: can write to RAM and ROM */
2814 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2815 const uint8_t *buf, int len)
2816 {
2817 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2818 }
2819
2820 void cpu_flush_icache_range(hwaddr start, int len)
2821 {
2822 /*
2823 * This function should do the same thing as an icache flush that was
2824 * triggered from within the guest. For TCG we are always cache coherent,
2825 * so there is no need to flush anything. For KVM / Xen we need to flush
2826 * the host's instruction cache at least.
2827 */
2828 if (tcg_enabled()) {
2829 return;
2830 }
2831
2832 cpu_physical_memory_write_rom_internal(&address_space_memory,
2833 start, NULL, len, FLUSH_CACHE);
2834 }
2835
2836 typedef struct {
2837 MemoryRegion *mr;
2838 void *buffer;
2839 hwaddr addr;
2840 hwaddr len;
2841 bool in_use;
2842 } BounceBuffer;
2843
2844 static BounceBuffer bounce;
2845
2846 typedef struct MapClient {
2847 QEMUBH *bh;
2848 QLIST_ENTRY(MapClient) link;
2849 } MapClient;
2850
2851 QemuMutex map_client_list_lock;
2852 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2853 = QLIST_HEAD_INITIALIZER(map_client_list);
2854
2855 static void cpu_unregister_map_client_do(MapClient *client)
2856 {
2857 QLIST_REMOVE(client, link);
2858 g_free(client);
2859 }
2860
2861 static void cpu_notify_map_clients_locked(void)
2862 {
2863 MapClient *client;
2864
2865 while (!QLIST_EMPTY(&map_client_list)) {
2866 client = QLIST_FIRST(&map_client_list);
2867 qemu_bh_schedule(client->bh);
2868 cpu_unregister_map_client_do(client);
2869 }
2870 }
2871
2872 void cpu_register_map_client(QEMUBH *bh)
2873 {
2874 MapClient *client = g_malloc(sizeof(*client));
2875
2876 qemu_mutex_lock(&map_client_list_lock);
2877 client->bh = bh;
2878 QLIST_INSERT_HEAD(&map_client_list, client, link);
2879 if (!atomic_read(&bounce.in_use)) {
2880 cpu_notify_map_clients_locked();
2881 }
2882 qemu_mutex_unlock(&map_client_list_lock);
2883 }
2884
2885 void cpu_exec_init_all(void)
2886 {
2887 qemu_mutex_init(&ram_list.mutex);
2888 /* The data structures we set up here depend on knowing the page size,
2889 * so no more changes can be made after this point.
2890 * In an ideal world, nothing we did before we had finished the
2891 * machine setup would care about the target page size, and we could
2892 * do this much later, rather than requiring board models to state
2893 * up front what their requirements are.
2894 */
2895 finalize_target_page_bits();
2896 io_mem_init();
2897 memory_map_init();
2898 qemu_mutex_init(&map_client_list_lock);
2899 }
2900
2901 void cpu_unregister_map_client(QEMUBH *bh)
2902 {
2903 MapClient *client;
2904
2905 qemu_mutex_lock(&map_client_list_lock);
2906 QLIST_FOREACH(client, &map_client_list, link) {
2907 if (client->bh == bh) {
2908 cpu_unregister_map_client_do(client);
2909 break;
2910 }
2911 }
2912 qemu_mutex_unlock(&map_client_list_lock);
2913 }
2914
2915 static void cpu_notify_map_clients(void)
2916 {
2917 qemu_mutex_lock(&map_client_list_lock);
2918 cpu_notify_map_clients_locked();
2919 qemu_mutex_unlock(&map_client_list_lock);
2920 }
2921
2922 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2923 {
2924 MemoryRegion *mr;
2925 hwaddr l, xlat;
2926
2927 rcu_read_lock();
2928 while (len > 0) {
2929 l = len;
2930 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2931 if (!memory_access_is_direct(mr, is_write)) {
2932 l = memory_access_size(mr, l, addr);
2933 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
     rcu_read_unlock();
2934 return false;
2935 }
2936 }
2937
2938 len -= l;
2939 addr += l;
2940 }
2941 rcu_read_unlock();
2942 return true;
2943 }
2944
2945 /* Map a physical memory region into a host virtual address.
2946 * May map a subset of the requested range, given by and returned in *plen.
2947 * May return NULL if resources needed to perform the mapping are exhausted.
2948 * Use only for reads OR writes - not for read-modify-write operations.
2949 * Use cpu_register_map_client() to know when retrying the map operation is
2950 * likely to succeed.
2951 */
2952 void *address_space_map(AddressSpace *as,
2953 hwaddr addr,
2954 hwaddr *plen,
2955 bool is_write)
2956 {
2957 hwaddr len = *plen;
2958 hwaddr done = 0;
2959 hwaddr l, xlat, base;
2960 MemoryRegion *mr, *this_mr;
2961 void *ptr;
2962
2963 if (len == 0) {
2964 return NULL;
2965 }
2966
2967 l = len;
2968 rcu_read_lock();
2969 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2970
2971 if (!memory_access_is_direct(mr, is_write)) {
2972 if (atomic_xchg(&bounce.in_use, true)) {
2973 rcu_read_unlock();
2974 return NULL;
2975 }
2976 /* Avoid unbounded allocations */
2977 l = MIN(l, TARGET_PAGE_SIZE);
2978 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2979 bounce.addr = addr;
2980 bounce.len = l;
2981
2982 memory_region_ref(mr);
2983 bounce.mr = mr;
2984 if (!is_write) {
2985 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2986 bounce.buffer, l);
2987 }
2988
2989 rcu_read_unlock();
2990 *plen = l;
2991 return bounce.buffer;
2992 }
2993
2994 base = xlat;
2995
2996 for (;;) {
2997 len -= l;
2998 addr += l;
2999 done += l;
3000 if (len == 0) {
3001 break;
3002 }
3003
3004 l = len;
3005 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3006 if (this_mr != mr || xlat != base + done) {
3007 break;
3008 }
3009 }
3010
3011 memory_region_ref(mr);
3012 *plen = done;
3013 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
3014 rcu_read_unlock();
3015
3016 return ptr;
3017 }
3018
3019 /* Unmaps a memory region previously mapped by address_space_map().
3020 * Will also mark the memory as dirty if is_write == 1. access_len gives
3021 * the amount of memory that was actually read or written by the caller.
3022 */
3023 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3024 int is_write, hwaddr access_len)
3025 {
3026 if (buffer != bounce.buffer) {
3027 MemoryRegion *mr;
3028 ram_addr_t addr1;
3029
3030 mr = memory_region_from_host(buffer, &addr1);
3031 assert(mr != NULL);
3032 if (is_write) {
3033 invalidate_and_set_dirty(mr, addr1, access_len);
3034 }
3035 if (xen_enabled()) {
3036 xen_invalidate_map_cache_entry(buffer);
3037 }
3038 memory_region_unref(mr);
3039 return;
3040 }
3041 if (is_write) {
3042 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3043 bounce.buffer, access_len);
3044 }
3045 qemu_vfree(bounce.buffer);
3046 bounce.buffer = NULL;
3047 memory_region_unref(bounce.mr);
3048 atomic_mb_set(&bounce.in_use, false);
3049 cpu_notify_map_clients();
3050 }
3051
3052 void *cpu_physical_memory_map(hwaddr addr,
3053 hwaddr *plen,
3054 int is_write)
3055 {
3056 return address_space_map(&address_space_memory, addr, plen, is_write);
3057 }
3058
3059 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3060 int is_write, hwaddr access_len)
3061 {
3062 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3063 }
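/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Shows the map/use/unmap pattern, including the partial-mapping case
 * where *plen comes back smaller than requested.
 */
#if 0
static void example_zero_guest_region(hwaddr addr, hwaddr len)
{
    while (len > 0) {
        hwaddr plen = len;
        void *p = address_space_map(&address_space_memory, addr, &plen, true);

        if (!p) {
            /* Resources exhausted (e.g. the bounce buffer is busy); a real
               caller would register a map client and retry from its BH. */
            return;
        }
        memset(p, 0, plen);
        /* Report exactly how many bytes were written so they get dirtied. */
        address_space_unmap(&address_space_memory, p, plen, true, plen);
        addr += plen;
        len -= plen;
    }
}
#endif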
3064
3065 /* warning: addr must be aligned */
3066 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3067 MemTxAttrs attrs,
3068 MemTxResult *result,
3069 enum device_endian endian)
3070 {
3071 uint8_t *ptr;
3072 uint64_t val;
3073 MemoryRegion *mr;
3074 hwaddr l = 4;
3075 hwaddr addr1;
3076 MemTxResult r;
3077 bool release_lock = false;
3078
3079 rcu_read_lock();
3080 mr = address_space_translate(as, addr, &addr1, &l, false);
3081 if (l < 4 || !memory_access_is_direct(mr, false)) {
3082 release_lock |= prepare_mmio_access(mr);
3083
3084 /* I/O case */
3085 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3086 #if defined(TARGET_WORDS_BIGENDIAN)
3087 if (endian == DEVICE_LITTLE_ENDIAN) {
3088 val = bswap32(val);
3089 }
3090 #else
3091 if (endian == DEVICE_BIG_ENDIAN) {
3092 val = bswap32(val);
3093 }
3094 #endif
3095 } else {
3096 /* RAM case */
3097 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3098 switch (endian) {
3099 case DEVICE_LITTLE_ENDIAN:
3100 val = ldl_le_p(ptr);
3101 break;
3102 case DEVICE_BIG_ENDIAN:
3103 val = ldl_be_p(ptr);
3104 break;
3105 default:
3106 val = ldl_p(ptr);
3107 break;
3108 }
3109 r = MEMTX_OK;
3110 }
3111 if (result) {
3112 *result = r;
3113 }
3114 if (release_lock) {
3115 qemu_mutex_unlock_iothread();
3116 }
3117 rcu_read_unlock();
3118 return val;
3119 }
3120
3121 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3122 MemTxAttrs attrs, MemTxResult *result)
3123 {
3124 return address_space_ldl_internal(as, addr, attrs, result,
3125 DEVICE_NATIVE_ENDIAN);
3126 }
3127
3128 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3129 MemTxAttrs attrs, MemTxResult *result)
3130 {
3131 return address_space_ldl_internal(as, addr, attrs, result,
3132 DEVICE_LITTLE_ENDIAN);
3133 }
3134
3135 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3136 MemTxAttrs attrs, MemTxResult *result)
3137 {
3138 return address_space_ldl_internal(as, addr, attrs, result,
3139 DEVICE_BIG_ENDIAN);
3140 }
3141
3142 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3143 {
3144 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3145 }
3146
3147 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3148 {
3149 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3150 }
3151
3152 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3153 {
3154 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3155 }
3156
3157 /* warning: addr must be aligned */
3158 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3159 MemTxAttrs attrs,
3160 MemTxResult *result,
3161 enum device_endian endian)
3162 {
3163 uint8_t *ptr;
3164 uint64_t val;
3165 MemoryRegion *mr;
3166 hwaddr l = 8;
3167 hwaddr addr1;
3168 MemTxResult r;
3169 bool release_lock = false;
3170
3171 rcu_read_lock();
3172 mr = address_space_translate(as, addr, &addr1, &l,
3173 false);
3174 if (l < 8 || !memory_access_is_direct(mr, false)) {
3175 release_lock |= prepare_mmio_access(mr);
3176
3177 /* I/O case */
3178 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3179 #if defined(TARGET_WORDS_BIGENDIAN)
3180 if (endian == DEVICE_LITTLE_ENDIAN) {
3181 val = bswap64(val);
3182 }
3183 #else
3184 if (endian == DEVICE_BIG_ENDIAN) {
3185 val = bswap64(val);
3186 }
3187 #endif
3188 } else {
3189 /* RAM case */
3190 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3191 switch (endian) {
3192 case DEVICE_LITTLE_ENDIAN:
3193 val = ldq_le_p(ptr);
3194 break;
3195 case DEVICE_BIG_ENDIAN:
3196 val = ldq_be_p(ptr);
3197 break;
3198 default:
3199 val = ldq_p(ptr);
3200 break;
3201 }
3202 r = MEMTX_OK;
3203 }
3204 if (result) {
3205 *result = r;
3206 }
3207 if (release_lock) {
3208 qemu_mutex_unlock_iothread();
3209 }
3210 rcu_read_unlock();
3211 return val;
3212 }
3213
3214 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3215 MemTxAttrs attrs, MemTxResult *result)
3216 {
3217 return address_space_ldq_internal(as, addr, attrs, result,
3218 DEVICE_NATIVE_ENDIAN);
3219 }
3220
3221 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3222 MemTxAttrs attrs, MemTxResult *result)
3223 {
3224 return address_space_ldq_internal(as, addr, attrs, result,
3225 DEVICE_LITTLE_ENDIAN);
3226 }
3227
3228 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3229 MemTxAttrs attrs, MemTxResult *result)
3230 {
3231 return address_space_ldq_internal(as, addr, attrs, result,
3232 DEVICE_BIG_ENDIAN);
3233 }
3234
3235 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3236 {
3237 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3238 }
3239
3240 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3241 {
3242 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3243 }
3244
3245 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3246 {
3247 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3248 }
3249
3250 /* XXX: optimize */
3251 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3252 MemTxAttrs attrs, MemTxResult *result)
3253 {
3254 uint8_t val;
3255 MemTxResult r;
3256
3257 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3258 if (result) {
3259 *result = r;
3260 }
3261 return val;
3262 }
3263
3264 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3265 {
3266 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3267 }
3268
3269 /* warning: addr must be aligned */
3270 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3271 hwaddr addr,
3272 MemTxAttrs attrs,
3273 MemTxResult *result,
3274 enum device_endian endian)
3275 {
3276 uint8_t *ptr;
3277 uint64_t val;
3278 MemoryRegion *mr;
3279 hwaddr l = 2;
3280 hwaddr addr1;
3281 MemTxResult r;
3282 bool release_lock = false;
3283
3284 rcu_read_lock();
3285 mr = address_space_translate(as, addr, &addr1, &l,
3286 false);
3287 if (l < 2 || !memory_access_is_direct(mr, false)) {
3288 release_lock |= prepare_mmio_access(mr);
3289
3290 /* I/O case */
3291 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3292 #if defined(TARGET_WORDS_BIGENDIAN)
3293 if (endian == DEVICE_LITTLE_ENDIAN) {
3294 val = bswap16(val);
3295 }
3296 #else
3297 if (endian == DEVICE_BIG_ENDIAN) {
3298 val = bswap16(val);
3299 }
3300 #endif
3301 } else {
3302 /* RAM case */
3303 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3304 switch (endian) {
3305 case DEVICE_LITTLE_ENDIAN:
3306 val = lduw_le_p(ptr);
3307 break;
3308 case DEVICE_BIG_ENDIAN:
3309 val = lduw_be_p(ptr);
3310 break;
3311 default:
3312 val = lduw_p(ptr);
3313 break;
3314 }
3315 r = MEMTX_OK;
3316 }
3317 if (result) {
3318 *result = r;
3319 }
3320 if (release_lock) {
3321 qemu_mutex_unlock_iothread();
3322 }
3323 rcu_read_unlock();
3324 return val;
3325 }
3326
3327 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3328 MemTxAttrs attrs, MemTxResult *result)
3329 {
3330 return address_space_lduw_internal(as, addr, attrs, result,
3331 DEVICE_NATIVE_ENDIAN);
3332 }
3333
3334 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3335 MemTxAttrs attrs, MemTxResult *result)
3336 {
3337 return address_space_lduw_internal(as, addr, attrs, result,
3338 DEVICE_LITTLE_ENDIAN);
3339 }
3340
3341 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3342 MemTxAttrs attrs, MemTxResult *result)
3343 {
3344 return address_space_lduw_internal(as, addr, attrs, result,
3345 DEVICE_BIG_ENDIAN);
3346 }
3347
3348 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3349 {
3350 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3351 }
3352
3353 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3354 {
3355 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3356 }
3357
3358 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3359 {
3360 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3361 }
3362
3363 /* warning: addr must be aligned. The RAM page is not marked as dirty
3364 and the code inside is not invalidated. This is useful when the dirty
3365 bits are used to track modified PTEs. */
3366 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3367 MemTxAttrs attrs, MemTxResult *result)
3368 {
3369 uint8_t *ptr;
3370 MemoryRegion *mr;
3371 hwaddr l = 4;
3372 hwaddr addr1;
3373 MemTxResult r;
3374 uint8_t dirty_log_mask;
3375 bool release_lock = false;
3376
3377 rcu_read_lock();
3378 mr = address_space_translate(as, addr, &addr1, &l,
3379 true);
3380 if (l < 4 || !memory_access_is_direct(mr, true)) {
3381 release_lock |= prepare_mmio_access(mr);
3382
3383 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3384 } else {
3385 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3386 stl_p(ptr, val);
3387
3388 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3389 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3390 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3391 4, dirty_log_mask);
3392 r = MEMTX_OK;
3393 }
3394 if (result) {
3395 *result = r;
3396 }
3397 if (release_lock) {
3398 qemu_mutex_unlock_iothread();
3399 }
3400 rcu_read_unlock();
3401 }
3402
3403 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3404 {
3405 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3406 }
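/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Shows the intended use of stl_phys_notdirty(): target MMU code setting
 * an accessed bit in a guest PTE without flagging the page as modified.
 * The bit value is invented for the example.
 */
#if 0
static void example_set_pte_accessed(AddressSpace *as, hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(as, pte_addr);

    if (!(pte & 0x20)) {            /* hypothetical "accessed" bit */
        stl_phys_notdirty(as, pte_addr, pte | 0x20);
    }
}
#endif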
3407
3408 /* warning: addr must be aligned */
3409 static inline void address_space_stl_internal(AddressSpace *as,
3410 hwaddr addr, uint32_t val,
3411 MemTxAttrs attrs,
3412 MemTxResult *result,
3413 enum device_endian endian)
3414 {
3415 uint8_t *ptr;
3416 MemoryRegion *mr;
3417 hwaddr l = 4;
3418 hwaddr addr1;
3419 MemTxResult r;
3420 bool release_lock = false;
3421
3422 rcu_read_lock();
3423 mr = address_space_translate(as, addr, &addr1, &l,
3424 true);
3425 if (l < 4 || !memory_access_is_direct(mr, true)) {
3426 release_lock |= prepare_mmio_access(mr);
3427
3428 #if defined(TARGET_WORDS_BIGENDIAN)
3429 if (endian == DEVICE_LITTLE_ENDIAN) {
3430 val = bswap32(val);
3431 }
3432 #else
3433 if (endian == DEVICE_BIG_ENDIAN) {
3434 val = bswap32(val);
3435 }
3436 #endif
3437 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3438 } else {
3439 /* RAM case */
3440 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3441 switch (endian) {
3442 case DEVICE_LITTLE_ENDIAN:
3443 stl_le_p(ptr, val);
3444 break;
3445 case DEVICE_BIG_ENDIAN:
3446 stl_be_p(ptr, val);
3447 break;
3448 default:
3449 stl_p(ptr, val);
3450 break;
3451 }
3452 invalidate_and_set_dirty(mr, addr1, 4);
3453 r = MEMTX_OK;
3454 }
3455 if (result) {
3456 *result = r;
3457 }
3458 if (release_lock) {
3459 qemu_mutex_unlock_iothread();
3460 }
3461 rcu_read_unlock();
3462 }
3463
3464 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3465 MemTxAttrs attrs, MemTxResult *result)
3466 {
3467 address_space_stl_internal(as, addr, val, attrs, result,
3468 DEVICE_NATIVE_ENDIAN);
3469 }
3470
3471 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3472 MemTxAttrs attrs, MemTxResult *result)
3473 {
3474 address_space_stl_internal(as, addr, val, attrs, result,
3475 DEVICE_LITTLE_ENDIAN);
3476 }
3477
3478 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3479 MemTxAttrs attrs, MemTxResult *result)
3480 {
3481 address_space_stl_internal(as, addr, val, attrs, result,
3482 DEVICE_BIG_ENDIAN);
3483 }
3484
3485 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3486 {
3487 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3488 }
3489
3490 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3491 {
3492 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3493 }
3494
3495 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3496 {
3497 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3498 }
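/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Shows the fixed-endian helpers keeping a little-endian in-memory
 * descriptor consistent regardless of the target's native byte order.
 * The descriptor layout is invented for the example.
 */
#if 0
static void example_bump_le_counter(AddressSpace *as, hwaddr desc)
{
    /* A 32-bit little-endian counter stored at offset 8 of the descriptor. */
    uint32_t count = ldl_le_phys(as, desc + 8);

    stl_le_phys(as, desc + 8, count + 1);
}
#endif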
3499
3500 /* XXX: optimize */
3501 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3502 MemTxAttrs attrs, MemTxResult *result)
3503 {
3504 uint8_t v = val;
3505 MemTxResult r;
3506
3507 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3508 if (result) {
3509 *result = r;
3510 }
3511 }
3512
3513 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3514 {
3515 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3516 }
3517
3518 /* warning: addr must be aligned */
3519 static inline void address_space_stw_internal(AddressSpace *as,
3520 hwaddr addr, uint32_t val,
3521 MemTxAttrs attrs,
3522 MemTxResult *result,
3523 enum device_endian endian)
3524 {
3525 uint8_t *ptr;
3526 MemoryRegion *mr;
3527 hwaddr l = 2;
3528 hwaddr addr1;
3529 MemTxResult r;
3530 bool release_lock = false;
3531
3532 rcu_read_lock();
3533 mr = address_space_translate(as, addr, &addr1, &l, true);
3534 if (l < 2 || !memory_access_is_direct(mr, true)) {
3535 release_lock |= prepare_mmio_access(mr);
3536
3537 #if defined(TARGET_WORDS_BIGENDIAN)
3538 if (endian == DEVICE_LITTLE_ENDIAN) {
3539 val = bswap16(val);
3540 }
3541 #else
3542 if (endian == DEVICE_BIG_ENDIAN) {
3543 val = bswap16(val);
3544 }
3545 #endif
3546 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3547 } else {
3548 /* RAM case */
3549 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3550 switch (endian) {
3551 case DEVICE_LITTLE_ENDIAN:
3552 stw_le_p(ptr, val);
3553 break;
3554 case DEVICE_BIG_ENDIAN:
3555 stw_be_p(ptr, val);
3556 break;
3557 default:
3558 stw_p(ptr, val);
3559 break;
3560 }
3561 invalidate_and_set_dirty(mr, addr1, 2);
3562 r = MEMTX_OK;
3563 }
3564 if (result) {
3565 *result = r;
3566 }
3567 if (release_lock) {
3568 qemu_mutex_unlock_iothread();
3569 }
3570 rcu_read_unlock();
3571 }
3572
3573 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3574 MemTxAttrs attrs, MemTxResult *result)
3575 {
3576 address_space_stw_internal(as, addr, val, attrs, result,
3577 DEVICE_NATIVE_ENDIAN);
3578 }
3579
3580 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3581 MemTxAttrs attrs, MemTxResult *result)
3582 {
3583 address_space_stw_internal(as, addr, val, attrs, result,
3584 DEVICE_LITTLE_ENDIAN);
3585 }
3586
3587 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3588 MemTxAttrs attrs, MemTxResult *result)
3589 {
3590 address_space_stw_internal(as, addr, val, attrs, result,
3591 DEVICE_BIG_ENDIAN);
3592 }
3593
3594 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3595 {
3596 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3597 }
3598
3599 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3600 {
3601 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3602 }
3603
3604 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3605 {
3606 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3607 }
3608
3609 /* XXX: optimize */
3610 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3611 MemTxAttrs attrs, MemTxResult *result)
3612 {
3613 MemTxResult r;
3614 val = tswap64(val);
3615 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3616 if (result) {
3617 *result = r;
3618 }
3619 }
3620
3621 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3622 MemTxAttrs attrs, MemTxResult *result)
3623 {
3624 MemTxResult r;
3625 val = cpu_to_le64(val);
3626 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3627 if (result) {
3628 *result = r;
3629 }
3630 }
3631 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3632 MemTxAttrs attrs, MemTxResult *result)
3633 {
3634 MemTxResult r;
3635 val = cpu_to_be64(val);
3636 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3637 if (result) {
3638 *result = r;
3639 }
3640 }
3641
3642 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3643 {
3644 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3645 }
3646
3647 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3648 {
3649 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3650 }
3651
3652 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3653 {
3654 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3655 }
3656
3657 /* virtual memory access for debug (includes writing to ROM) */
3658 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3659 uint8_t *buf, int len, int is_write)
3660 {
3661 int l;
3662 hwaddr phys_addr;
3663 target_ulong page;
3664
3665 while (len > 0) {
3666 int asidx;
3667 MemTxAttrs attrs;
3668
3669 page = addr & TARGET_PAGE_MASK;
3670 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3671 asidx = cpu_asidx_from_attrs(cpu, attrs);
3672 /* if no physical page mapped, return an error */
3673 if (phys_addr == -1)
3674 return -1;
3675 l = (page + TARGET_PAGE_SIZE) - addr;
3676 if (l > len)
3677 l = len;
3678 phys_addr += (addr & ~TARGET_PAGE_MASK);
3679 if (is_write) {
3680 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3681 phys_addr, buf, l);
3682 } else {
3683 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3684 MEMTXATTRS_UNSPECIFIED,
3685 buf, l, 0);
3686 }
3687 len -= l;
3688 buf += l;
3689 addr += l;
3690 }
3691 return 0;
3692 }
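/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Shows how a debugger front end (gdbstub-style code) would read a
 * guest virtual address through cpu_memory_rw_debug().
 */
#if 0
static bool example_read_guest_u32(CPUState *cpu, target_ulong vaddr,
                                   uint32_t *out)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(cpu, vaddr, buf, sizeof(buf), 0) < 0) {
        return false;               /* page not mapped */
    }
    *out = ldl_p(buf);              /* interpret in target byte order */
    return true;
}
#endif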
3693
3694 /*
3695 * Allows code that needs to deal with migration bitmaps etc. to still be built
3696 * target-independent.
3697 */
3698 size_t qemu_target_page_bits(void)
3699 {
3700 return TARGET_PAGE_BITS;
3701 }
3702
3703 #endif
3704
3705 /*
3706 * A helper function for the _utterly broken_ virtio device model to find out if
3707 * it's running on a big endian machine. Don't do this at home kids!
3708 */
3709 bool target_words_bigendian(void);
3710 bool target_words_bigendian(void)
3711 {
3712 #if defined(TARGET_WORDS_BIGENDIAN)
3713 return true;
3714 #else
3715 return false;
3716 #endif
3717 }
3718
3719 #ifndef CONFIG_USER_ONLY
3720 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3721 {
3722 MemoryRegion *mr;
3723 hwaddr l = 1;
3724 bool res;
3725
3726 rcu_read_lock();
3727 mr = address_space_translate(&address_space_memory,
3728 phys_addr, &phys_addr, &l, false);
3729
3730 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3731 rcu_read_unlock();
3732 return res;
3733 }
3734
3735 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3736 {
3737 RAMBlock *block;
3738 int ret = 0;
3739
3740 rcu_read_lock();
3741 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3742 ret = func(block->idstr, block->host, block->offset,
3743 block->used_length, opaque);
3744 if (ret) {
3745 break;
3746 }
3747 }
3748 rcu_read_unlock();
3749 return ret;
3750 }
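/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Shows a RAMBlockIterFunc callback that sums the used length of every
 * RAM block, matching the argument order used by the loop above.
 */
#if 0
static int example_sum_block(const char *block_name, void *host_addr,
                             ram_addr_t offset, ram_addr_t length,
                             void *opaque)
{
    uint64_t *total = opaque;

    *total += length;
    return 0;                       /* non-zero stops the iteration */
}

/* Usage:
 *     uint64_t total = 0;
 *     qemu_ram_foreach_block(example_sum_block, &total);
 */
#endif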
3751 #endif