[mirror_qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "exec/cputlb.h"
53 #include "translate-all.h"
54
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57
58 #include "qemu/range.h"
59
60 //#define DEBUG_SUBPAGE
61
62 #if !defined(CONFIG_USER_ONLY)
63 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
64 * are protected by the ramlist lock.
65 */
66 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
67
68 static MemoryRegion *system_memory;
69 static MemoryRegion *system_io;
70
71 AddressSpace address_space_io;
72 AddressSpace address_space_memory;
73
74 MemoryRegion io_mem_rom, io_mem_notdirty;
75 static MemoryRegion io_mem_unassigned;
76
77 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
78 #define RAM_PREALLOC (1 << 0)
79
80 /* RAM is mmap-ed with MAP_SHARED */
81 #define RAM_SHARED (1 << 1)
82
83 /* Only a portion of RAM (used_length) is actually used and migrated.
84  * This used_length can change across reboots.
85 */
86 #define RAM_RESIZEABLE (1 << 2)
87
88 #endif
89
90 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
91 /* current CPU in the current thread. It is only valid inside
92 cpu_exec() */
93 DEFINE_TLS(CPUState *, current_cpu);
94 /* 0 = Do not count executed instructions.
95 1 = Precise instruction counting.
96 2 = Adaptive rate instruction counting. */
97 int use_icount;
98
99 #if !defined(CONFIG_USER_ONLY)
100
101 typedef struct PhysPageEntry PhysPageEntry;
102
103 struct PhysPageEntry {
104     /* How many bits to skip to the next level (in units of P_L2_SIZE). 0 for a leaf. */
105 uint32_t skip : 6;
106 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
107 uint32_t ptr : 26;
108 };
109
110 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
111
112 /* Size of the L2 (and L3, etc) page tables. */
113 #define ADDR_SPACE_BITS 64
114
115 #define P_L2_BITS 9
116 #define P_L2_SIZE (1 << P_L2_BITS)
117
118 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
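/* Worked example (illustrative, assuming a 4 KiB target page, i.e.
 * TARGET_PAGE_BITS == 12): P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6,
 * so phys_page_find() reaches a leaf after at most six 9-bit radix
 * steps, each indexing one 512-entry Node table.
 */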
119
120 typedef PhysPageEntry Node[P_L2_SIZE];
121
122 typedef struct PhysPageMap {
123 struct rcu_head rcu;
124
125 unsigned sections_nb;
126 unsigned sections_nb_alloc;
127 unsigned nodes_nb;
128 unsigned nodes_nb_alloc;
129 Node *nodes;
130 MemoryRegionSection *sections;
131 } PhysPageMap;
132
133 struct AddressSpaceDispatch {
134 struct rcu_head rcu;
135
136 /* This is a multi-level map on the physical address space.
137 * The bottom level has pointers to MemoryRegionSections.
138 */
139 PhysPageEntry phys_map;
140 PhysPageMap map;
141 AddressSpace *as;
142 };
143
144 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
145 typedef struct subpage_t {
146 MemoryRegion iomem;
147 AddressSpace *as;
148 hwaddr base;
149 uint16_t sub_section[TARGET_PAGE_SIZE];
150 } subpage_t;
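/* Illustrative example (assuming 4 KiB pages): if a 16-byte MMIO region
 * sits at offset 0x100 within a page, register_subpage() fills
 * sub_section[0x100..0x10f] with that region's section index while the
 * rest of the page keeps PHYS_SECTION_UNASSIGNED, so SUBPAGE_IDX(addr)
 * selects the right section for every byte of the page.
 */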
151
152 #define PHYS_SECTION_UNASSIGNED 0
153 #define PHYS_SECTION_NOTDIRTY 1
154 #define PHYS_SECTION_ROM 2
155 #define PHYS_SECTION_WATCH 3
156
157 static void io_mem_init(void);
158 static void memory_map_init(void);
159 static void tcg_commit(MemoryListener *listener);
160
161 static MemoryRegion io_mem_watch;
162 #endif
163
164 #if !defined(CONFIG_USER_ONLY)
165
166 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
167 {
168 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
169 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
170 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
171 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
172 }
173 }
174
175 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
176 {
177 unsigned i;
178 uint32_t ret;
179 PhysPageEntry e;
180 PhysPageEntry *p;
181
182 ret = map->nodes_nb++;
183 p = map->nodes[ret];
184 assert(ret != PHYS_MAP_NODE_NIL);
185 assert(ret != map->nodes_nb_alloc);
186
187 e.skip = leaf ? 0 : 1;
188 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
189 for (i = 0; i < P_L2_SIZE; ++i) {
190 memcpy(&p[i], &e, sizeof(e));
191 }
192 return ret;
193 }
194
195 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
196 hwaddr *index, hwaddr *nb, uint16_t leaf,
197 int level)
198 {
199 PhysPageEntry *p;
200 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
201
202 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
203 lp->ptr = phys_map_node_alloc(map, level == 0);
204 }
205 p = map->nodes[lp->ptr];
206 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
207
208 while (*nb && lp < &p[P_L2_SIZE]) {
209 if ((*index & (step - 1)) == 0 && *nb >= step) {
210 lp->skip = 0;
211 lp->ptr = leaf;
212 *index += step;
213 *nb -= step;
214 } else {
215 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
216 }
217 ++lp;
218 }
219 }
220
221 static void phys_page_set(AddressSpaceDispatch *d,
222 hwaddr index, hwaddr nb,
223 uint16_t leaf)
224 {
225 /* Wildly overreserve - it doesn't matter much. */
226 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
227
228 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
229 }
230
231 /* Compact a non-leaf page entry. Simply detect that the entry has a single child
232 * and update our entry so we can skip it and go directly to the destination.
233 */
234 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
235 {
236 unsigned valid_ptr = P_L2_SIZE;
237 int valid = 0;
238 PhysPageEntry *p;
239 int i;
240
241 if (lp->ptr == PHYS_MAP_NODE_NIL) {
242 return;
243 }
244
245 p = nodes[lp->ptr];
246 for (i = 0; i < P_L2_SIZE; i++) {
247 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
248 continue;
249 }
250
251 valid_ptr = i;
252 valid++;
253 if (p[i].skip) {
254 phys_page_compact(&p[i], nodes, compacted);
255 }
256 }
257
258 /* We can only compress if there's only one child. */
259 if (valid != 1) {
260 return;
261 }
262
263 assert(valid_ptr < P_L2_SIZE);
264
265 /* Don't compress if it won't fit in the # of bits we have. */
266 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
267 return;
268 }
269
270 lp->ptr = p[valid_ptr].ptr;
271 if (!p[valid_ptr].skip) {
272 /* If our only child is a leaf, make this a leaf. */
273 /* By design, we should have made this node a leaf to begin with so we
274 * should never reach here.
275 * But since it's so simple to handle this, let's do it just in case we
276 * change this rule.
277 */
278 lp->skip = 0;
279 } else {
280 lp->skip += p[valid_ptr].skip;
281 }
282 }
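/* Illustrative example: if a node's only child is itself a non-leaf
 * with skip == 2, the parent takes over the child's ptr and adds the
 * child's skip to its own, so phys_page_find() descends straight to
 * the grandchild's table in a single step.
 */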
283
284 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
285 {
286 DECLARE_BITMAP(compacted, nodes_nb);
287
288 if (d->phys_map.skip) {
289 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
290 }
291 }
292
293 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
294 Node *nodes, MemoryRegionSection *sections)
295 {
296 PhysPageEntry *p;
297 hwaddr index = addr >> TARGET_PAGE_BITS;
298 int i;
299
300 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
301 if (lp.ptr == PHYS_MAP_NODE_NIL) {
302 return &sections[PHYS_SECTION_UNASSIGNED];
303 }
304 p = nodes[lp.ptr];
305 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
306 }
307
308 if (sections[lp.ptr].size.hi ||
309 range_covers_byte(sections[lp.ptr].offset_within_address_space,
310 sections[lp.ptr].size.lo, addr)) {
311 return &sections[lp.ptr];
312 } else {
313 return &sections[PHYS_SECTION_UNASSIGNED];
314 }
315 }
316
317 bool memory_region_is_unassigned(MemoryRegion *mr)
318 {
319 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
320 && mr != &io_mem_watch;
321 }
322
323 /* Called from RCU critical section */
324 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
325 hwaddr addr,
326 bool resolve_subpage)
327 {
328 MemoryRegionSection *section;
329 subpage_t *subpage;
330
331 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
332 if (resolve_subpage && section->mr->subpage) {
333 subpage = container_of(section->mr, subpage_t, iomem);
334 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
335 }
336 return section;
337 }
338
339 /* Called from RCU critical section */
340 static MemoryRegionSection *
341 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
342 hwaddr *plen, bool resolve_subpage)
343 {
344 MemoryRegionSection *section;
345 MemoryRegion *mr;
346 Int128 diff;
347
348 section = address_space_lookup_region(d, addr, resolve_subpage);
349 /* Compute offset within MemoryRegionSection */
350 addr -= section->offset_within_address_space;
351
352 /* Compute offset within MemoryRegion */
353 *xlat = addr + section->offset_within_region;
354
355 mr = section->mr;
356
357 /* MMIO registers can be expected to perform full-width accesses based only
358 * on their address, without considering adjacent registers that could
359 * decode to completely different MemoryRegions. When such registers
360 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
361 * regions overlap wildly. For this reason we cannot clamp the accesses
362 * here.
363 *
364 * If the length is small (as is the case for address_space_ldl/stl),
365 * everything works fine. If the incoming length is large, however,
366 * the caller really has to do the clamping through memory_access_size.
367 */
368 if (memory_region_is_ram(mr)) {
369 diff = int128_sub(section->size, int128_make64(addr));
370 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
371 }
372 return section;
373 }
374
375 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
376 {
377 if (memory_region_is_ram(mr)) {
378 return !(is_write && mr->readonly);
379 }
380 if (memory_region_is_romd(mr)) {
381 return !is_write;
382 }
383
384 return false;
385 }
386
387 /* Called from RCU critical section */
388 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
389 hwaddr *xlat, hwaddr *plen,
390 bool is_write)
391 {
392 IOMMUTLBEntry iotlb;
393 MemoryRegionSection *section;
394 MemoryRegion *mr;
395
396 for (;;) {
397 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
398 section = address_space_translate_internal(d, addr, &addr, plen, true);
399 mr = section->mr;
400
401 if (!mr->iommu_ops) {
402 break;
403 }
404
405 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
406 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
407 | (addr & iotlb.addr_mask));
408 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
409 if (!(iotlb.perm & (1 << is_write))) {
410 mr = &io_mem_unassigned;
411 break;
412 }
413
414 as = iotlb.target_as;
415 }
416
417 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
418 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
419 *plen = MIN(page, *plen);
420 }
421
422 *xlat = addr;
423 return mr;
424 }
425
426 /* Called from RCU critical section */
427 MemoryRegionSection *
428 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
429 hwaddr *xlat, hwaddr *plen)
430 {
431 MemoryRegionSection *section;
432 section = address_space_translate_internal(cpu->memory_dispatch,
433 addr, xlat, plen, false);
434
435 assert(!section->mr->iommu_ops);
436 return section;
437 }
438 #endif
439
440 #if !defined(CONFIG_USER_ONLY)
441
442 static int cpu_common_post_load(void *opaque, int version_id)
443 {
444 CPUState *cpu = opaque;
445
446 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
447 version_id is increased. */
448 cpu->interrupt_request &= ~0x01;
449 tlb_flush(cpu, 1);
450
451 return 0;
452 }
453
454 static int cpu_common_pre_load(void *opaque)
455 {
456 CPUState *cpu = opaque;
457
458 cpu->exception_index = -1;
459
460 return 0;
461 }
462
463 static bool cpu_common_exception_index_needed(void *opaque)
464 {
465 CPUState *cpu = opaque;
466
467 return tcg_enabled() && cpu->exception_index != -1;
468 }
469
470 static const VMStateDescription vmstate_cpu_common_exception_index = {
471 .name = "cpu_common/exception_index",
472 .version_id = 1,
473 .minimum_version_id = 1,
474 .needed = cpu_common_exception_index_needed,
475 .fields = (VMStateField[]) {
476 VMSTATE_INT32(exception_index, CPUState),
477 VMSTATE_END_OF_LIST()
478 }
479 };
480
481 const VMStateDescription vmstate_cpu_common = {
482 .name = "cpu_common",
483 .version_id = 1,
484 .minimum_version_id = 1,
485 .pre_load = cpu_common_pre_load,
486 .post_load = cpu_common_post_load,
487 .fields = (VMStateField[]) {
488 VMSTATE_UINT32(halted, CPUState),
489 VMSTATE_UINT32(interrupt_request, CPUState),
490 VMSTATE_END_OF_LIST()
491 },
492 .subsections = (const VMStateDescription*[]) {
493 &vmstate_cpu_common_exception_index,
494 NULL
495 }
496 };
497
498 #endif
499
500 CPUState *qemu_get_cpu(int index)
501 {
502 CPUState *cpu;
503
504 CPU_FOREACH(cpu) {
505 if (cpu->cpu_index == index) {
506 return cpu;
507 }
508 }
509
510 return NULL;
511 }
512
513 #if !defined(CONFIG_USER_ONLY)
514 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
515 {
516 /* We only support one address space per cpu at the moment. */
517 assert(cpu->as == as);
518
519 if (cpu->tcg_as_listener) {
520 memory_listener_unregister(cpu->tcg_as_listener);
521 } else {
522 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
523 }
524 cpu->tcg_as_listener->commit = tcg_commit;
525 memory_listener_register(cpu->tcg_as_listener, as);
526 }
527 #endif
528
529 void cpu_exec_init(CPUArchState *env)
530 {
531 CPUState *cpu = ENV_GET_CPU(env);
532 CPUClass *cc = CPU_GET_CLASS(cpu);
533 CPUState *some_cpu;
534 int cpu_index;
535
536 #if defined(CONFIG_USER_ONLY)
537 cpu_list_lock();
538 #endif
539 cpu_index = 0;
540 CPU_FOREACH(some_cpu) {
541 cpu_index++;
542 }
543 cpu->cpu_index = cpu_index;
544 QTAILQ_INIT(&cpu->breakpoints);
545 QTAILQ_INIT(&cpu->watchpoints);
546 #ifndef CONFIG_USER_ONLY
547 cpu->as = &address_space_memory;
548 cpu->thread_id = qemu_get_thread_id();
549 cpu_reload_memory_map(cpu);
550 #endif
551 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
552 #if defined(CONFIG_USER_ONLY)
553 cpu_list_unlock();
554 #endif
555 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
556 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
557 }
558 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
559 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
560 cpu_save, cpu_load, env);
561 assert(cc->vmsd == NULL);
562 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
563 #endif
564 if (cc->vmsd != NULL) {
565 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
566 }
567 }
568
569 #if defined(CONFIG_USER_ONLY)
570 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
571 {
572 tb_invalidate_phys_page_range(pc, pc + 1, 0);
573 }
574 #else
575 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
576 {
577 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
578 if (phys != -1) {
579 tb_invalidate_phys_addr(cpu->as,
580 phys | (pc & ~TARGET_PAGE_MASK));
581 }
582 }
583 #endif
584
585 #if defined(CONFIG_USER_ONLY)
586 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
587
588 {
589 }
590
591 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
592 int flags)
593 {
594 return -ENOSYS;
595 }
596
597 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
598 {
599 }
600
601 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
602 int flags, CPUWatchpoint **watchpoint)
603 {
604 return -ENOSYS;
605 }
606 #else
607 /* Add a watchpoint. */
608 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
609 int flags, CPUWatchpoint **watchpoint)
610 {
611 CPUWatchpoint *wp;
612
613 /* forbid ranges which are empty or run off the end of the address space */
614 if (len == 0 || (addr + len - 1) < addr) {
615 error_report("tried to set invalid watchpoint at %"
616 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
617 return -EINVAL;
618 }
619 wp = g_malloc(sizeof(*wp));
620
621 wp->vaddr = addr;
622 wp->len = len;
623 wp->flags = flags;
624
625 /* keep all GDB-injected watchpoints in front */
626 if (flags & BP_GDB) {
627 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
628 } else {
629 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
630 }
631
632 tlb_flush_page(cpu, addr);
633
634 if (watchpoint)
635 *watchpoint = wp;
636 return 0;
637 }
638
639 /* Remove a specific watchpoint. */
640 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
641 int flags)
642 {
643 CPUWatchpoint *wp;
644
645 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
646 if (addr == wp->vaddr && len == wp->len
647 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
648 cpu_watchpoint_remove_by_ref(cpu, wp);
649 return 0;
650 }
651 }
652 return -ENOENT;
653 }
654
655 /* Remove a specific watchpoint by reference. */
656 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
657 {
658 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
659
660 tlb_flush_page(cpu, watchpoint->vaddr);
661
662 g_free(watchpoint);
663 }
664
665 /* Remove all matching watchpoints. */
666 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
667 {
668 CPUWatchpoint *wp, *next;
669
670 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
671 if (wp->flags & mask) {
672 cpu_watchpoint_remove_by_ref(cpu, wp);
673 }
674 }
675 }
676
677 /* Return true if this watchpoint address matches the specified
678  * access (i.e. the address range covered by the watchpoint overlaps
679 * partially or completely with the address range covered by the
680 * access).
681 */
682 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
683 vaddr addr,
684 vaddr len)
685 {
686 /* We know the lengths are non-zero, but a little caution is
687 * required to avoid errors in the case where the range ends
688 * exactly at the top of the address space and so addr + len
689 * wraps round to zero.
690 */
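    /* Illustrative case, assuming a 64-bit vaddr: a watchpoint covering
     * the last four bytes of the address space has wp->vaddr + wp->len
     * wrapping to zero, so comparing exclusive end addresses would go
     * wrong; with the closed ends used here, wpend == UINT64_MAX and
     * the overlap test below still behaves correctly.
     */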
691 vaddr wpend = wp->vaddr + wp->len - 1;
692 vaddr addrend = addr + len - 1;
693
694 return !(addr > wpend || wp->vaddr > addrend);
695 }
696
697 #endif
698
699 /* Add a breakpoint. */
700 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
701 CPUBreakpoint **breakpoint)
702 {
703 CPUBreakpoint *bp;
704
705 bp = g_malloc(sizeof(*bp));
706
707 bp->pc = pc;
708 bp->flags = flags;
709
710 /* keep all GDB-injected breakpoints in front */
711 if (flags & BP_GDB) {
712 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
713 } else {
714 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
715 }
716
717 breakpoint_invalidate(cpu, pc);
718
719 if (breakpoint) {
720 *breakpoint = bp;
721 }
722 return 0;
723 }
724
725 /* Remove a specific breakpoint. */
726 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
727 {
728 CPUBreakpoint *bp;
729
730 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
731 if (bp->pc == pc && bp->flags == flags) {
732 cpu_breakpoint_remove_by_ref(cpu, bp);
733 return 0;
734 }
735 }
736 return -ENOENT;
737 }
738
739 /* Remove a specific breakpoint by reference. */
740 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
741 {
742 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
743
744 breakpoint_invalidate(cpu, breakpoint->pc);
745
746 g_free(breakpoint);
747 }
748
749 /* Remove all matching breakpoints. */
750 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
751 {
752 CPUBreakpoint *bp, *next;
753
754 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
755 if (bp->flags & mask) {
756 cpu_breakpoint_remove_by_ref(cpu, bp);
757 }
758 }
759 }
760
761 /* enable or disable single step mode. EXCP_DEBUG is returned by the
762 CPU loop after each instruction */
763 void cpu_single_step(CPUState *cpu, int enabled)
764 {
765 if (cpu->singlestep_enabled != enabled) {
766 cpu->singlestep_enabled = enabled;
767 if (kvm_enabled()) {
768 kvm_update_guest_debug(cpu, 0);
769 } else {
770 /* must flush all the translated code to avoid inconsistencies */
771 /* XXX: only flush what is necessary */
772 CPUArchState *env = cpu->env_ptr;
773 tb_flush(env);
774 }
775 }
776 }
777
778 void cpu_abort(CPUState *cpu, const char *fmt, ...)
779 {
780 va_list ap;
781 va_list ap2;
782
783 va_start(ap, fmt);
784 va_copy(ap2, ap);
785 fprintf(stderr, "qemu: fatal: ");
786 vfprintf(stderr, fmt, ap);
787 fprintf(stderr, "\n");
788 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
789 if (qemu_log_enabled()) {
790 qemu_log("qemu: fatal: ");
791 qemu_log_vprintf(fmt, ap2);
792 qemu_log("\n");
793 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
794 qemu_log_flush();
795 qemu_log_close();
796 }
797 va_end(ap2);
798 va_end(ap);
799 #if defined(CONFIG_USER_ONLY)
800 {
801 struct sigaction act;
802 sigfillset(&act.sa_mask);
803 act.sa_handler = SIG_DFL;
804 sigaction(SIGABRT, &act, NULL);
805 }
806 #endif
807 abort();
808 }
809
810 #if !defined(CONFIG_USER_ONLY)
811 /* Called from RCU critical section */
812 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
813 {
814 RAMBlock *block;
815
816 block = atomic_rcu_read(&ram_list.mru_block);
817 if (block && addr - block->offset < block->max_length) {
818 goto found;
819 }
820 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
821 if (addr - block->offset < block->max_length) {
822 goto found;
823 }
824 }
825
826 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
827 abort();
828
829 found:
830 /* It is safe to write mru_block outside the iothread lock. This
831 * is what happens:
832 *
833 * mru_block = xxx
834 * rcu_read_unlock()
835 * xxx removed from list
836 * rcu_read_lock()
837 * read mru_block
838 * mru_block = NULL;
839 * call_rcu(reclaim_ramblock, xxx);
840 * rcu_read_unlock()
841 *
842 * atomic_rcu_set is not needed here. The block was already published
843 * when it was placed into the list. Here we're just making an extra
844 * copy of the pointer.
845 */
846 ram_list.mru_block = block;
847 return block;
848 }
849
850 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
851 {
852 ram_addr_t start1;
853 RAMBlock *block;
854 ram_addr_t end;
855
856 end = TARGET_PAGE_ALIGN(start + length);
857 start &= TARGET_PAGE_MASK;
858
859 rcu_read_lock();
860 block = qemu_get_ram_block(start);
861 assert(block == qemu_get_ram_block(end - 1));
862 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
863 cpu_tlb_reset_dirty_all(start1, length);
864 rcu_read_unlock();
865 }
866
867 /* Note: start and end must be within the same ram block. */
868 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
869 ram_addr_t length,
870 unsigned client)
871 {
872 unsigned long end, page;
873 bool dirty;
874
875 if (length == 0) {
876 return false;
877 }
878
879 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
880 page = start >> TARGET_PAGE_BITS;
881 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
882 page, end - page);
883
884 if (dirty && tcg_enabled()) {
885 tlb_reset_dirty_range_all(start, length);
886 }
887
888 return dirty;
889 }
890
891 /* Called from RCU critical section */
892 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
893 MemoryRegionSection *section,
894 target_ulong vaddr,
895 hwaddr paddr, hwaddr xlat,
896 int prot,
897 target_ulong *address)
898 {
899 hwaddr iotlb;
900 CPUWatchpoint *wp;
901
902 if (memory_region_is_ram(section->mr)) {
903 /* Normal RAM. */
904 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
905 + xlat;
906 if (!section->readonly) {
907 iotlb |= PHYS_SECTION_NOTDIRTY;
908 } else {
909 iotlb |= PHYS_SECTION_ROM;
910 }
911 } else {
912 iotlb = section - section->address_space->dispatch->map.sections;
913 iotlb += xlat;
914 }
915
916 /* Make accesses to pages with watchpoints go via the
917 watchpoint trap routines. */
918 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
919 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
920 /* Avoid trapping reads of pages with a write breakpoint. */
921 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
922 iotlb = PHYS_SECTION_WATCH + paddr;
923 *address |= TLB_MMIO;
924 break;
925 }
926 }
927 }
928
929 return iotlb;
930 }
931 #endif /* defined(CONFIG_USER_ONLY) */
932
933 #if !defined(CONFIG_USER_ONLY)
934
935 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
936 uint16_t section);
937 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
938
939 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
940 qemu_anon_ram_alloc;
941
942 /*
943  * Set a custom physical guest memory allocator.
944 * Accelerators with unusual needs may need this. Hopefully, we can
945 * get rid of it eventually.
946 */
947 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
948 {
949 phys_mem_alloc = alloc;
950 }
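/* Illustrative sketch only, not part of the upstream API surface: an
 * accelerator with special allocation needs would install its hook
 * early in its init path, e.g.
 *
 *     static void *my_accel_ram_alloc(size_t size, uint64_t *align)
 *     {
 *         *align = getpagesize();
 *         return qemu_memalign(*align, size);
 *     }
 *     ...
 *     phys_mem_set_alloc(my_accel_ram_alloc);
 *
 * The function name above is hypothetical.
 */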
951
952 static uint16_t phys_section_add(PhysPageMap *map,
953 MemoryRegionSection *section)
954 {
955 /* The physical section number is ORed with a page-aligned
956 * pointer to produce the iotlb entries. Thus it should
957 * never overflow into the page-aligned value.
958 */
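    /* Illustrative example (assuming 4 KiB pages and a page-aligned
     * mapping): a writable RAM page at ram_addr 0x7f000 is encoded by
     * memory_region_section_get_iotlb() as 0x7f000 | PHYS_SECTION_NOTDIRTY
     * == 0x7f001, and iotlb_to_region() later recovers section 1 with
     * index & ~TARGET_PAGE_MASK -- hence the assertion below.
     */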
959 assert(map->sections_nb < TARGET_PAGE_SIZE);
960
961 if (map->sections_nb == map->sections_nb_alloc) {
962 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
963 map->sections = g_renew(MemoryRegionSection, map->sections,
964 map->sections_nb_alloc);
965 }
966 map->sections[map->sections_nb] = *section;
967 memory_region_ref(section->mr);
968 return map->sections_nb++;
969 }
970
971 static void phys_section_destroy(MemoryRegion *mr)
972 {
973 memory_region_unref(mr);
974
975 if (mr->subpage) {
976 subpage_t *subpage = container_of(mr, subpage_t, iomem);
977 object_unref(OBJECT(&subpage->iomem));
978 g_free(subpage);
979 }
980 }
981
982 static void phys_sections_free(PhysPageMap *map)
983 {
984 while (map->sections_nb > 0) {
985 MemoryRegionSection *section = &map->sections[--map->sections_nb];
986 phys_section_destroy(section->mr);
987 }
988 g_free(map->sections);
989 g_free(map->nodes);
990 }
991
992 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
993 {
994 subpage_t *subpage;
995 hwaddr base = section->offset_within_address_space
996 & TARGET_PAGE_MASK;
997 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
998 d->map.nodes, d->map.sections);
999 MemoryRegionSection subsection = {
1000 .offset_within_address_space = base,
1001 .size = int128_make64(TARGET_PAGE_SIZE),
1002 };
1003 hwaddr start, end;
1004
1005 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1006
1007 if (!(existing->mr->subpage)) {
1008 subpage = subpage_init(d->as, base);
1009 subsection.address_space = d->as;
1010 subsection.mr = &subpage->iomem;
1011 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1012 phys_section_add(&d->map, &subsection));
1013 } else {
1014 subpage = container_of(existing->mr, subpage_t, iomem);
1015 }
1016 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1017 end = start + int128_get64(section->size) - 1;
1018 subpage_register(subpage, start, end,
1019 phys_section_add(&d->map, section));
1020 }
1021
1022
1023 static void register_multipage(AddressSpaceDispatch *d,
1024 MemoryRegionSection *section)
1025 {
1026 hwaddr start_addr = section->offset_within_address_space;
1027 uint16_t section_index = phys_section_add(&d->map, section);
1028 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1029 TARGET_PAGE_BITS));
1030
1031 assert(num_pages);
1032 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1033 }
1034
1035 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1036 {
1037 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1038 AddressSpaceDispatch *d = as->next_dispatch;
1039 MemoryRegionSection now = *section, remain = *section;
1040 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1041
1042 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1043 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1044 - now.offset_within_address_space;
1045
1046 now.size = int128_min(int128_make64(left), now.size);
1047 register_subpage(d, &now);
1048 } else {
1049 now.size = int128_zero();
1050 }
1051 while (int128_ne(remain.size, now.size)) {
1052 remain.size = int128_sub(remain.size, now.size);
1053 remain.offset_within_address_space += int128_get64(now.size);
1054 remain.offset_within_region += int128_get64(now.size);
1055 now = remain;
1056 if (int128_lt(remain.size, page_size)) {
1057 register_subpage(d, &now);
1058 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1059 now.size = page_size;
1060 register_subpage(d, &now);
1061 } else {
1062 now.size = int128_and(now.size, int128_neg(page_size));
1063 register_multipage(d, &now);
1064 }
1065 }
1066 }
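/* Illustrative walk-through of mem_add() (assuming 4 KiB pages): a
 * section starting at address-space offset 0x1800 with size 0x2000 is
 * split into a head subpage covering 0x1800..0x1fff, one full page
 * registered at 0x2000 via register_multipage(), and a tail subpage
 * covering 0x3000..0x37ff.
 */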
1067
1068 void qemu_flush_coalesced_mmio_buffer(void)
1069 {
1070 if (kvm_enabled())
1071 kvm_flush_coalesced_mmio_buffer();
1072 }
1073
1074 void qemu_mutex_lock_ramlist(void)
1075 {
1076 qemu_mutex_lock(&ram_list.mutex);
1077 }
1078
1079 void qemu_mutex_unlock_ramlist(void)
1080 {
1081 qemu_mutex_unlock(&ram_list.mutex);
1082 }
1083
1084 #ifdef __linux__
1085
1086 #include <sys/vfs.h>
1087
1088 #define HUGETLBFS_MAGIC 0x958458f6
1089
1090 static long gethugepagesize(const char *path, Error **errp)
1091 {
1092 struct statfs fs;
1093 int ret;
1094
1095 do {
1096 ret = statfs(path, &fs);
1097 } while (ret != 0 && errno == EINTR);
1098
1099 if (ret != 0) {
1100 error_setg_errno(errp, errno, "failed to get page size of file %s",
1101 path);
1102 return 0;
1103 }
1104
1105 if (fs.f_type != HUGETLBFS_MAGIC)
1106 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1107
1108 return fs.f_bsize;
1109 }
1110
1111 static void *file_ram_alloc(RAMBlock *block,
1112 ram_addr_t memory,
1113 const char *path,
1114 Error **errp)
1115 {
1116 char *filename;
1117 char *sanitized_name;
1118 char *c;
1119 void *area = NULL;
1120 int fd;
1121 uint64_t hpagesize;
1122 Error *local_err = NULL;
1123
1124 hpagesize = gethugepagesize(path, &local_err);
1125 if (local_err) {
1126 error_propagate(errp, local_err);
1127 goto error;
1128 }
1129 block->mr->align = hpagesize;
1130
1131 if (memory < hpagesize) {
1132 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1133 "or larger than huge page size 0x%" PRIx64,
1134 memory, hpagesize);
1135 goto error;
1136 }
1137
1138 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1139 error_setg(errp,
1140 "host lacks kvm mmu notifiers, -mem-path unsupported");
1141 goto error;
1142 }
1143
1144 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1145 sanitized_name = g_strdup(memory_region_name(block->mr));
1146 for (c = sanitized_name; *c != '\0'; c++) {
1147 if (*c == '/')
1148 *c = '_';
1149 }
1150
1151 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1152 sanitized_name);
1153 g_free(sanitized_name);
1154
1155 fd = mkstemp(filename);
1156 if (fd < 0) {
1157 error_setg_errno(errp, errno,
1158 "unable to create backing store for hugepages");
1159 g_free(filename);
1160 goto error;
1161 }
1162 unlink(filename);
1163 g_free(filename);
1164
1165 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1166
1167 /*
1168 * ftruncate is not supported by hugetlbfs in older
1169 * hosts, so don't bother bailing out on errors.
1170 * If anything goes wrong with it under other filesystems,
1171 * mmap will fail.
1172 */
1173 if (ftruncate(fd, memory)) {
1174 perror("ftruncate");
1175 }
1176
1177 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1178 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1179 fd, 0);
1180 if (area == MAP_FAILED) {
1181 error_setg_errno(errp, errno,
1182 "unable to map backing store for hugepages");
1183 close(fd);
1184 goto error;
1185 }
1186
1187 if (mem_prealloc) {
1188 os_mem_prealloc(fd, area, memory);
1189 }
1190
1191 block->fd = fd;
1192 return area;
1193
1194 error:
1195 if (mem_prealloc) {
1196 error_report("%s", error_get_pretty(*errp));
1197 exit(1);
1198 }
1199 return NULL;
1200 }
1201 #endif
1202
1203 /* Called with the ramlist lock held. */
1204 static ram_addr_t find_ram_offset(ram_addr_t size)
1205 {
1206 RAMBlock *block, *next_block;
1207 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1208
1209 assert(size != 0); /* it would hand out same offset multiple times */
1210
1211 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1212 return 0;
1213 }
1214
1215 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1216 ram_addr_t end, next = RAM_ADDR_MAX;
1217
1218 end = block->offset + block->max_length;
1219
1220 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1221 if (next_block->offset >= end) {
1222 next = MIN(next, next_block->offset);
1223 }
1224 }
1225 if (next - end >= size && next - end < mingap) {
1226 offset = end;
1227 mingap = next - end;
1228 }
1229 }
1230
1231 if (offset == RAM_ADDR_MAX) {
1232 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1233 (uint64_t)size);
1234 abort();
1235 }
1236
1237 return offset;
1238 }
1239
1240 ram_addr_t last_ram_offset(void)
1241 {
1242 RAMBlock *block;
1243 ram_addr_t last = 0;
1244
1245 rcu_read_lock();
1246 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1247 last = MAX(last, block->offset + block->max_length);
1248 }
1249 rcu_read_unlock();
1250 return last;
1251 }
1252
1253 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1254 {
1255 int ret;
1256
1257     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1258 if (!machine_dump_guest_core(current_machine)) {
1259 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1260 if (ret) {
1261 perror("qemu_madvise");
1262 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1263 "but dump_guest_core=off specified\n");
1264 }
1265 }
1266 }
1267
1268 /* Called within an RCU critical section, or while the ramlist lock
1269 * is held.
1270 */
1271 static RAMBlock *find_ram_block(ram_addr_t addr)
1272 {
1273 RAMBlock *block;
1274
1275 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1276 if (block->offset == addr) {
1277 return block;
1278 }
1279 }
1280
1281 return NULL;
1282 }
1283
1284 /* Called with iothread lock held. */
1285 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1286 {
1287 RAMBlock *new_block, *block;
1288
1289 rcu_read_lock();
1290 new_block = find_ram_block(addr);
1291 assert(new_block);
1292 assert(!new_block->idstr[0]);
1293
1294 if (dev) {
1295 char *id = qdev_get_dev_path(dev);
1296 if (id) {
1297 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1298 g_free(id);
1299 }
1300 }
1301 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1302
1303 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1304 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1305 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1306 new_block->idstr);
1307 abort();
1308 }
1309 }
1310 rcu_read_unlock();
1311 }
1312
1313 /* Called with iothread lock held. */
1314 void qemu_ram_unset_idstr(ram_addr_t addr)
1315 {
1316 RAMBlock *block;
1317
1318 /* FIXME: arch_init.c assumes that this is not called throughout
1319 * migration. Ignore the problem since hot-unplug during migration
1320 * does not work anyway.
1321 */
1322
1323 rcu_read_lock();
1324 block = find_ram_block(addr);
1325 if (block) {
1326 memset(block->idstr, 0, sizeof(block->idstr));
1327 }
1328 rcu_read_unlock();
1329 }
1330
1331 static int memory_try_enable_merging(void *addr, size_t len)
1332 {
1333 if (!machine_mem_merge(current_machine)) {
1334 /* disabled by the user */
1335 return 0;
1336 }
1337
1338 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1339 }
1340
1341 /* Only legal before the guest might have detected the memory size: e.g. on
1342 * incoming migration, or right after reset.
1343 *
1344  * As the memory core doesn't know how the memory is accessed, it is up to
1345  * the resize callback to update device state and/or add assertions to detect
1346 * misuse, if necessary.
1347 */
1348 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1349 {
1350 RAMBlock *block = find_ram_block(base);
1351
1352 assert(block);
1353
1354 newsize = TARGET_PAGE_ALIGN(newsize);
1355
1356 if (block->used_length == newsize) {
1357 return 0;
1358 }
1359
1360 if (!(block->flags & RAM_RESIZEABLE)) {
1361 error_setg_errno(errp, EINVAL,
1362 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1363 " in != 0x" RAM_ADDR_FMT, block->idstr,
1364 newsize, block->used_length);
1365 return -EINVAL;
1366 }
1367
1368 if (block->max_length < newsize) {
1369 error_setg_errno(errp, EINVAL,
1370 "Length too large: %s: 0x" RAM_ADDR_FMT
1371 " > 0x" RAM_ADDR_FMT, block->idstr,
1372 newsize, block->max_length);
1373 return -EINVAL;
1374 }
1375
1376 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1377 block->used_length = newsize;
1378 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1379 DIRTY_CLIENTS_ALL);
1380 memory_region_set_size(block->mr, newsize);
1381 if (block->resized) {
1382 block->resized(block->idstr, newsize, block->host);
1383 }
1384 return 0;
1385 }
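/* Illustrative usage sketch (the callback name and sizes are made up):
 * a device that grows its RAM on incoming migration might do
 *
 *     addr = qemu_ram_alloc_resizeable(0x10000, 0x200000,
 *                                      my_resized_cb, mr, &err);
 *     ...
 *     qemu_ram_resize(addr, 0x20000, &err);
 *
 * where my_resized_cb(idstr, new_length, host) updates whatever device
 * state depends on the block's used_length.
 */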
1386
1387 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1388 {
1389 RAMBlock *block;
1390 RAMBlock *last_block = NULL;
1391 ram_addr_t old_ram_size, new_ram_size;
1392
1393 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1394
1395 qemu_mutex_lock_ramlist();
1396 new_block->offset = find_ram_offset(new_block->max_length);
1397
1398 if (!new_block->host) {
1399 if (xen_enabled()) {
1400 xen_ram_alloc(new_block->offset, new_block->max_length,
1401 new_block->mr);
1402 } else {
1403 new_block->host = phys_mem_alloc(new_block->max_length,
1404 &new_block->mr->align);
1405 if (!new_block->host) {
1406 error_setg_errno(errp, errno,
1407 "cannot set up guest memory '%s'",
1408 memory_region_name(new_block->mr));
1409 qemu_mutex_unlock_ramlist();
1410 return -1;
1411 }
1412 memory_try_enable_merging(new_block->host, new_block->max_length);
1413 }
1414 }
1415
1416 new_ram_size = MAX(old_ram_size,
1417 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1418 if (new_ram_size > old_ram_size) {
1419 migration_bitmap_extend(old_ram_size, new_ram_size);
1420 }
1421 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1422 * QLIST (which has an RCU-friendly variant) does not have insertion at
1423 * tail, so save the last element in last_block.
1424 */
1425 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1426 last_block = block;
1427 if (block->max_length < new_block->max_length) {
1428 break;
1429 }
1430 }
1431 if (block) {
1432 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1433 } else if (last_block) {
1434 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1435 } else { /* list is empty */
1436 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1437 }
1438 ram_list.mru_block = NULL;
1439
1440 /* Write list before version */
1441 smp_wmb();
1442 ram_list.version++;
1443 qemu_mutex_unlock_ramlist();
1444
1445 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1446
1447 if (new_ram_size > old_ram_size) {
1448 int i;
1449
1450 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1451 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1452 ram_list.dirty_memory[i] =
1453 bitmap_zero_extend(ram_list.dirty_memory[i],
1454 old_ram_size, new_ram_size);
1455 }
1456 }
1457 cpu_physical_memory_set_dirty_range(new_block->offset,
1458 new_block->used_length,
1459 DIRTY_CLIENTS_ALL);
1460
1461 if (new_block->host) {
1462 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1463 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1464 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1465 if (kvm_enabled()) {
1466 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1467 }
1468 }
1469
1470 return new_block->offset;
1471 }
1472
1473 #ifdef __linux__
1474 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1475 bool share, const char *mem_path,
1476 Error **errp)
1477 {
1478 RAMBlock *new_block;
1479 ram_addr_t addr;
1480 Error *local_err = NULL;
1481
1482 if (xen_enabled()) {
1483 error_setg(errp, "-mem-path not supported with Xen");
1484 return -1;
1485 }
1486
1487 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1488 /*
1489 * file_ram_alloc() needs to allocate just like
1490 * phys_mem_alloc, but we haven't bothered to provide
1491 * a hook there.
1492 */
1493 error_setg(errp,
1494 "-mem-path not supported with this accelerator");
1495 return -1;
1496 }
1497
1498 size = TARGET_PAGE_ALIGN(size);
1499 new_block = g_malloc0(sizeof(*new_block));
1500 new_block->mr = mr;
1501 new_block->used_length = size;
1502 new_block->max_length = size;
1503 new_block->flags = share ? RAM_SHARED : 0;
1504 new_block->host = file_ram_alloc(new_block, size,
1505 mem_path, errp);
1506 if (!new_block->host) {
1507 g_free(new_block);
1508 return -1;
1509 }
1510
1511 addr = ram_block_add(new_block, &local_err);
1512 if (local_err) {
1513 g_free(new_block);
1514 error_propagate(errp, local_err);
1515 return -1;
1516 }
1517 return addr;
1518 }
1519 #endif
1520
1521 static
1522 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1523 void (*resized)(const char*,
1524 uint64_t length,
1525 void *host),
1526 void *host, bool resizeable,
1527 MemoryRegion *mr, Error **errp)
1528 {
1529 RAMBlock *new_block;
1530 ram_addr_t addr;
1531 Error *local_err = NULL;
1532
1533 size = TARGET_PAGE_ALIGN(size);
1534 max_size = TARGET_PAGE_ALIGN(max_size);
1535 new_block = g_malloc0(sizeof(*new_block));
1536 new_block->mr = mr;
1537 new_block->resized = resized;
1538 new_block->used_length = size;
1539 new_block->max_length = max_size;
1540 assert(max_size >= size);
1541 new_block->fd = -1;
1542 new_block->host = host;
1543 if (host) {
1544 new_block->flags |= RAM_PREALLOC;
1545 }
1546 if (resizeable) {
1547 new_block->flags |= RAM_RESIZEABLE;
1548 }
1549 addr = ram_block_add(new_block, &local_err);
1550 if (local_err) {
1551 g_free(new_block);
1552 error_propagate(errp, local_err);
1553 return -1;
1554 }
1555 return addr;
1556 }
1557
1558 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1559 MemoryRegion *mr, Error **errp)
1560 {
1561 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1562 }
1563
1564 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1565 {
1566 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1567 }
1568
1569 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1570 void (*resized)(const char*,
1571 uint64_t length,
1572 void *host),
1573 MemoryRegion *mr, Error **errp)
1574 {
1575 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1576 }
1577
1578 void qemu_ram_free_from_ptr(ram_addr_t addr)
1579 {
1580 RAMBlock *block;
1581
1582 qemu_mutex_lock_ramlist();
1583 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1584 if (addr == block->offset) {
1585 QLIST_REMOVE_RCU(block, next);
1586 ram_list.mru_block = NULL;
1587 /* Write list before version */
1588 smp_wmb();
1589 ram_list.version++;
1590 g_free_rcu(block, rcu);
1591 break;
1592 }
1593 }
1594 qemu_mutex_unlock_ramlist();
1595 }
1596
1597 static void reclaim_ramblock(RAMBlock *block)
1598 {
1599 if (block->flags & RAM_PREALLOC) {
1600 ;
1601 } else if (xen_enabled()) {
1602 xen_invalidate_map_cache_entry(block->host);
1603 #ifndef _WIN32
1604 } else if (block->fd >= 0) {
1605 munmap(block->host, block->max_length);
1606 close(block->fd);
1607 #endif
1608 } else {
1609 qemu_anon_ram_free(block->host, block->max_length);
1610 }
1611 g_free(block);
1612 }
1613
1614 void qemu_ram_free(ram_addr_t addr)
1615 {
1616 RAMBlock *block;
1617
1618 qemu_mutex_lock_ramlist();
1619 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1620 if (addr == block->offset) {
1621 QLIST_REMOVE_RCU(block, next);
1622 ram_list.mru_block = NULL;
1623 /* Write list before version */
1624 smp_wmb();
1625 ram_list.version++;
1626 call_rcu(block, reclaim_ramblock, rcu);
1627 break;
1628 }
1629 }
1630 qemu_mutex_unlock_ramlist();
1631 }
1632
1633 #ifndef _WIN32
1634 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1635 {
1636 RAMBlock *block;
1637 ram_addr_t offset;
1638 int flags;
1639 void *area, *vaddr;
1640
1641 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1642 offset = addr - block->offset;
1643 if (offset < block->max_length) {
1644 vaddr = ramblock_ptr(block, offset);
1645 if (block->flags & RAM_PREALLOC) {
1646 ;
1647 } else if (xen_enabled()) {
1648 abort();
1649 } else {
1650 flags = MAP_FIXED;
1651 if (block->fd >= 0) {
1652 flags |= (block->flags & RAM_SHARED ?
1653 MAP_SHARED : MAP_PRIVATE);
1654 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1655 flags, block->fd, offset);
1656 } else {
1657 /*
1658 * Remap needs to match alloc. Accelerators that
1659 * set phys_mem_alloc never remap. If they did,
1660 * we'd need a remap hook here.
1661 */
1662 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1663
1664 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1665 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1666 flags, -1, 0);
1667 }
1668 if (area != vaddr) {
1669 fprintf(stderr, "Could not remap addr: "
1670 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1671 length, addr);
1672 exit(1);
1673 }
1674 memory_try_enable_merging(vaddr, length);
1675 qemu_ram_setup_dump(vaddr, length);
1676 }
1677 }
1678 }
1679 }
1680 #endif /* !_WIN32 */
1681
1682 int qemu_get_ram_fd(ram_addr_t addr)
1683 {
1684 RAMBlock *block;
1685 int fd;
1686
1687 rcu_read_lock();
1688 block = qemu_get_ram_block(addr);
1689 fd = block->fd;
1690 rcu_read_unlock();
1691 return fd;
1692 }
1693
1694 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1695 {
1696 RAMBlock *block;
1697 void *ptr;
1698
1699 rcu_read_lock();
1700 block = qemu_get_ram_block(addr);
1701 ptr = ramblock_ptr(block, 0);
1702 rcu_read_unlock();
1703 return ptr;
1704 }
1705
1706 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1707 * This should not be used for general purpose DMA. Use address_space_map
1708 * or address_space_rw instead. For local memory (e.g. video ram) that the
1709 * device owns, use memory_region_get_ram_ptr.
1710 *
1711 * By the time this function returns, the returned pointer is not protected
1712 * by RCU anymore. If the caller is not within an RCU critical section and
1713 * does not hold the iothread lock, it must have other means of protecting the
1714 * pointer, such as a reference to the region that includes the incoming
1715 * ram_addr_t.
1716 */
1717 void *qemu_get_ram_ptr(ram_addr_t addr)
1718 {
1719 RAMBlock *block;
1720 void *ptr;
1721
1722 rcu_read_lock();
1723 block = qemu_get_ram_block(addr);
1724
1725 if (xen_enabled() && block->host == NULL) {
1726 /* We need to check if the requested address is in the RAM
1727 * because we don't want to map the entire memory in QEMU.
1728 * In that case just map until the end of the page.
1729 */
1730 if (block->offset == 0) {
1731 ptr = xen_map_cache(addr, 0, 0);
1732 goto unlock;
1733 }
1734
1735 block->host = xen_map_cache(block->offset, block->max_length, 1);
1736 }
1737 ptr = ramblock_ptr(block, addr - block->offset);
1738
1739 unlock:
1740 rcu_read_unlock();
1741 return ptr;
1742 }
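/* Illustrative caller pattern: code running outside the iothread lock
 * would typically bracket its use of the returned pointer in its own
 * RCU critical section, e.g.
 *
 *     rcu_read_lock();
 *     p = qemu_get_ram_ptr(addr);
 *     ... access p ...
 *     rcu_read_unlock();
 *
 * or else hold a reference on the MemoryRegion that owns the block.
 */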
1743
1744 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1745 * but takes a size argument.
1746 *
1747 * By the time this function returns, the returned pointer is not protected
1748 * by RCU anymore. If the caller is not within an RCU critical section and
1749 * does not hold the iothread lock, it must have other means of protecting the
1750 * pointer, such as a reference to the region that includes the incoming
1751 * ram_addr_t.
1752 */
1753 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1754 {
1755 void *ptr;
1756 if (*size == 0) {
1757 return NULL;
1758 }
1759 if (xen_enabled()) {
1760 return xen_map_cache(addr, *size, 1);
1761 } else {
1762 RAMBlock *block;
1763 rcu_read_lock();
1764 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1765 if (addr - block->offset < block->max_length) {
1766 if (addr - block->offset + *size > block->max_length)
1767 *size = block->max_length - addr + block->offset;
1768 ptr = ramblock_ptr(block, addr - block->offset);
1769 rcu_read_unlock();
1770 return ptr;
1771 }
1772 }
1773
1774 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1775 abort();
1776 }
1777 }
1778
1779 /* Some of the softmmu routines need to translate from a host pointer
1780 * (typically a TLB entry) back to a ram offset.
1781 *
1782 * By the time this function returns, the returned pointer is not protected
1783 * by RCU anymore. If the caller is not within an RCU critical section and
1784 * does not hold the iothread lock, it must have other means of protecting the
1785 * pointer, such as a reference to the region that includes the incoming
1786 * ram_addr_t.
1787 */
1788 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1789 {
1790 RAMBlock *block;
1791 uint8_t *host = ptr;
1792 MemoryRegion *mr;
1793
1794 if (xen_enabled()) {
1795 rcu_read_lock();
1796 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1797 mr = qemu_get_ram_block(*ram_addr)->mr;
1798 rcu_read_unlock();
1799 return mr;
1800 }
1801
1802 rcu_read_lock();
1803 block = atomic_rcu_read(&ram_list.mru_block);
1804 if (block && block->host && host - block->host < block->max_length) {
1805 goto found;
1806 }
1807
1808 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1809         /* This case happens when the block is not mapped. */
1810 if (block->host == NULL) {
1811 continue;
1812 }
1813 if (host - block->host < block->max_length) {
1814 goto found;
1815 }
1816 }
1817
1818 rcu_read_unlock();
1819 return NULL;
1820
1821 found:
1822 *ram_addr = block->offset + (host - block->host);
1823 mr = block->mr;
1824 rcu_read_unlock();
1825 return mr;
1826 }
1827
1828 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1829 uint64_t val, unsigned size)
1830 {
1831 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1832 tb_invalidate_phys_page_fast(ram_addr, size);
1833 }
1834 switch (size) {
1835 case 1:
1836 stb_p(qemu_get_ram_ptr(ram_addr), val);
1837 break;
1838 case 2:
1839 stw_p(qemu_get_ram_ptr(ram_addr), val);
1840 break;
1841 case 4:
1842 stl_p(qemu_get_ram_ptr(ram_addr), val);
1843 break;
1844 default:
1845 abort();
1846 }
1847 /* Set both VGA and migration bits for simplicity and to remove
1848 * the notdirty callback faster.
1849 */
1850 cpu_physical_memory_set_dirty_range(ram_addr, size,
1851 DIRTY_CLIENTS_NOCODE);
1852 /* we remove the notdirty callback only if the code has been
1853 flushed */
1854 if (!cpu_physical_memory_is_clean(ram_addr)) {
1855 CPUArchState *env = current_cpu->env_ptr;
1856 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1857 }
1858 }
1859
1860 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1861 unsigned size, bool is_write)
1862 {
1863 return is_write;
1864 }
1865
1866 static const MemoryRegionOps notdirty_mem_ops = {
1867 .write = notdirty_mem_write,
1868 .valid.accepts = notdirty_mem_accepts,
1869 .endianness = DEVICE_NATIVE_ENDIAN,
1870 };
1871
1872 /* Generate a debug exception if a watchpoint has been hit. */
1873 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1874 {
1875 CPUState *cpu = current_cpu;
1876 CPUArchState *env = cpu->env_ptr;
1877 target_ulong pc, cs_base;
1878 target_ulong vaddr;
1879 CPUWatchpoint *wp;
1880 int cpu_flags;
1881
1882 if (cpu->watchpoint_hit) {
1883 /* We re-entered the check after replacing the TB. Now raise
1884          * the debug interrupt so that it will trigger after the
1885 * current instruction. */
1886 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1887 return;
1888 }
1889 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1890 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1891 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1892 && (wp->flags & flags)) {
1893 if (flags == BP_MEM_READ) {
1894 wp->flags |= BP_WATCHPOINT_HIT_READ;
1895 } else {
1896 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1897 }
1898 wp->hitaddr = vaddr;
1899 wp->hitattrs = attrs;
1900 if (!cpu->watchpoint_hit) {
1901 cpu->watchpoint_hit = wp;
1902 tb_check_watchpoint(cpu);
1903 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1904 cpu->exception_index = EXCP_DEBUG;
1905 cpu_loop_exit(cpu);
1906 } else {
1907 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1908 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1909 cpu_resume_from_signal(cpu, NULL);
1910 }
1911 }
1912 } else {
1913 wp->flags &= ~BP_WATCHPOINT_HIT;
1914 }
1915 }
1916 }
1917
1918 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1919 so these check for a hit then pass through to the normal out-of-line
1920 phys routines. */
1921 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
1922 unsigned size, MemTxAttrs attrs)
1923 {
1924 MemTxResult res;
1925 uint64_t data;
1926
1927 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
1928 switch (size) {
1929 case 1:
1930 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
1931 break;
1932 case 2:
1933 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
1934 break;
1935 case 4:
1936 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
1937 break;
1938 default: abort();
1939 }
1940 *pdata = data;
1941 return res;
1942 }
1943
1944 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
1945 uint64_t val, unsigned size,
1946 MemTxAttrs attrs)
1947 {
1948 MemTxResult res;
1949
1950 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
1951 switch (size) {
1952 case 1:
1953 address_space_stb(&address_space_memory, addr, val, attrs, &res);
1954 break;
1955 case 2:
1956 address_space_stw(&address_space_memory, addr, val, attrs, &res);
1957 break;
1958 case 4:
1959 address_space_stl(&address_space_memory, addr, val, attrs, &res);
1960 break;
1961 default: abort();
1962 }
1963 return res;
1964 }
1965
1966 static const MemoryRegionOps watch_mem_ops = {
1967 .read_with_attrs = watch_mem_read,
1968 .write_with_attrs = watch_mem_write,
1969 .endianness = DEVICE_NATIVE_ENDIAN,
1970 };
1971
1972 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
1973 unsigned len, MemTxAttrs attrs)
1974 {
1975 subpage_t *subpage = opaque;
1976 uint8_t buf[8];
1977 MemTxResult res;
1978
1979 #if defined(DEBUG_SUBPAGE)
1980 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1981 subpage, len, addr);
1982 #endif
1983 res = address_space_read(subpage->as, addr + subpage->base,
1984 attrs, buf, len);
1985 if (res) {
1986 return res;
1987 }
1988 switch (len) {
1989 case 1:
1990 *data = ldub_p(buf);
1991 return MEMTX_OK;
1992 case 2:
1993 *data = lduw_p(buf);
1994 return MEMTX_OK;
1995 case 4:
1996 *data = ldl_p(buf);
1997 return MEMTX_OK;
1998 case 8:
1999 *data = ldq_p(buf);
2000 return MEMTX_OK;
2001 default:
2002 abort();
2003 }
2004 }
2005
2006 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2007 uint64_t value, unsigned len, MemTxAttrs attrs)
2008 {
2009 subpage_t *subpage = opaque;
2010 uint8_t buf[8];
2011
2012 #if defined(DEBUG_SUBPAGE)
2013 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2014 " value %"PRIx64"\n",
2015 __func__, subpage, len, addr, value);
2016 #endif
2017 switch (len) {
2018 case 1:
2019 stb_p(buf, value);
2020 break;
2021 case 2:
2022 stw_p(buf, value);
2023 break;
2024 case 4:
2025 stl_p(buf, value);
2026 break;
2027 case 8:
2028 stq_p(buf, value);
2029 break;
2030 default:
2031 abort();
2032 }
2033 return address_space_write(subpage->as, addr + subpage->base,
2034 attrs, buf, len);
2035 }
2036
2037 static bool subpage_accepts(void *opaque, hwaddr addr,
2038 unsigned len, bool is_write)
2039 {
2040 subpage_t *subpage = opaque;
2041 #if defined(DEBUG_SUBPAGE)
2042 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2043 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2044 #endif
2045
2046 return address_space_access_valid(subpage->as, addr + subpage->base,
2047 len, is_write);
2048 }
2049
2050 static const MemoryRegionOps subpage_ops = {
2051 .read_with_attrs = subpage_read,
2052 .write_with_attrs = subpage_write,
2053 .impl.min_access_size = 1,
2054 .impl.max_access_size = 8,
2055 .valid.min_access_size = 1,
2056 .valid.max_access_size = 8,
2057 .valid.accepts = subpage_accepts,
2058 .endianness = DEVICE_NATIVE_ENDIAN,
2059 };
2060
2061 static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
2062 uint16_t section)
2063 {
2064 int idx, eidx;
2065
2066 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2067 return -1;
2068 idx = SUBPAGE_IDX(start);
2069 eidx = SUBPAGE_IDX(end);
2070 #if defined(DEBUG_SUBPAGE)
2071 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2072 __func__, mmio, start, end, idx, eidx, section);
2073 #endif
2074 for (; idx <= eidx; idx++) {
2075 mmio->sub_section[idx] = section;
2076 }
2077
2078 return 0;
2079 }
2080
2081 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2082 {
2083 subpage_t *mmio;
2084
2085 mmio = g_malloc0(sizeof(subpage_t));
2086
2087 mmio->as = as;
2088 mmio->base = base;
2089 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2090 NULL, TARGET_PAGE_SIZE);
2091 mmio->iomem.subpage = true;
2092 #if defined(DEBUG_SUBPAGE)
2093 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2094 mmio, base, TARGET_PAGE_SIZE);
2095 #endif
2096 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2097
2098 return mmio;
2099 }
2100
2101 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2102 MemoryRegion *mr)
2103 {
2104 assert(as);
2105 MemoryRegionSection section = {
2106 .address_space = as,
2107 .mr = mr,
2108 .offset_within_address_space = 0,
2109 .offset_within_region = 0,
2110 .size = int128_2_64(),
2111 };
2112
2113 return phys_section_add(map, &section);
2114 }
2115
2116 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2117 {
2118 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2119 MemoryRegionSection *sections = d->map.sections;
2120
2121 return sections[index & ~TARGET_PAGE_MASK].mr;
2122 }
2123
2124 static void io_mem_init(void)
2125 {
2126 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2127 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2128 NULL, UINT64_MAX);
2129 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2130 NULL, UINT64_MAX);
2131 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2132 NULL, UINT64_MAX);
2133 }
2134
2135 static void mem_begin(MemoryListener *listener)
2136 {
2137 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2138 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2139 uint16_t n;
2140
2141 n = dummy_section(&d->map, as, &io_mem_unassigned);
2142 assert(n == PHYS_SECTION_UNASSIGNED);
2143 n = dummy_section(&d->map, as, &io_mem_notdirty);
2144 assert(n == PHYS_SECTION_NOTDIRTY);
2145 n = dummy_section(&d->map, as, &io_mem_rom);
2146 assert(n == PHYS_SECTION_ROM);
2147 n = dummy_section(&d->map, as, &io_mem_watch);
2148 assert(n == PHYS_SECTION_WATCH);
2149
2150 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2151 d->as = as;
2152 as->next_dispatch = d;
2153 }
2154
2155 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2156 {
2157 phys_sections_free(&d->map);
2158 g_free(d);
2159 }
2160
2161 static void mem_commit(MemoryListener *listener)
2162 {
2163 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2164 AddressSpaceDispatch *cur = as->dispatch;
2165 AddressSpaceDispatch *next = as->next_dispatch;
2166
2167 phys_page_compact_all(next, next->map.nodes_nb);
2168
2169 atomic_rcu_set(&as->dispatch, next);
2170 if (cur) {
2171 call_rcu(cur, address_space_dispatch_free, rcu);
2172 }
2173 }
2174
2175 static void tcg_commit(MemoryListener *listener)
2176 {
2177 CPUState *cpu;
2178
2179 /* since each CPU stores ram addresses in its TLB cache, we must
2180 reset the modified entries */
2181 /* XXX: slow ! */
2182 CPU_FOREACH(cpu) {
2183 /* FIXME: Disentangle the cpu.h circular files deps so we can
2184 directly get the right CPU from listener. */
2185 if (cpu->tcg_as_listener != listener) {
2186 continue;
2187 }
2188 cpu_reload_memory_map(cpu);
2189 }
2190 }
2191
2192 void address_space_init_dispatch(AddressSpace *as)
2193 {
2194 as->dispatch = NULL;
2195 as->dispatch_listener = (MemoryListener) {
2196 .begin = mem_begin,
2197 .commit = mem_commit,
2198 .region_add = mem_add,
2199 .region_nop = mem_add,
2200 .priority = 0,
2201 };
2202 memory_listener_register(&as->dispatch_listener, as);
2203 }
2204
2205 void address_space_unregister(AddressSpace *as)
2206 {
2207 memory_listener_unregister(&as->dispatch_listener);
2208 }
2209
2210 void address_space_destroy_dispatch(AddressSpace *as)
2211 {
2212 AddressSpaceDispatch *d = as->dispatch;
2213
2214 atomic_rcu_set(&as->dispatch, NULL);
2215 if (d) {
2216 call_rcu(d, address_space_dispatch_free, rcu);
2217 }
2218 }
2219
2220 static void memory_map_init(void)
2221 {
2222 system_memory = g_malloc(sizeof(*system_memory));
2223
2224 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2225 address_space_init(&address_space_memory, system_memory, "memory");
2226
2227 system_io = g_malloc(sizeof(*system_io));
2228 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2229 65536);
2230 address_space_init(&address_space_io, system_io, "I/O");
2231 }
2232
2233 MemoryRegion *get_system_memory(void)
2234 {
2235 return system_memory;
2236 }
2237
2238 MemoryRegion *get_system_io(void)
2239 {
2240 return system_io;
2241 }
2242
2243 #endif /* !defined(CONFIG_USER_ONLY) */
2244
2245 /* physical memory access (slow version, mainly for debug) */
2246 #if defined(CONFIG_USER_ONLY)
2247 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2248 uint8_t *buf, int len, int is_write)
2249 {
2250 int l, flags;
2251 target_ulong page;
2252 void *p;
2253
2254 while (len > 0) {
2255 page = addr & TARGET_PAGE_MASK;
2256 l = (page + TARGET_PAGE_SIZE) - addr;
2257 if (l > len)
2258 l = len;
2259 flags = page_get_flags(page);
2260 if (!(flags & PAGE_VALID))
2261 return -1;
2262 if (is_write) {
2263 if (!(flags & PAGE_WRITE))
2264 return -1;
2265 /* XXX: this code should not depend on lock_user */
2266 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2267 return -1;
2268 memcpy(p, buf, l);
2269 unlock_user(p, addr, l);
2270 } else {
2271 if (!(flags & PAGE_READ))
2272 return -1;
2273 /* XXX: this code should not depend on lock_user */
2274 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2275 return -1;
2276 memcpy(buf, p, l);
2277 unlock_user(p, addr, 0);
2278 }
2279 len -= l;
2280 buf += l;
2281 addr += l;
2282 }
2283 return 0;
2284 }
2285
2286 #else
2287
2288 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2289 hwaddr length)
2290 {
2291 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2292 /* No early return if dirty_log_mask is or becomes 0, because
2293 * cpu_physical_memory_set_dirty_range will still call
2294 * xen_modified_memory.
2295 */
2296 if (dirty_log_mask) {
2297 dirty_log_mask =
2298 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2299 }
2300 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2301 tb_invalidate_phys_range(addr, addr + length);
2302 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2303 }
2304 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2305 }
2306
2307 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2308 {
2309 unsigned access_size_max = mr->ops->valid.max_access_size;
2310
2311 /* Regions are assumed to support 1-4 byte accesses unless
2312 otherwise specified. */
2313 if (access_size_max == 0) {
2314 access_size_max = 4;
2315 }
2316
2317 /* Bound the maximum access by the alignment of the address. */
2318 if (!mr->ops->impl.unaligned) {
2319 unsigned align_size_max = addr & -addr;
2320 if (align_size_max != 0 && align_size_max < access_size_max) {
2321 access_size_max = align_size_max;
2322 }
2323 }
2324
2325 /* Don't attempt accesses larger than the maximum. */
2326 if (l > access_size_max) {
2327 l = access_size_max;
2328 }
2329 if (l & (l - 1)) {
2330 l = 1 << (qemu_fls(l) - 1);
2331 }
2332
2333 return l;
2334 }
2335
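/* Worked example (illustrative only): an 8-byte request at in-region
 * offset 0x1006 to a region that declares valid.max_access_size == 4
 * and no unaligned support. 0x1006 & -0x1006 == 2, so the first access
 * is clipped to 4 by the region maximum and then to 2 by the alignment;
 * the caller's loop then issues a 4-byte access at 0x1008 and a 2-byte
 * access at 0x100c, i.e. the request is split as 2 + 4 + 2.
 */
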
2336 static bool prepare_mmio_access(MemoryRegion *mr)
2337 {
2338 bool unlocked = !qemu_mutex_iothread_locked();
2339 bool release_lock = false;
2340
2341 if (unlocked && mr->global_locking) {
2342 qemu_mutex_lock_iothread();
2343 unlocked = false;
2344 release_lock = true;
2345 }
2346 if (mr->flush_coalesced_mmio) {
2347 if (unlocked) {
2348 qemu_mutex_lock_iothread();
2349 }
2350 qemu_flush_coalesced_mmio_buffer();
2351 if (unlocked) {
2352 qemu_mutex_unlock_iothread();
2353 }
2354 }
2355
2356 return release_lock;
2357 }
2358
2359 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2360 uint8_t *buf, int len, bool is_write)
2361 {
2362 hwaddr l;
2363 uint8_t *ptr;
2364 uint64_t val;
2365 hwaddr addr1;
2366 MemoryRegion *mr;
2367 MemTxResult result = MEMTX_OK;
2368 bool release_lock = false;
2369
2370 rcu_read_lock();
2371 while (len > 0) {
2372 l = len;
2373 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2374
2375 if (is_write) {
2376 if (!memory_access_is_direct(mr, is_write)) {
2377 release_lock |= prepare_mmio_access(mr);
2378 l = memory_access_size(mr, l, addr1);
2379 /* XXX: could force current_cpu to NULL to avoid
2380 potential bugs */
2381 switch (l) {
2382 case 8:
2383 /* 64 bit write access */
2384 val = ldq_p(buf);
2385 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2386 attrs);
2387 break;
2388 case 4:
2389 /* 32 bit write access */
2390 val = ldl_p(buf);
2391 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2392 attrs);
2393 break;
2394 case 2:
2395 /* 16 bit write access */
2396 val = lduw_p(buf);
2397 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2398 attrs);
2399 break;
2400 case 1:
2401 /* 8 bit write access */
2402 val = ldub_p(buf);
2403 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2404 attrs);
2405 break;
2406 default:
2407 abort();
2408 }
2409 } else {
2410 addr1 += memory_region_get_ram_addr(mr);
2411 /* RAM case */
2412 ptr = qemu_get_ram_ptr(addr1);
2413 memcpy(ptr, buf, l);
2414 invalidate_and_set_dirty(mr, addr1, l);
2415 }
2416 } else {
2417 if (!memory_access_is_direct(mr, is_write)) {
2418 /* I/O case */
2419 release_lock |= prepare_mmio_access(mr);
2420 l = memory_access_size(mr, l, addr1);
2421 switch (l) {
2422 case 8:
2423 /* 64 bit read access */
2424 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2425 attrs);
2426 stq_p(buf, val);
2427 break;
2428 case 4:
2429 /* 32 bit read access */
2430 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2431 attrs);
2432 stl_p(buf, val);
2433 break;
2434 case 2:
2435 /* 16 bit read access */
2436 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2437 attrs);
2438 stw_p(buf, val);
2439 break;
2440 case 1:
2441 /* 8 bit read access */
2442 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2443 attrs);
2444 stb_p(buf, val);
2445 break;
2446 default:
2447 abort();
2448 }
2449 } else {
2450 /* RAM case */
2451 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2452 memcpy(buf, ptr, l);
2453 }
2454 }
2455
2456 if (release_lock) {
2457 qemu_mutex_unlock_iothread();
2458 release_lock = false;
2459 }
2460
2461 len -= l;
2462 buf += l;
2463 addr += l;
2464 }
2465 rcu_read_unlock();
2466
2467 return result;
2468 }
2469
2470 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2471 const uint8_t *buf, int len)
2472 {
2473 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2474 }
2475
2476 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2477 uint8_t *buf, int len)
2478 {
2479 return address_space_rw(as, addr, attrs, buf, len, false);
2480 }
2481
2482
2483 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2484 int len, int is_write)
2485 {
2486 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2487 buf, len, is_write);
2488 }
2489
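/* Illustrative sketch (helper name and caller are hypothetical): copying
 * a block of guest-physical memory while propagating transaction
 * failures, using the wrappers above with unspecified memory attributes.
 * cpu_physical_memory_rw() is the legacy interface that discards the
 * MemTxResult; callers that care about bus errors can check it as below.
 */
static int example_copy_from_guest(AddressSpace *as, hwaddr gpa,
                                   uint8_t *dest, int len)
{
    MemTxResult res;

    res = address_space_read(as, gpa, MEMTXATTRS_UNSPECIFIED, dest, len);
    return res == MEMTX_OK ? 0 : -1;
}
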
2490 enum write_rom_type {
2491 WRITE_DATA,
2492 FLUSH_CACHE,
2493 };
2494
2495 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2496 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2497 {
2498 hwaddr l;
2499 uint8_t *ptr;
2500 hwaddr addr1;
2501 MemoryRegion *mr;
2502
2503 rcu_read_lock();
2504 while (len > 0) {
2505 l = len;
2506 mr = address_space_translate(as, addr, &addr1, &l, true);
2507
2508 if (!(memory_region_is_ram(mr) ||
2509 memory_region_is_romd(mr))) {
2510 l = memory_access_size(mr, l, addr1);
2511 } else {
2512 addr1 += memory_region_get_ram_addr(mr);
2513 /* ROM/RAM case */
2514 ptr = qemu_get_ram_ptr(addr1);
2515 switch (type) {
2516 case WRITE_DATA:
2517 memcpy(ptr, buf, l);
2518 invalidate_and_set_dirty(mr, addr1, l);
2519 break;
2520 case FLUSH_CACHE:
2521 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2522 break;
2523 }
2524 }
2525 len -= l;
2526 buf += l;
2527 addr += l;
2528 }
2529 rcu_read_unlock();
2530 }
2531
2532 /* used for ROM loading: can write in RAM and ROM */
2533 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2534 const uint8_t *buf, int len)
2535 {
2536 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2537 }
2538
2539 void cpu_flush_icache_range(hwaddr start, int len)
2540 {
2541 /*
2542 * This function should do the same thing as an icache flush that was
2543 * triggered from within the guest. For TCG we are always cache coherent,
2544 * so there is no need to flush anything. For KVM / Xen we need to flush
2545 * the host's instruction cache at least.
2546 */
2547 if (tcg_enabled()) {
2548 return;
2549 }
2550
2551 cpu_physical_memory_write_rom_internal(&address_space_memory,
2552 start, NULL, len, FLUSH_CACHE);
2553 }
2554
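/* Illustrative sketch (function and parameter names are placeholders):
 * the typical firmware-loading sequence these two helpers are meant for:
 * write the blob through the ROM path, then make sure the host
 * instruction cache is coherent for accelerators such as KVM or Xen.
 */
static void example_load_firmware(AddressSpace *as, hwaddr load_addr,
                                  const uint8_t *blob, int blob_size)
{
    cpu_physical_memory_write_rom(as, load_addr, blob, blob_size);
    cpu_flush_icache_range(load_addr, blob_size);
}
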
2555 typedef struct {
2556 MemoryRegion *mr;
2557 void *buffer;
2558 hwaddr addr;
2559 hwaddr len;
2560 bool in_use;
2561 } BounceBuffer;
2562
2563 static BounceBuffer bounce;
2564
2565 typedef struct MapClient {
2566 QEMUBH *bh;
2567 QLIST_ENTRY(MapClient) link;
2568 } MapClient;
2569
2570 QemuMutex map_client_list_lock;
2571 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2572 = QLIST_HEAD_INITIALIZER(map_client_list);
2573
2574 static void cpu_unregister_map_client_do(MapClient *client)
2575 {
2576 QLIST_REMOVE(client, link);
2577 g_free(client);
2578 }
2579
2580 static void cpu_notify_map_clients_locked(void)
2581 {
2582 MapClient *client;
2583
2584 while (!QLIST_EMPTY(&map_client_list)) {
2585 client = QLIST_FIRST(&map_client_list);
2586 qemu_bh_schedule(client->bh);
2587 cpu_unregister_map_client_do(client);
2588 }
2589 }
2590
2591 void cpu_register_map_client(QEMUBH *bh)
2592 {
2593 MapClient *client = g_malloc(sizeof(*client));
2594
2595 qemu_mutex_lock(&map_client_list_lock);
2596 client->bh = bh;
2597 QLIST_INSERT_HEAD(&map_client_list, client, link);
2598 if (!atomic_read(&bounce.in_use)) {
2599 cpu_notify_map_clients_locked();
2600 }
2601 qemu_mutex_unlock(&map_client_list_lock);
2602 }
2603
2604 void cpu_exec_init_all(void)
2605 {
2606 qemu_mutex_init(&ram_list.mutex);
2607 memory_map_init();
2608 io_mem_init();
2609 qemu_mutex_init(&map_client_list_lock);
2610 }
2611
2612 void cpu_unregister_map_client(QEMUBH *bh)
2613 {
2614 MapClient *client;
2615
2616 qemu_mutex_lock(&map_client_list_lock);
2617 QLIST_FOREACH(client, &map_client_list, link) {
2618 if (client->bh == bh) {
2619 cpu_unregister_map_client_do(client);
2620 break;
2621 }
2622 }
2623 qemu_mutex_unlock(&map_client_list_lock);
2624 }
2625
2626 static void cpu_notify_map_clients(void)
2627 {
2628 qemu_mutex_lock(&map_client_list_lock);
2629 cpu_notify_map_clients_locked();
2630 qemu_mutex_unlock(&map_client_list_lock);
2631 }
2632
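/* Illustrative sketch (ExampleDev and its fields are hypothetical): how a
 * DMA helper can use the map-client list above to be notified when a
 * failed address_space_map() is worth retrying, i.e. once the shared
 * bounce buffer has been released again.
 */
typedef struct ExampleDev {
    QEMUBH *retry_bh;                   /* created once, reused on failure */
} ExampleDev;

static void example_dev_retry_map(void *opaque)
{
    /* re-issue the address_space_map() call that previously returned NULL */
}

static void example_dev_map_failed(ExampleDev *dev)
{
    if (!dev->retry_bh) {
        dev->retry_bh = qemu_bh_new(example_dev_retry_map, dev);
    }
    cpu_register_map_client(dev->retry_bh);
}
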
2633 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2634 {
2635 MemoryRegion *mr;
2636 hwaddr l, xlat;
2637
2638 rcu_read_lock();
2639 while (len > 0) {
2640 l = len;
2641 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2642 if (!memory_access_is_direct(mr, is_write)) {
2643 l = memory_access_size(mr, l, addr);
2644 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2645 rcu_read_unlock();
2646 return false;
2647 }
2648 }
2649 len -= l;
2650 addr += l;
2651 }
2652 rcu_read_unlock();
2653 return true;
2654 }
2655
2656 /* Map a physical memory region into a host virtual address.
2657 * May map a subset of the requested range, given by and returned in *plen.
2658 * May return NULL if resources needed to perform the mapping are exhausted.
2659 * Use only for reads OR writes - not for read-modify-write operations.
2660 * Use cpu_register_map_client() to know when retrying the map operation is
2661 * likely to succeed.
2662 */
2663 void *address_space_map(AddressSpace *as,
2664 hwaddr addr,
2665 hwaddr *plen,
2666 bool is_write)
2667 {
2668 hwaddr len = *plen;
2669 hwaddr done = 0;
2670 hwaddr l, xlat, base;
2671 MemoryRegion *mr, *this_mr;
2672 ram_addr_t raddr;
2673
2674 if (len == 0) {
2675 return NULL;
2676 }
2677
2678 l = len;
2679 rcu_read_lock();
2680 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2681
2682 if (!memory_access_is_direct(mr, is_write)) {
2683 if (atomic_xchg(&bounce.in_use, true)) {
2684 rcu_read_unlock();
2685 return NULL;
2686 }
2687 /* Avoid unbounded allocations */
2688 l = MIN(l, TARGET_PAGE_SIZE);
2689 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2690 bounce.addr = addr;
2691 bounce.len = l;
2692
2693 memory_region_ref(mr);
2694 bounce.mr = mr;
2695 if (!is_write) {
2696 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2697 bounce.buffer, l);
2698 }
2699
2700 rcu_read_unlock();
2701 *plen = l;
2702 return bounce.buffer;
2703 }
2704
2705 base = xlat;
2706 raddr = memory_region_get_ram_addr(mr);
2707
2708 for (;;) {
2709 len -= l;
2710 addr += l;
2711 done += l;
2712 if (len == 0) {
2713 break;
2714 }
2715
2716 l = len;
2717 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2718 if (this_mr != mr || xlat != base + done) {
2719 break;
2720 }
2721 }
2722
2723 memory_region_ref(mr);
2724 rcu_read_unlock();
2725 *plen = done;
2726 return qemu_ram_ptr_length(raddr + base, plen);
2727 }
2728
2729 /* Unmaps a memory region previously mapped by address_space_map().
2730 * Will also mark the memory as dirty if is_write == 1. access_len gives
2731 * the amount of memory that was actually read or written by the caller.
2732 */
2733 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2734 int is_write, hwaddr access_len)
2735 {
2736 if (buffer != bounce.buffer) {
2737 MemoryRegion *mr;
2738 ram_addr_t addr1;
2739
2740 mr = qemu_ram_addr_from_host(buffer, &addr1);
2741 assert(mr != NULL);
2742 if (is_write) {
2743 invalidate_and_set_dirty(mr, addr1, access_len);
2744 }
2745 if (xen_enabled()) {
2746 xen_invalidate_map_cache_entry(buffer);
2747 }
2748 memory_region_unref(mr);
2749 return;
2750 }
2751 if (is_write) {
2752 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2753 bounce.buffer, access_len);
2754 }
2755 qemu_vfree(bounce.buffer);
2756 bounce.buffer = NULL;
2757 memory_region_unref(bounce.mr);
2758 atomic_mb_set(&bounce.in_use, false);
2759 cpu_notify_map_clients();
2760 }
2761
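/* Illustrative sketch (hypothetical caller): the usage pattern described
 * by the comments above: obtain a temporary host pointer for up to 'len'
 * bytes of guest memory, use it for a single direction of transfer, then
 * unmap with the number of bytes actually touched.
 */
static void example_dma_fill(AddressSpace *as, hwaddr gpa, hwaddr len,
                             uint8_t pattern)
{
    hwaddr plen = len;
    void *host = address_space_map(as, gpa, &plen, true);

    if (!host) {
        /* resources exhausted (e.g. the bounce buffer is in use);
         * a real caller would register a map client and retry later */
        return;
    }
    memset(host, pattern, plen);        /* plen may be smaller than len */
    address_space_unmap(as, host, plen, true, plen);
}
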
2762 void *cpu_physical_memory_map(hwaddr addr,
2763 hwaddr *plen,
2764 int is_write)
2765 {
2766 return address_space_map(&address_space_memory, addr, plen, is_write);
2767 }
2768
2769 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2770 int is_write, hwaddr access_len)
2771 {
2772 address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2773 }
2774
2775 /* warning: addr must be aligned */
2776 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2777 MemTxAttrs attrs,
2778 MemTxResult *result,
2779 enum device_endian endian)
2780 {
2781 uint8_t *ptr;
2782 uint64_t val;
2783 MemoryRegion *mr;
2784 hwaddr l = 4;
2785 hwaddr addr1;
2786 MemTxResult r;
2787 bool release_lock = false;
2788
2789 rcu_read_lock();
2790 mr = address_space_translate(as, addr, &addr1, &l, false);
2791 if (l < 4 || !memory_access_is_direct(mr, false)) {
2792 release_lock |= prepare_mmio_access(mr);
2793
2794 /* I/O case */
2795 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2796 #if defined(TARGET_WORDS_BIGENDIAN)
2797 if (endian == DEVICE_LITTLE_ENDIAN) {
2798 val = bswap32(val);
2799 }
2800 #else
2801 if (endian == DEVICE_BIG_ENDIAN) {
2802 val = bswap32(val);
2803 }
2804 #endif
2805 } else {
2806 /* RAM case */
2807 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2808 & TARGET_PAGE_MASK)
2809 + addr1);
2810 switch (endian) {
2811 case DEVICE_LITTLE_ENDIAN:
2812 val = ldl_le_p(ptr);
2813 break;
2814 case DEVICE_BIG_ENDIAN:
2815 val = ldl_be_p(ptr);
2816 break;
2817 default:
2818 val = ldl_p(ptr);
2819 break;
2820 }
2821 r = MEMTX_OK;
2822 }
2823 if (result) {
2824 *result = r;
2825 }
2826 if (release_lock) {
2827 qemu_mutex_unlock_iothread();
2828 }
2829 rcu_read_unlock();
2830 return val;
2831 }
2832
2833 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2834 MemTxAttrs attrs, MemTxResult *result)
2835 {
2836 return address_space_ldl_internal(as, addr, attrs, result,
2837 DEVICE_NATIVE_ENDIAN);
2838 }
2839
2840 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2841 MemTxAttrs attrs, MemTxResult *result)
2842 {
2843 return address_space_ldl_internal(as, addr, attrs, result,
2844 DEVICE_LITTLE_ENDIAN);
2845 }
2846
2847 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2848 MemTxAttrs attrs, MemTxResult *result)
2849 {
2850 return address_space_ldl_internal(as, addr, attrs, result,
2851 DEVICE_BIG_ENDIAN);
2852 }
2853
2854 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2855 {
2856 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2857 }
2858
2859 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2860 {
2861 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2862 }
2863
2864 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2865 {
2866 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2867 }
2868
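/* Illustrative sketch (register address and helper name are arbitrary):
 * the *_phys convenience wrappers above ignore transaction failures,
 * while the address_space_* variants report them through a MemTxResult
 * that the caller can inspect.
 */
static uint32_t example_read_le_reg(AddressSpace *as, hwaddr reg_addr,
                                    bool *ok)
{
    MemTxResult res;
    uint32_t val;

    /* ldl_le_phys(as, reg_addr) would do the same read but drop 'res' */
    val = address_space_ldl_le(as, reg_addr, MEMTXATTRS_UNSPECIFIED, &res);
    *ok = (res == MEMTX_OK);
    return val;
}
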
2869 /* warning: addr must be aligned */
2870 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2871 MemTxAttrs attrs,
2872 MemTxResult *result,
2873 enum device_endian endian)
2874 {
2875 uint8_t *ptr;
2876 uint64_t val;
2877 MemoryRegion *mr;
2878 hwaddr l = 8;
2879 hwaddr addr1;
2880 MemTxResult r;
2881 bool release_lock = false;
2882
2883 rcu_read_lock();
2884 mr = address_space_translate(as, addr, &addr1, &l,
2885 false);
2886 if (l < 8 || !memory_access_is_direct(mr, false)) {
2887 release_lock |= prepare_mmio_access(mr);
2888
2889 /* I/O case */
2890 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
2891 #if defined(TARGET_WORDS_BIGENDIAN)
2892 if (endian == DEVICE_LITTLE_ENDIAN) {
2893 val = bswap64(val);
2894 }
2895 #else
2896 if (endian == DEVICE_BIG_ENDIAN) {
2897 val = bswap64(val);
2898 }
2899 #endif
2900 } else {
2901 /* RAM case */
2902 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2903 & TARGET_PAGE_MASK)
2904 + addr1);
2905 switch (endian) {
2906 case DEVICE_LITTLE_ENDIAN:
2907 val = ldq_le_p(ptr);
2908 break;
2909 case DEVICE_BIG_ENDIAN:
2910 val = ldq_be_p(ptr);
2911 break;
2912 default:
2913 val = ldq_p(ptr);
2914 break;
2915 }
2916 r = MEMTX_OK;
2917 }
2918 if (result) {
2919 *result = r;
2920 }
2921 if (release_lock) {
2922 qemu_mutex_unlock_iothread();
2923 }
2924 rcu_read_unlock();
2925 return val;
2926 }
2927
2928 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
2929 MemTxAttrs attrs, MemTxResult *result)
2930 {
2931 return address_space_ldq_internal(as, addr, attrs, result,
2932 DEVICE_NATIVE_ENDIAN);
2933 }
2934
2935 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
2936 MemTxAttrs attrs, MemTxResult *result)
2937 {
2938 return address_space_ldq_internal(as, addr, attrs, result,
2939 DEVICE_LITTLE_ENDIAN);
2940 }
2941
2942 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
2943 MemTxAttrs attrs, MemTxResult *result)
2944 {
2945 return address_space_ldq_internal(as, addr, attrs, result,
2946 DEVICE_BIG_ENDIAN);
2947 }
2948
2949 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2950 {
2951 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2952 }
2953
2954 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2955 {
2956 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2957 }
2958
2959 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2960 {
2961 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2962 }
2963
2964 /* XXX: optimize */
2965 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
2966 MemTxAttrs attrs, MemTxResult *result)
2967 {
2968 uint8_t val;
2969 MemTxResult r;
2970
2971 r = address_space_rw(as, addr, attrs, &val, 1, 0);
2972 if (result) {
2973 *result = r;
2974 }
2975 return val;
2976 }
2977
2978 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2979 {
2980 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2981 }
2982
2983 /* warning: addr must be aligned */
2984 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
2985 hwaddr addr,
2986 MemTxAttrs attrs,
2987 MemTxResult *result,
2988 enum device_endian endian)
2989 {
2990 uint8_t *ptr;
2991 uint64_t val;
2992 MemoryRegion *mr;
2993 hwaddr l = 2;
2994 hwaddr addr1;
2995 MemTxResult r;
2996 bool release_lock = false;
2997
2998 rcu_read_lock();
2999 mr = address_space_translate(as, addr, &addr1, &l,
3000 false);
3001 if (l < 2 || !memory_access_is_direct(mr, false)) {
3002 release_lock |= prepare_mmio_access(mr);
3003
3004 /* I/O case */
3005 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3006 #if defined(TARGET_WORDS_BIGENDIAN)
3007 if (endian == DEVICE_LITTLE_ENDIAN) {
3008 val = bswap16(val);
3009 }
3010 #else
3011 if (endian == DEVICE_BIG_ENDIAN) {
3012 val = bswap16(val);
3013 }
3014 #endif
3015 } else {
3016 /* RAM case */
3017 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3018 & TARGET_PAGE_MASK)
3019 + addr1);
3020 switch (endian) {
3021 case DEVICE_LITTLE_ENDIAN:
3022 val = lduw_le_p(ptr);
3023 break;
3024 case DEVICE_BIG_ENDIAN:
3025 val = lduw_be_p(ptr);
3026 break;
3027 default:
3028 val = lduw_p(ptr);
3029 break;
3030 }
3031 r = MEMTX_OK;
3032 }
3033 if (result) {
3034 *result = r;
3035 }
3036 if (release_lock) {
3037 qemu_mutex_unlock_iothread();
3038 }
3039 rcu_read_unlock();
3040 return val;
3041 }
3042
3043 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3044 MemTxAttrs attrs, MemTxResult *result)
3045 {
3046 return address_space_lduw_internal(as, addr, attrs, result,
3047 DEVICE_NATIVE_ENDIAN);
3048 }
3049
3050 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3051 MemTxAttrs attrs, MemTxResult *result)
3052 {
3053 return address_space_lduw_internal(as, addr, attrs, result,
3054 DEVICE_LITTLE_ENDIAN);
3055 }
3056
3057 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3058 MemTxAttrs attrs, MemTxResult *result)
3059 {
3060 return address_space_lduw_internal(as, addr, attrs, result,
3061 DEVICE_BIG_ENDIAN);
3062 }
3063
3064 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3065 {
3066 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3067 }
3068
3069 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3070 {
3071 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3072 }
3073
3074 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3075 {
3076 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3077 }
3078
3079 /* warning: addr must be aligned. The ram page is not marked as dirty
3080 and the code inside is not invalidated. It is useful if the dirty
3081 bits are used to track modified PTEs */
3082 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3083 MemTxAttrs attrs, MemTxResult *result)
3084 {
3085 uint8_t *ptr;
3086 MemoryRegion *mr;
3087 hwaddr l = 4;
3088 hwaddr addr1;
3089 MemTxResult r;
3090 uint8_t dirty_log_mask;
3091 bool release_lock = false;
3092
3093 rcu_read_lock();
3094 mr = address_space_translate(as, addr, &addr1, &l,
3095 true);
3096 if (l < 4 || !memory_access_is_direct(mr, true)) {
3097 release_lock |= prepare_mmio_access(mr);
3098
3099 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3100 } else {
3101 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3102 ptr = qemu_get_ram_ptr(addr1);
3103 stl_p(ptr, val);
3104
3105 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3106 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3107 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3108 r = MEMTX_OK;
3109 }
3110 if (result) {
3111 *result = r;
3112 }
3113 if (release_lock) {
3114 qemu_mutex_unlock_iothread();
3115 }
3116 rcu_read_unlock();
3117 }
3118
3119 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3120 {
3121 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3122 }
3123
3124 /* warning: addr must be aligned */
3125 static inline void address_space_stl_internal(AddressSpace *as,
3126 hwaddr addr, uint32_t val,
3127 MemTxAttrs attrs,
3128 MemTxResult *result,
3129 enum device_endian endian)
3130 {
3131 uint8_t *ptr;
3132 MemoryRegion *mr;
3133 hwaddr l = 4;
3134 hwaddr addr1;
3135 MemTxResult r;
3136 bool release_lock = false;
3137
3138 rcu_read_lock();
3139 mr = address_space_translate(as, addr, &addr1, &l,
3140 true);
3141 if (l < 4 || !memory_access_is_direct(mr, true)) {
3142 release_lock |= prepare_mmio_access(mr);
3143
3144 #if defined(TARGET_WORDS_BIGENDIAN)
3145 if (endian == DEVICE_LITTLE_ENDIAN) {
3146 val = bswap32(val);
3147 }
3148 #else
3149 if (endian == DEVICE_BIG_ENDIAN) {
3150 val = bswap32(val);
3151 }
3152 #endif
3153 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3154 } else {
3155 /* RAM case */
3156 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3157 ptr = qemu_get_ram_ptr(addr1);
3158 switch (endian) {
3159 case DEVICE_LITTLE_ENDIAN:
3160 stl_le_p(ptr, val);
3161 break;
3162 case DEVICE_BIG_ENDIAN:
3163 stl_be_p(ptr, val);
3164 break;
3165 default:
3166 stl_p(ptr, val);
3167 break;
3168 }
3169 invalidate_and_set_dirty(mr, addr1, 4);
3170 r = MEMTX_OK;
3171 }
3172 if (result) {
3173 *result = r;
3174 }
3175 if (release_lock) {
3176 qemu_mutex_unlock_iothread();
3177 }
3178 rcu_read_unlock();
3179 }
3180
3181 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3182 MemTxAttrs attrs, MemTxResult *result)
3183 {
3184 address_space_stl_internal(as, addr, val, attrs, result,
3185 DEVICE_NATIVE_ENDIAN);
3186 }
3187
3188 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3189 MemTxAttrs attrs, MemTxResult *result)
3190 {
3191 address_space_stl_internal(as, addr, val, attrs, result,
3192 DEVICE_LITTLE_ENDIAN);
3193 }
3194
3195 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3196 MemTxAttrs attrs, MemTxResult *result)
3197 {
3198 address_space_stl_internal(as, addr, val, attrs, result,
3199 DEVICE_BIG_ENDIAN);
3200 }
3201
3202 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3203 {
3204 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3205 }
3206
3207 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3208 {
3209 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3210 }
3211
3212 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3213 {
3214 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3215 }
3216
3217 /* XXX: optimize */
3218 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3219 MemTxAttrs attrs, MemTxResult *result)
3220 {
3221 uint8_t v = val;
3222 MemTxResult r;
3223
3224 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3225 if (result) {
3226 *result = r;
3227 }
3228 }
3229
3230 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3231 {
3232 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3233 }
3234
3235 /* warning: addr must be aligned */
3236 static inline void address_space_stw_internal(AddressSpace *as,
3237 hwaddr addr, uint32_t val,
3238 MemTxAttrs attrs,
3239 MemTxResult *result,
3240 enum device_endian endian)
3241 {
3242 uint8_t *ptr;
3243 MemoryRegion *mr;
3244 hwaddr l = 2;
3245 hwaddr addr1;
3246 MemTxResult r;
3247 bool release_lock = false;
3248
3249 rcu_read_lock();
3250 mr = address_space_translate(as, addr, &addr1, &l, true);
3251 if (l < 2 || !memory_access_is_direct(mr, true)) {
3252 release_lock |= prepare_mmio_access(mr);
3253
3254 #if defined(TARGET_WORDS_BIGENDIAN)
3255 if (endian == DEVICE_LITTLE_ENDIAN) {
3256 val = bswap16(val);
3257 }
3258 #else
3259 if (endian == DEVICE_BIG_ENDIAN) {
3260 val = bswap16(val);
3261 }
3262 #endif
3263 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3264 } else {
3265 /* RAM case */
3266 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3267 ptr = qemu_get_ram_ptr(addr1);
3268 switch (endian) {
3269 case DEVICE_LITTLE_ENDIAN:
3270 stw_le_p(ptr, val);
3271 break;
3272 case DEVICE_BIG_ENDIAN:
3273 stw_be_p(ptr, val);
3274 break;
3275 default:
3276 stw_p(ptr, val);
3277 break;
3278 }
3279 invalidate_and_set_dirty(mr, addr1, 2);
3280 r = MEMTX_OK;
3281 }
3282 if (result) {
3283 *result = r;
3284 }
3285 if (release_lock) {
3286 qemu_mutex_unlock_iothread();
3287 }
3288 rcu_read_unlock();
3289 }
3290
3291 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3292 MemTxAttrs attrs, MemTxResult *result)
3293 {
3294 address_space_stw_internal(as, addr, val, attrs, result,
3295 DEVICE_NATIVE_ENDIAN);
3296 }
3297
3298 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3299 MemTxAttrs attrs, MemTxResult *result)
3300 {
3301 address_space_stw_internal(as, addr, val, attrs, result,
3302 DEVICE_LITTLE_ENDIAN);
3303 }
3304
3305 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3306 MemTxAttrs attrs, MemTxResult *result)
3307 {
3308 address_space_stw_internal(as, addr, val, attrs, result,
3309 DEVICE_BIG_ENDIAN);
3310 }
3311
3312 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3313 {
3314 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3315 }
3316
3317 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3318 {
3319 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3320 }
3321
3322 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3323 {
3324 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3325 }
3326
3327 /* XXX: optimize */
3328 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3329 MemTxAttrs attrs, MemTxResult *result)
3330 {
3331 MemTxResult r;
3332 val = tswap64(val);
3333 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3334 if (result) {
3335 *result = r;
3336 }
3337 }
3338
3339 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3340 MemTxAttrs attrs, MemTxResult *result)
3341 {
3342 MemTxResult r;
3343 val = cpu_to_le64(val);
3344 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3345 if (result) {
3346 *result = r;
3347 }
3348 }
3349 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3350 MemTxAttrs attrs, MemTxResult *result)
3351 {
3352 MemTxResult r;
3353 val = cpu_to_be64(val);
3354 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3355 if (result) {
3356 *result = r;
3357 }
3358 }
3359
3360 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3361 {
3362 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3363 }
3364
3365 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3366 {
3367 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3368 }
3369
3370 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3371 {
3372 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3373 }
3374
3375 /* virtual memory access for debug (includes writing to ROM) */
3376 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3377 uint8_t *buf, int len, int is_write)
3378 {
3379 int l;
3380 hwaddr phys_addr;
3381 target_ulong page;
3382
3383 while (len > 0) {
3384 page = addr & TARGET_PAGE_MASK;
3385 phys_addr = cpu_get_phys_page_debug(cpu, page);
3386 /* if no physical page mapped, return an error */
3387 if (phys_addr == -1)
3388 return -1;
3389 l = (page + TARGET_PAGE_SIZE) - addr;
3390 if (l > len)
3391 l = len;
3392 phys_addr += (addr & ~TARGET_PAGE_MASK);
3393 if (is_write) {
3394 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3395 } else {
3396 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3397 buf, l, 0);
3398 }
3399 len -= l;
3400 buf += l;
3401 addr += l;
3402 }
3403 return 0;
3404 }
3405 #endif
3406
3407 /*
3408 * A helper function for the _utterly broken_ virtio device model to find out if
3409 * it's running on a big endian machine. Don't do this at home kids!
3410 */
3411 bool target_words_bigendian(void);
3412 bool target_words_bigendian(void)
3413 {
3414 #if defined(TARGET_WORDS_BIGENDIAN)
3415 return true;
3416 #else
3417 return false;
3418 #endif
3419 }
3420
3421 #ifndef CONFIG_USER_ONLY
3422 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3423 {
3424 MemoryRegion *mr;
3425 hwaddr l = 1;
3426 bool res;
3427
3428 rcu_read_lock();
3429 mr = address_space_translate(&address_space_memory,
3430 phys_addr, &phys_addr, &l, false);
3431
3432 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3433 rcu_read_unlock();
3434 return res;
3435 }
3436
3437 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3438 {
3439 RAMBlock *block;
3440 int ret = 0;
3441
3442 rcu_read_lock();
3443 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3444 ret = func(block->idstr, block->host, block->offset,
3445 block->used_length, opaque);
3446 if (ret) {
3447 break;
3448 }
3449 }
3450 rcu_read_unlock();
3451 return ret;
3452 }
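
/* Illustrative sketch (callback and accumulator are hypothetical): summing
 * the used length of every RAM block with the iterator above. The opaque
 * pointer carries the accumulator; a non-zero return value would stop the
 * walk early.
 *
 *     uint64_t total = 0;
 *     qemu_ram_foreach_block(example_sum_block, &total);
 */
static int example_sum_block(const char *idstr, void *host_addr,
                             ram_addr_t offset, ram_addr_t length,
                             void *opaque)
{
    *(uint64_t *)opaque += length;
    return 0;
}
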
3453 #endif