1/*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19#include "config.h"
20#ifdef _WIN32
21#include <windows.h>
22#else
23#include <sys/types.h>
24#include <sys/mman.h>
25#endif
26
27#include "qemu-common.h"
28#include "cpu.h"
29#include "tcg.h"
30#include "hw/hw.h"
31#include "hw/qdev.h"
32#include "qemu/osdep.h"
33#include "sysemu/kvm.h"
34#include "sysemu/sysemu.h"
35#include "hw/xen/xen.h"
36#include "qemu/timer.h"
37#include "qemu/config-file.h"
38#include "exec/memory.h"
39#include "sysemu/dma.h"
40#include "exec/address-spaces.h"
41#if defined(CONFIG_USER_ONLY)
42#include <qemu.h>
43#else /* !CONFIG_USER_ONLY */
44#include "sysemu/xen-mapcache.h"
45#include "trace.h"
46#endif
47#include "exec/cpu-all.h"
48
49#include "exec/cputlb.h"
50#include "translate-all.h"
51
52#include "exec/memory-internal.h"
53#include "exec/ram_addr.h"
54#include "qemu/cache-utils.h"
55
56#include "qemu/range.h"
57
58//#define DEBUG_SUBPAGE
59
60#if !defined(CONFIG_USER_ONLY)
61static bool in_migration;
62
63RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
64
65static MemoryRegion *system_memory;
66static MemoryRegion *system_io;
67
68AddressSpace address_space_io;
69AddressSpace address_space_memory;
70
71MemoryRegion io_mem_rom, io_mem_notdirty;
72static MemoryRegion io_mem_unassigned;
73
74#endif
75
76struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
77/* current CPU in the current thread. It is only valid inside
78 cpu_exec() */
79DEFINE_TLS(CPUState *, current_cpu);
80/* 0 = Do not count executed instructions.
81 1 = Precise instruction counting.
82 2 = Adaptive rate instruction counting. */
83int use_icount;
84
85#if !defined(CONFIG_USER_ONLY)
86
87typedef struct PhysPageEntry PhysPageEntry;
88
89struct PhysPageEntry {
 90 /* How many levels to skip to reach the next node (each level covers P_L2_BITS address bits). 0 for a leaf. */
91 uint32_t skip : 6;
92 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
93 uint32_t ptr : 26;
94};
95
96#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
97
98/* Size of the L2 (and L3, etc) page tables. */
99#define ADDR_SPACE_BITS 64
100
101#define P_L2_BITS 9
102#define P_L2_SIZE (1 << P_L2_BITS)
103
104#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
105
106typedef PhysPageEntry Node[P_L2_SIZE];
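/*
 * Worked example for the constants above (assuming 4 KiB target pages,
 * i.e. TARGET_PAGE_BITS == 12):
 *
 *     P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = (51 / 9) + 1 = 6
 *
 * so the full 64-bit physical address space is covered by a six-level
 * radix tree whose nodes each hold P_L2_SIZE == 512 entries.  A
 * PhysPageEntry packs into 32 bits: 6 bits of "skip" (plenty for six
 * levels) and 26 bits of "ptr", which indexes the nodes array while
 * skip != 0 and the sections array at a leaf (skip == 0).
 * PHYS_MAP_NODE_NIL is just the all-ones value of the 26-bit ptr field.
 */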
107
108typedef struct PhysPageMap {
109 unsigned sections_nb;
110 unsigned sections_nb_alloc;
111 unsigned nodes_nb;
112 unsigned nodes_nb_alloc;
113 Node *nodes;
114 MemoryRegionSection *sections;
115} PhysPageMap;
116
117struct AddressSpaceDispatch {
118 /* This is a multi-level map on the physical address space.
119 * The bottom level has pointers to MemoryRegionSections.
120 */
121 PhysPageEntry phys_map;
122 PhysPageMap map;
123 AddressSpace *as;
124};
125
126#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
127typedef struct subpage_t {
128 MemoryRegion iomem;
129 AddressSpace *as;
130 hwaddr base;
131 uint16_t sub_section[TARGET_PAGE_SIZE];
132} subpage_t;
133
134#define PHYS_SECTION_UNASSIGNED 0
135#define PHYS_SECTION_NOTDIRTY 1
136#define PHYS_SECTION_ROM 2
137#define PHYS_SECTION_WATCH 3
138
139static void io_mem_init(void);
140static void memory_map_init(void);
141static void tcg_commit(MemoryListener *listener);
142
143static MemoryRegion io_mem_watch;
144#endif
145
146#if !defined(CONFIG_USER_ONLY)
147
148static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
149{
150 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
152 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
153 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
154 }
155}
156
157static uint32_t phys_map_node_alloc(PhysPageMap *map)
158{
159 unsigned i;
160 uint32_t ret;
161
162 ret = map->nodes_nb++;
163 assert(ret != PHYS_MAP_NODE_NIL);
164 assert(ret != map->nodes_nb_alloc);
165 for (i = 0; i < P_L2_SIZE; ++i) {
166 map->nodes[ret][i].skip = 1;
167 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
168 }
169 return ret;
170}
171
172static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
173 hwaddr *index, hwaddr *nb, uint16_t leaf,
174 int level)
175{
176 PhysPageEntry *p;
177 int i;
178 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
179
180 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
181 lp->ptr = phys_map_node_alloc(map);
182 p = map->nodes[lp->ptr];
183 if (level == 0) {
184 for (i = 0; i < P_L2_SIZE; i++) {
185 p[i].skip = 0;
186 p[i].ptr = PHYS_SECTION_UNASSIGNED;
187 }
188 }
189 } else {
190 p = map->nodes[lp->ptr];
191 }
192 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
193
194 while (*nb && lp < &p[P_L2_SIZE]) {
195 if ((*index & (step - 1)) == 0 && *nb >= step) {
196 lp->skip = 0;
197 lp->ptr = leaf;
198 *index += step;
199 *nb -= step;
200 } else {
201 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
202 }
203 ++lp;
204 }
205}
206
207static void phys_page_set(AddressSpaceDispatch *d,
208 hwaddr index, hwaddr nb,
209 uint16_t leaf)
210{
211 /* Wildly overreserve - it doesn't matter much. */
212 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
213
214 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
215}
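/*
 * Minimal usage sketch (this mirrors what register_multipage() does further
 * down; "d", "section", "start_addr" and "num_pages" are illustrative
 * names, not fixed API):
 *
 *     uint16_t leaf = phys_section_add(&d->map, section);
 *     phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, leaf);
 *
 * i.e. every target page in the range ends up pointing at one section
 * number in the dispatch map.
 */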
216
 217/* Compact a non-leaf page entry: if the entry has a single child, update
 218 * our entry so we can skip it and go directly to the destination.
 219 */
220static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
221{
222 unsigned valid_ptr = P_L2_SIZE;
223 int valid = 0;
224 PhysPageEntry *p;
225 int i;
226
227 if (lp->ptr == PHYS_MAP_NODE_NIL) {
228 return;
229 }
230
231 p = nodes[lp->ptr];
232 for (i = 0; i < P_L2_SIZE; i++) {
233 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
234 continue;
235 }
236
237 valid_ptr = i;
238 valid++;
239 if (p[i].skip) {
240 phys_page_compact(&p[i], nodes, compacted);
241 }
242 }
243
244 /* We can only compress if there's only one child. */
245 if (valid != 1) {
246 return;
247 }
248
249 assert(valid_ptr < P_L2_SIZE);
250
251 /* Don't compress if it won't fit in the # of bits we have. */
252 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
253 return;
254 }
255
256 lp->ptr = p[valid_ptr].ptr;
257 if (!p[valid_ptr].skip) {
258 /* If our only child is a leaf, make this a leaf. */
259 /* By design, we should have made this node a leaf to begin with so we
260 * should never reach here.
261 * But since it's so simple to handle this, let's do it just in case we
262 * change this rule.
263 */
264 lp->skip = 0;
265 } else {
266 lp->skip += p[valid_ptr].skip;
267 }
268}
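/*
 * Illustrative effect of the compaction above: if an entry with skip == 1
 * has exactly one valid child which itself has skip == 2, the entry is
 * rewritten to point straight at the grandchild with skip == 3, so a later
 * phys_page_find() walk crosses three levels in a single step.  The
 * (1 << 3) guard caps the accumulated skip at 7, comfortably inside the
 * 6-bit skip field.
 */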
269
270static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
271{
272 DECLARE_BITMAP(compacted, nodes_nb);
273
274 if (d->phys_map.skip) {
275 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
276 }
277}
278
279static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
280 Node *nodes, MemoryRegionSection *sections)
281{
282 PhysPageEntry *p;
283 hwaddr index = addr >> TARGET_PAGE_BITS;
284 int i;
285
286 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
287 if (lp.ptr == PHYS_MAP_NODE_NIL) {
288 return &sections[PHYS_SECTION_UNASSIGNED];
289 }
290 p = nodes[lp.ptr];
291 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
292 }
293
294 if (sections[lp.ptr].size.hi ||
295 range_covers_byte(sections[lp.ptr].offset_within_address_space,
296 sections[lp.ptr].size.lo, addr)) {
297 return &sections[lp.ptr];
298 } else {
299 return &sections[PHYS_SECTION_UNASSIGNED];
300 }
301}
302
303bool memory_region_is_unassigned(MemoryRegion *mr)
304{
305 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
306 && mr != &io_mem_watch;
307}
308
309static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
310 hwaddr addr,
311 bool resolve_subpage)
312{
313 MemoryRegionSection *section;
314 subpage_t *subpage;
315
316 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
317 if (resolve_subpage && section->mr->subpage) {
318 subpage = container_of(section->mr, subpage_t, iomem);
319 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
320 }
321 return section;
322}
323
324static MemoryRegionSection *
325address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
326 hwaddr *plen, bool resolve_subpage)
327{
328 MemoryRegionSection *section;
329 Int128 diff;
330
331 section = address_space_lookup_region(d, addr, resolve_subpage);
332 /* Compute offset within MemoryRegionSection */
333 addr -= section->offset_within_address_space;
334
335 /* Compute offset within MemoryRegion */
336 *xlat = addr + section->offset_within_region;
337
338 diff = int128_sub(section->mr->size, int128_make64(addr));
339 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
340 return section;
341}
342
343static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
344{
345 if (memory_region_is_ram(mr)) {
346 return !(is_write && mr->readonly);
347 }
348 if (memory_region_is_romd(mr)) {
349 return !is_write;
350 }
351
352 return false;
353}
354
355MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
356 hwaddr *xlat, hwaddr *plen,
357 bool is_write)
358{
359 IOMMUTLBEntry iotlb;
360 MemoryRegionSection *section;
361 MemoryRegion *mr;
362 hwaddr len = *plen;
363
364 for (;;) {
365 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
366 mr = section->mr;
367
368 if (!mr->iommu_ops) {
369 break;
370 }
371
372 iotlb = mr->iommu_ops->translate(mr, addr);
373 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
374 | (addr & iotlb.addr_mask));
375 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
376 if (!(iotlb.perm & (1 << is_write))) {
377 mr = &io_mem_unassigned;
378 break;
379 }
380
381 as = iotlb.target_as;
382 }
383
384 if (memory_access_is_direct(mr, is_write)) {
385 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
386 len = MIN(page, len);
387 }
388
389 *plen = len;
390 *xlat = addr;
391 return mr;
392}
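/*
 * Typical call pattern (a sketch only; address_space_rw() below is the
 * real thing and also splits accesses that cross region boundaries):
 *
 *     hwaddr xlat, len = 4;
 *     MemoryRegion *mr = address_space_translate(as, addr, &xlat, &len,
 *                                                false);
 *     if (memory_access_is_direct(mr, false)) {
 *         // RAM/ROMD: read up to "len" bytes at offset "xlat" within mr
 *     } else {
 *         // MMIO: go through io_mem_read() with an access size <= len
 *     }
 */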
393
394MemoryRegionSection *
395address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
396 hwaddr *plen)
397{
398 MemoryRegionSection *section;
399 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
400
401 assert(!section->mr->iommu_ops);
402 return section;
403}
404#endif
405
406void cpu_exec_init_all(void)
407{
408#if !defined(CONFIG_USER_ONLY)
409 qemu_mutex_init(&ram_list.mutex);
410 memory_map_init();
411 io_mem_init();
412#endif
413}
414
415#if !defined(CONFIG_USER_ONLY)
416
417static int cpu_common_post_load(void *opaque, int version_id)
418{
419 CPUState *cpu = opaque;
420
421 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
422 version_id is increased. */
423 cpu->interrupt_request &= ~0x01;
424 tlb_flush(cpu->env_ptr, 1);
425
426 return 0;
427}
428
429const VMStateDescription vmstate_cpu_common = {
430 .name = "cpu_common",
431 .version_id = 1,
432 .minimum_version_id = 1,
433 .minimum_version_id_old = 1,
434 .post_load = cpu_common_post_load,
435 .fields = (VMStateField []) {
436 VMSTATE_UINT32(halted, CPUState),
437 VMSTATE_UINT32(interrupt_request, CPUState),
438 VMSTATE_END_OF_LIST()
439 }
440};
441
442#endif
443
444CPUState *qemu_get_cpu(int index)
445{
446 CPUState *cpu;
447
448 CPU_FOREACH(cpu) {
449 if (cpu->cpu_index == index) {
450 return cpu;
451 }
452 }
453
454 return NULL;
455}
456
457#if !defined(CONFIG_USER_ONLY)
458void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
459{
460 /* We only support one address space per cpu at the moment. */
461 assert(cpu->as == as);
462
463 if (cpu->tcg_as_listener) {
464 memory_listener_unregister(cpu->tcg_as_listener);
465 } else {
466 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
467 }
468 cpu->tcg_as_listener->commit = tcg_commit;
469 memory_listener_register(cpu->tcg_as_listener, as);
470}
471#endif
472
473void cpu_exec_init(CPUArchState *env)
474{
475 CPUState *cpu = ENV_GET_CPU(env);
476 CPUClass *cc = CPU_GET_CLASS(cpu);
477 CPUState *some_cpu;
478 int cpu_index;
479
480#if defined(CONFIG_USER_ONLY)
481 cpu_list_lock();
482#endif
483 cpu_index = 0;
484 CPU_FOREACH(some_cpu) {
485 cpu_index++;
486 }
487 cpu->cpu_index = cpu_index;
488 cpu->numa_node = 0;
489 QTAILQ_INIT(&env->breakpoints);
490 QTAILQ_INIT(&env->watchpoints);
491#ifndef CONFIG_USER_ONLY
492 cpu->as = &address_space_memory;
493 cpu->thread_id = qemu_get_thread_id();
494#endif
495 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
496#if defined(CONFIG_USER_ONLY)
497 cpu_list_unlock();
498#endif
499 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
500 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
501 }
502#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
503 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
504 cpu_save, cpu_load, env);
505 assert(cc->vmsd == NULL);
506 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
507#endif
508 if (cc->vmsd != NULL) {
509 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
510 }
511}
512
513#if defined(TARGET_HAS_ICE)
514#if defined(CONFIG_USER_ONLY)
515static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
516{
517 tb_invalidate_phys_page_range(pc, pc + 1, 0);
518}
519#else
520static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
521{
522 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
523 if (phys != -1) {
524 tb_invalidate_phys_addr(cpu->as,
525 phys | (pc & ~TARGET_PAGE_MASK));
526 }
527}
528#endif
529#endif /* TARGET_HAS_ICE */
530
531#if defined(CONFIG_USER_ONLY)
532void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
533
534{
535}
536
537int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
538 int flags, CPUWatchpoint **watchpoint)
539{
540 return -ENOSYS;
541}
542#else
543/* Add a watchpoint. */
544int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
545 int flags, CPUWatchpoint **watchpoint)
546{
547 target_ulong len_mask = ~(len - 1);
548 CPUWatchpoint *wp;
549
550 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
551 if ((len & (len - 1)) || (addr & ~len_mask) ||
552 len == 0 || len > TARGET_PAGE_SIZE) {
553 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
554 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
555 return -EINVAL;
556 }
557 wp = g_malloc(sizeof(*wp));
558
559 wp->vaddr = addr;
560 wp->len_mask = len_mask;
561 wp->flags = flags;
562
563 /* keep all GDB-injected watchpoints in front */
564 if (flags & BP_GDB)
565 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
566 else
567 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
568
569 tlb_flush_page(env, addr);
570
571 if (watchpoint)
572 *watchpoint = wp;
573 return 0;
574}
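/*
 * Usage sketch (hypothetical values): watch 4 bytes at "addr" for writes,
 * much as the gdbstub does.  The length must be a power of two and the
 * address aligned to it, otherwise -EINVAL is returned.
 *
 *     CPUWatchpoint *wp;
 *     if (cpu_watchpoint_insert(env, addr, 4, BP_GDB | BP_MEM_WRITE, &wp)) {
 *         // invalid length or alignment
 *     }
 */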
575
576/* Remove a specific watchpoint. */
577int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
578 int flags)
579{
580 target_ulong len_mask = ~(len - 1);
581 CPUWatchpoint *wp;
582
583 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
584 if (addr == wp->vaddr && len_mask == wp->len_mask
585 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
586 cpu_watchpoint_remove_by_ref(env, wp);
587 return 0;
588 }
589 }
590 return -ENOENT;
591}
592
593/* Remove a specific watchpoint by reference. */
594void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
595{
596 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
597
598 tlb_flush_page(env, watchpoint->vaddr);
599
600 g_free(watchpoint);
601}
602
603/* Remove all matching watchpoints. */
604void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
605{
606 CPUWatchpoint *wp, *next;
607
608 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
609 if (wp->flags & mask)
610 cpu_watchpoint_remove_by_ref(env, wp);
611 }
612}
613#endif
614
615/* Add a breakpoint. */
616int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
617 CPUBreakpoint **breakpoint)
618{
619#if defined(TARGET_HAS_ICE)
620 CPUBreakpoint *bp;
621
622 bp = g_malloc(sizeof(*bp));
623
624 bp->pc = pc;
625 bp->flags = flags;
626
627 /* keep all GDB-injected breakpoints in front */
628 if (flags & BP_GDB) {
629 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
630 } else {
631 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
632 }
633
634 breakpoint_invalidate(ENV_GET_CPU(env), pc);
635
636 if (breakpoint) {
637 *breakpoint = bp;
638 }
639 return 0;
640#else
641 return -ENOSYS;
642#endif
643}
644
645/* Remove a specific breakpoint. */
646int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
647{
648#if defined(TARGET_HAS_ICE)
649 CPUBreakpoint *bp;
650
651 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
652 if (bp->pc == pc && bp->flags == flags) {
653 cpu_breakpoint_remove_by_ref(env, bp);
654 return 0;
655 }
656 }
657 return -ENOENT;
658#else
659 return -ENOSYS;
660#endif
661}
662
663/* Remove a specific breakpoint by reference. */
664void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
665{
666#if defined(TARGET_HAS_ICE)
667 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
668
669 breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
670
671 g_free(breakpoint);
672#endif
673}
674
675/* Remove all matching breakpoints. */
676void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
677{
678#if defined(TARGET_HAS_ICE)
679 CPUBreakpoint *bp, *next;
680
681 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
682 if (bp->flags & mask)
683 cpu_breakpoint_remove_by_ref(env, bp);
684 }
685#endif
686}
687
 688/* Enable or disable single-step mode. EXCP_DEBUG is returned by the
 689 CPU loop after each instruction. */
690void cpu_single_step(CPUState *cpu, int enabled)
691{
692#if defined(TARGET_HAS_ICE)
693 if (cpu->singlestep_enabled != enabled) {
694 cpu->singlestep_enabled = enabled;
695 if (kvm_enabled()) {
696 kvm_update_guest_debug(cpu, 0);
697 } else {
698 /* must flush all the translated code to avoid inconsistencies */
699 /* XXX: only flush what is necessary */
700 CPUArchState *env = cpu->env_ptr;
701 tb_flush(env);
702 }
703 }
704#endif
705}
706
707void cpu_abort(CPUArchState *env, const char *fmt, ...)
708{
709 CPUState *cpu = ENV_GET_CPU(env);
710 va_list ap;
711 va_list ap2;
712
713 va_start(ap, fmt);
714 va_copy(ap2, ap);
715 fprintf(stderr, "qemu: fatal: ");
716 vfprintf(stderr, fmt, ap);
717 fprintf(stderr, "\n");
718 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
719 if (qemu_log_enabled()) {
720 qemu_log("qemu: fatal: ");
721 qemu_log_vprintf(fmt, ap2);
722 qemu_log("\n");
723 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
724 qemu_log_flush();
725 qemu_log_close();
726 }
727 va_end(ap2);
728 va_end(ap);
729#if defined(CONFIG_USER_ONLY)
730 {
731 struct sigaction act;
732 sigfillset(&act.sa_mask);
733 act.sa_handler = SIG_DFL;
734 sigaction(SIGABRT, &act, NULL);
735 }
736#endif
737 abort();
738}
739
740#if !defined(CONFIG_USER_ONLY)
741static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
742{
743 RAMBlock *block;
744
745 /* The list is protected by the iothread lock here. */
746 block = ram_list.mru_block;
747 if (block && addr - block->offset < block->length) {
748 goto found;
749 }
750 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
751 if (addr - block->offset < block->length) {
752 goto found;
753 }
754 }
755
756 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
757 abort();
758
759found:
760 ram_list.mru_block = block;
761 return block;
762}
763
764static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
765{
766 ram_addr_t start1;
767 RAMBlock *block;
768 ram_addr_t end;
769
770 end = TARGET_PAGE_ALIGN(start + length);
771 start &= TARGET_PAGE_MASK;
772
773 block = qemu_get_ram_block(start);
774 assert(block == qemu_get_ram_block(end - 1));
775 start1 = (uintptr_t)block->host + (start - block->offset);
776 cpu_tlb_reset_dirty_all(start1, length);
777}
778
779/* Note: start and end must be within the same ram block. */
780void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
781 unsigned client)
782{
783 if (length == 0)
784 return;
785 cpu_physical_memory_clear_dirty_range(start, length, client);
786
787 if (tcg_enabled()) {
788 tlb_reset_dirty_range_all(start, length);
789 }
790}
791
792static void cpu_physical_memory_set_dirty_tracking(bool enable)
793{
794 in_migration = enable;
795}
796
797hwaddr memory_region_section_get_iotlb(CPUArchState *env,
798 MemoryRegionSection *section,
799 target_ulong vaddr,
800 hwaddr paddr, hwaddr xlat,
801 int prot,
802 target_ulong *address)
803{
804 hwaddr iotlb;
805 CPUWatchpoint *wp;
806
807 if (memory_region_is_ram(section->mr)) {
808 /* Normal RAM. */
809 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
810 + xlat;
811 if (!section->readonly) {
812 iotlb |= PHYS_SECTION_NOTDIRTY;
813 } else {
814 iotlb |= PHYS_SECTION_ROM;
815 }
816 } else {
817 iotlb = section - section->address_space->dispatch->map.sections;
818 iotlb += xlat;
819 }
820
821 /* Make accesses to pages with watchpoints go via the
822 watchpoint trap routines. */
823 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
824 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
825 /* Avoid trapping reads of pages with a write breakpoint. */
826 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
827 iotlb = PHYS_SECTION_WATCH + paddr;
828 *address |= TLB_MMIO;
829 break;
830 }
831 }
832 }
833
834 return iotlb;
835}
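/*
 * Shape of the iotlb value returned above: for RAM it is the page's
 * ram_addr with one of the special section numbers (PHYS_SECTION_NOTDIRTY
 * or PHYS_SECTION_ROM) OR-ed into the low bits; for MMIO it is the index
 * of the section in the dispatch map plus the in-page offset "xlat".
 * iotlb_to_region() further down recovers the MemoryRegion by masking
 * with ~TARGET_PAGE_MASK, which is why phys_section_add() asserts that
 * section numbers stay below TARGET_PAGE_SIZE.
 */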
 836#endif /* !defined(CONFIG_USER_ONLY) */
837
838#if !defined(CONFIG_USER_ONLY)
839
840static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
841 uint16_t section);
842static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
843
844static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
845
846/*
 847 * Set a custom physical guest memory allocator.
848 * Accelerators with unusual needs may need this. Hopefully, we can
849 * get rid of it eventually.
850 */
851void phys_mem_set_alloc(void *(*alloc)(size_t))
852{
853 phys_mem_alloc = alloc;
854}
855
856static uint16_t phys_section_add(PhysPageMap *map,
857 MemoryRegionSection *section)
858{
859 /* The physical section number is ORed with a page-aligned
860 * pointer to produce the iotlb entries. Thus it should
861 * never overflow into the page-aligned value.
862 */
863 assert(map->sections_nb < TARGET_PAGE_SIZE);
864
865 if (map->sections_nb == map->sections_nb_alloc) {
866 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
867 map->sections = g_renew(MemoryRegionSection, map->sections,
868 map->sections_nb_alloc);
869 }
870 map->sections[map->sections_nb] = *section;
871 memory_region_ref(section->mr);
872 return map->sections_nb++;
873}
874
875static void phys_section_destroy(MemoryRegion *mr)
876{
877 memory_region_unref(mr);
878
879 if (mr->subpage) {
880 subpage_t *subpage = container_of(mr, subpage_t, iomem);
881 memory_region_destroy(&subpage->iomem);
882 g_free(subpage);
883 }
884}
885
886static void phys_sections_free(PhysPageMap *map)
887{
888 while (map->sections_nb > 0) {
889 MemoryRegionSection *section = &map->sections[--map->sections_nb];
890 phys_section_destroy(section->mr);
891 }
892 g_free(map->sections);
893 g_free(map->nodes);
894}
895
896static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
897{
898 subpage_t *subpage;
899 hwaddr base = section->offset_within_address_space
900 & TARGET_PAGE_MASK;
901 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
902 d->map.nodes, d->map.sections);
903 MemoryRegionSection subsection = {
904 .offset_within_address_space = base,
905 .size = int128_make64(TARGET_PAGE_SIZE),
906 };
907 hwaddr start, end;
908
909 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
910
911 if (!(existing->mr->subpage)) {
912 subpage = subpage_init(d->as, base);
913 subsection.address_space = d->as;
914 subsection.mr = &subpage->iomem;
915 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
916 phys_section_add(&d->map, &subsection));
917 } else {
918 subpage = container_of(existing->mr, subpage_t, iomem);
919 }
920 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
921 end = start + int128_get64(section->size) - 1;
922 subpage_register(subpage, start, end,
923 phys_section_add(&d->map, section));
924}
925
926
927static void register_multipage(AddressSpaceDispatch *d,
928 MemoryRegionSection *section)
929{
930 hwaddr start_addr = section->offset_within_address_space;
931 uint16_t section_index = phys_section_add(&d->map, section);
932 uint64_t num_pages = int128_get64(int128_rshift(section->size,
933 TARGET_PAGE_BITS));
934
935 assert(num_pages);
936 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
937}
938
939static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
940{
941 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
942 AddressSpaceDispatch *d = as->next_dispatch;
943 MemoryRegionSection now = *section, remain = *section;
944 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
945
946 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
947 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
948 - now.offset_within_address_space;
949
950 now.size = int128_min(int128_make64(left), now.size);
951 register_subpage(d, &now);
952 } else {
953 now.size = int128_zero();
954 }
955 while (int128_ne(remain.size, now.size)) {
956 remain.size = int128_sub(remain.size, now.size);
957 remain.offset_within_address_space += int128_get64(now.size);
958 remain.offset_within_region += int128_get64(now.size);
959 now = remain;
960 if (int128_lt(remain.size, page_size)) {
961 register_subpage(d, &now);
962 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
963 now.size = page_size;
964 register_subpage(d, &now);
965 } else {
966 now.size = int128_and(now.size, int128_neg(page_size));
967 register_multipage(d, &now);
968 }
969 }
970}
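/*
 * Worked example for the splitting above (made-up numbers, 4 KiB pages):
 * a section covering guest-physical [0x1800, 0x4400) is registered as
 *
 *     [0x1800, 0x2000)  unaligned head  -> register_subpage()
 *     [0x2000, 0x4000)  aligned middle  -> register_multipage()
 *     [0x4000, 0x4400)  unaligned tail  -> register_subpage()
 *
 * so only the partial pages at either end pay for the subpage indirection.
 */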
971
972void qemu_flush_coalesced_mmio_buffer(void)
973{
974 if (kvm_enabled())
975 kvm_flush_coalesced_mmio_buffer();
976}
977
978void qemu_mutex_lock_ramlist(void)
979{
980 qemu_mutex_lock(&ram_list.mutex);
981}
982
983void qemu_mutex_unlock_ramlist(void)
984{
985 qemu_mutex_unlock(&ram_list.mutex);
986}
987
988#ifdef __linux__
989
990#include <sys/vfs.h>
991
992#define HUGETLBFS_MAGIC 0x958458f6
993
994static long gethugepagesize(const char *path)
995{
996 struct statfs fs;
997 int ret;
998
999 do {
1000 ret = statfs(path, &fs);
1001 } while (ret != 0 && errno == EINTR);
1002
1003 if (ret != 0) {
1004 perror(path);
1005 return 0;
1006 }
1007
1008 if (fs.f_type != HUGETLBFS_MAGIC)
1009 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1010
1011 return fs.f_bsize;
1012}
1013
1014static sigjmp_buf sigjump;
1015
1016static void sigbus_handler(int signal)
1017{
1018 siglongjmp(sigjump, 1);
1019}
1020
1021static void *file_ram_alloc(RAMBlock *block,
1022 ram_addr_t memory,
1023 const char *path)
1024{
1025 char *filename;
1026 char *sanitized_name;
1027 char *c;
1028 void *area;
1029 int fd;
1030 unsigned long hpagesize;
1031
1032 hpagesize = gethugepagesize(path);
1033 if (!hpagesize) {
1034 return NULL;
1035 }
1036
1037 if (memory < hpagesize) {
1038 return NULL;
1039 }
1040
1041 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1042 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1043 return NULL;
1044 }
1045
1046 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1047 sanitized_name = g_strdup(block->mr->name);
1048 for (c = sanitized_name; *c != '\0'; c++) {
1049 if (*c == '/')
1050 *c = '_';
1051 }
1052
1053 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1054 sanitized_name);
1055 g_free(sanitized_name);
1056
1057 fd = mkstemp(filename);
1058 if (fd < 0) {
1059 perror("unable to create backing store for hugepages");
1060 g_free(filename);
1061 return NULL;
1062 }
1063 unlink(filename);
1064 g_free(filename);
1065
1066 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1067
1068 /*
1069 * ftruncate is not supported by hugetlbfs in older
1070 * hosts, so don't bother bailing out on errors.
1071 * If anything goes wrong with it under other filesystems,
1072 * mmap will fail.
1073 */
1074 if (ftruncate(fd, memory))
1075 perror("ftruncate");
1076
1077 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1078 if (area == MAP_FAILED) {
1079 perror("file_ram_alloc: can't mmap RAM pages");
1080 close(fd);
1081 return (NULL);
1082 }
1083
1084 if (mem_prealloc) {
1085 int ret, i;
1086 struct sigaction act, oldact;
1087 sigset_t set, oldset;
1088
1089 memset(&act, 0, sizeof(act));
1090 act.sa_handler = &sigbus_handler;
1091 act.sa_flags = 0;
1092
1093 ret = sigaction(SIGBUS, &act, &oldact);
1094 if (ret) {
1095 perror("file_ram_alloc: failed to install signal handler");
1096 exit(1);
1097 }
1098
1099 /* unblock SIGBUS */
1100 sigemptyset(&set);
1101 sigaddset(&set, SIGBUS);
1102 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1103
1104 if (sigsetjmp(sigjump, 1)) {
1105 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1106 exit(1);
1107 }
1108
1109 /* MAP_POPULATE silently ignores failures */
1110 for (i = 0; i < (memory/hpagesize); i++) {
1111 memset(area + (hpagesize*i), 0, 1);
1112 }
1113
1114 ret = sigaction(SIGBUS, &oldact, NULL);
1115 if (ret) {
1116 perror("file_ram_alloc: failed to reinstall signal handler");
1117 exit(1);
1118 }
1119
1120 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1121 }
1122
1123 block->fd = fd;
1124 return area;
1125}
1126#else
1127static void *file_ram_alloc(RAMBlock *block,
1128 ram_addr_t memory,
1129 const char *path)
1130{
1131 fprintf(stderr, "-mem-path not supported on this host\n");
1132 exit(1);
1133}
1134#endif
1135
1136static ram_addr_t find_ram_offset(ram_addr_t size)
1137{
1138 RAMBlock *block, *next_block;
1139 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1140
 1141 assert(size != 0); /* it would hand out the same offset multiple times */
1142
1143 if (QTAILQ_EMPTY(&ram_list.blocks))
1144 return 0;
1145
1146 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1147 ram_addr_t end, next = RAM_ADDR_MAX;
1148
1149 end = block->offset + block->length;
1150
1151 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1152 if (next_block->offset >= end) {
1153 next = MIN(next, next_block->offset);
1154 }
1155 }
1156 if (next - end >= size && next - end < mingap) {
1157 offset = end;
1158 mingap = next - end;
1159 }
1160 }
1161
1162 if (offset == RAM_ADDR_MAX) {
1163 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1164 (uint64_t)size);
1165 abort();
1166 }
1167
1168 return offset;
1169}
1170
1171ram_addr_t last_ram_offset(void)
1172{
1173 RAMBlock *block;
1174 ram_addr_t last = 0;
1175
1176 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1177 last = MAX(last, block->offset + block->length);
1178
1179 return last;
1180}
1181
1182static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1183{
1184 int ret;
1185
 1186 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1187 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1188 "dump-guest-core", true)) {
1189 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1190 if (ret) {
1191 perror("qemu_madvise");
1192 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1193 "but dump_guest_core=off specified\n");
1194 }
1195 }
1196}
1197
1198void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1199{
1200 RAMBlock *new_block, *block;
1201
1202 new_block = NULL;
1203 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1204 if (block->offset == addr) {
1205 new_block = block;
1206 break;
1207 }
1208 }
1209 assert(new_block);
1210 assert(!new_block->idstr[0]);
1211
1212 if (dev) {
1213 char *id = qdev_get_dev_path(dev);
1214 if (id) {
1215 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1216 g_free(id);
1217 }
1218 }
1219 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1220
1221 /* This assumes the iothread lock is taken here too. */
1222 qemu_mutex_lock_ramlist();
1223 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1224 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1225 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1226 new_block->idstr);
1227 abort();
1228 }
1229 }
1230 qemu_mutex_unlock_ramlist();
1231}
1232
1233static int memory_try_enable_merging(void *addr, size_t len)
1234{
1235 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1236 /* disabled by the user */
1237 return 0;
1238 }
1239
1240 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1241}
1242
1243ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1244 MemoryRegion *mr)
1245{
1246 RAMBlock *block, *new_block;
1247 ram_addr_t old_ram_size, new_ram_size;
1248
1249 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1250
1251 size = TARGET_PAGE_ALIGN(size);
1252 new_block = g_malloc0(sizeof(*new_block));
1253 new_block->fd = -1;
1254
1255 /* This assumes the iothread lock is taken here too. */
1256 qemu_mutex_lock_ramlist();
1257 new_block->mr = mr;
1258 new_block->offset = find_ram_offset(size);
1259 if (host) {
1260 new_block->host = host;
1261 new_block->flags |= RAM_PREALLOC_MASK;
1262 } else if (xen_enabled()) {
1263 if (mem_path) {
1264 fprintf(stderr, "-mem-path not supported with Xen\n");
1265 exit(1);
1266 }
1267 xen_ram_alloc(new_block->offset, size, mr);
1268 } else {
1269 if (mem_path) {
1270 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1271 /*
1272 * file_ram_alloc() needs to allocate just like
1273 * phys_mem_alloc, but we haven't bothered to provide
1274 * a hook there.
1275 */
1276 fprintf(stderr,
1277 "-mem-path not supported with this accelerator\n");
1278 exit(1);
1279 }
1280 new_block->host = file_ram_alloc(new_block, size, mem_path);
1281 }
1282 if (!new_block->host) {
1283 new_block->host = phys_mem_alloc(size);
1284 if (!new_block->host) {
1285 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1286 new_block->mr->name, strerror(errno));
1287 exit(1);
1288 }
1289 memory_try_enable_merging(new_block->host, size);
1290 }
1291 }
1292 new_block->length = size;
1293
1294 /* Keep the list sorted from biggest to smallest block. */
1295 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1296 if (block->length < new_block->length) {
1297 break;
1298 }
1299 }
1300 if (block) {
1301 QTAILQ_INSERT_BEFORE(block, new_block, next);
1302 } else {
1303 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1304 }
1305 ram_list.mru_block = NULL;
1306
1307 ram_list.version++;
1308 qemu_mutex_unlock_ramlist();
1309
1310 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1311
1312 if (new_ram_size > old_ram_size) {
1313 int i;
1314 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1315 ram_list.dirty_memory[i] =
1316 bitmap_zero_extend(ram_list.dirty_memory[i],
1317 old_ram_size, new_ram_size);
1318 }
1319 }
1320 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1321
1322 qemu_ram_setup_dump(new_block->host, size);
1323 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1324 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1325
1326 if (kvm_enabled())
1327 kvm_setup_guest_memory(new_block->host, size);
1328
1329 return new_block->offset;
1330}
1331
1332ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1333{
1334 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1335}
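/*
 * Usage sketch (hypothetical caller that owns a MemoryRegion "mr" of
 * "size" bytes): allocate backing RAM and obtain a host pointer to it.
 *
 *     ram_addr_t offset = qemu_ram_alloc(size, mr);
 *     void *host = qemu_get_ram_ptr(offset);
 *
 * Most board and device code reaches this point indirectly through the
 * memory API (e.g. memory_region_init_ram()) rather than calling it
 * directly.
 */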
1336
1337void qemu_ram_free_from_ptr(ram_addr_t addr)
1338{
1339 RAMBlock *block;
1340
1341 /* This assumes the iothread lock is taken here too. */
1342 qemu_mutex_lock_ramlist();
1343 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1344 if (addr == block->offset) {
1345 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1346 ram_list.mru_block = NULL;
1347 ram_list.version++;
1348 g_free(block);
1349 break;
1350 }
1351 }
1352 qemu_mutex_unlock_ramlist();
1353}
1354
1355void qemu_ram_free(ram_addr_t addr)
1356{
1357 RAMBlock *block;
1358
1359 /* This assumes the iothread lock is taken here too. */
1360 qemu_mutex_lock_ramlist();
1361 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1362 if (addr == block->offset) {
1363 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1364 ram_list.mru_block = NULL;
1365 ram_list.version++;
1366 if (block->flags & RAM_PREALLOC_MASK) {
1367 ;
1368 } else if (xen_enabled()) {
1369 xen_invalidate_map_cache_entry(block->host);
1370#ifndef _WIN32
1371 } else if (block->fd >= 0) {
1372 munmap(block->host, block->length);
1373 close(block->fd);
1374#endif
1375 } else {
1376 qemu_anon_ram_free(block->host, block->length);
1377 }
1378 g_free(block);
1379 break;
1380 }
1381 }
1382 qemu_mutex_unlock_ramlist();
1383
1384}
1385
1386#ifndef _WIN32
1387void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1388{
1389 RAMBlock *block;
1390 ram_addr_t offset;
1391 int flags;
1392 void *area, *vaddr;
1393
1394 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1395 offset = addr - block->offset;
1396 if (offset < block->length) {
1397 vaddr = block->host + offset;
1398 if (block->flags & RAM_PREALLOC_MASK) {
1399 ;
1400 } else if (xen_enabled()) {
1401 abort();
1402 } else {
1403 flags = MAP_FIXED;
1404 munmap(vaddr, length);
1405 if (block->fd >= 0) {
1406#ifdef MAP_POPULATE
1407 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1408 MAP_PRIVATE;
1409#else
1410 flags |= MAP_PRIVATE;
1411#endif
1412 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1413 flags, block->fd, offset);
1414 } else {
1415 /*
1416 * Remap needs to match alloc. Accelerators that
1417 * set phys_mem_alloc never remap. If they did,
1418 * we'd need a remap hook here.
1419 */
1420 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1421
1422 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1423 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1424 flags, -1, 0);
1425 }
1426 if (area != vaddr) {
1427 fprintf(stderr, "Could not remap addr: "
1428 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1429 length, addr);
1430 exit(1);
1431 }
1432 memory_try_enable_merging(vaddr, length);
1433 qemu_ram_setup_dump(vaddr, length);
1434 }
1435 return;
1436 }
1437 }
1438}
1439#endif /* !_WIN32 */
1440
1441/* Return a host pointer to ram allocated with qemu_ram_alloc.
1442 With the exception of the softmmu code in this file, this should
1443 only be used for local memory (e.g. video ram) that the device owns,
1444 and knows it isn't going to access beyond the end of the block.
1445
1446 It should not be used for general purpose DMA.
1447 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1448 */
1449void *qemu_get_ram_ptr(ram_addr_t addr)
1450{
1451 RAMBlock *block = qemu_get_ram_block(addr);
1452
1453 if (xen_enabled()) {
1454 /* We need to check if the requested address is in the RAM
1455 * because we don't want to map the entire memory in QEMU.
1456 * In that case just map until the end of the page.
1457 */
1458 if (block->offset == 0) {
1459 return xen_map_cache(addr, 0, 0);
1460 } else if (block->host == NULL) {
1461 block->host =
1462 xen_map_cache(block->offset, block->length, 1);
1463 }
1464 }
1465 return block->host + (addr - block->offset);
1466}
1467
1468/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1469 * but takes a size argument */
1470static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1471{
1472 if (*size == 0) {
1473 return NULL;
1474 }
1475 if (xen_enabled()) {
1476 return xen_map_cache(addr, *size, 1);
1477 } else {
1478 RAMBlock *block;
1479
1480 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1481 if (addr - block->offset < block->length) {
1482 if (addr - block->offset + *size > block->length)
1483 *size = block->length - addr + block->offset;
1484 return block->host + (addr - block->offset);
1485 }
1486 }
1487
1488 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1489 abort();
1490 }
1491}
1492
1493/* Some of the softmmu routines need to translate from a host pointer
1494 (typically a TLB entry) back to a ram offset. */
1495MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1496{
1497 RAMBlock *block;
1498 uint8_t *host = ptr;
1499
1500 if (xen_enabled()) {
1501 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1502 return qemu_get_ram_block(*ram_addr)->mr;
1503 }
1504
1505 block = ram_list.mru_block;
1506 if (block && block->host && host - block->host < block->length) {
1507 goto found;
1508 }
1509
1510 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
 1511 /* This case appears when the block is not mapped. */
1512 if (block->host == NULL) {
1513 continue;
1514 }
1515 if (host - block->host < block->length) {
1516 goto found;
1517 }
1518 }
1519
1520 return NULL;
1521
1522found:
1523 *ram_addr = block->offset + (host - block->host);
1524 return block->mr;
1525}
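/*
 * The two helpers above are, for mapped blocks, inverses of each other
 * (sketch; "some_ram_addr" is an illustrative value):
 *
 *     ram_addr_t ram_addr;
 *     void *host = qemu_get_ram_ptr(some_ram_addr);
 *     if (qemu_ram_addr_from_host(host, &ram_addr)) {
 *         assert(ram_addr == some_ram_addr);
 *     }
 *
 * qemu_ram_addr_from_host() returns NULL for host pointers that do not
 * fall inside any mapped RAMBlock, so callers must check the result.
 */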
1526
1527static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1528 uint64_t val, unsigned size)
1529{
1530 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1531 tb_invalidate_phys_page_fast(ram_addr, size);
1532 }
1533 switch (size) {
1534 case 1:
1535 stb_p(qemu_get_ram_ptr(ram_addr), val);
1536 break;
1537 case 2:
1538 stw_p(qemu_get_ram_ptr(ram_addr), val);
1539 break;
1540 case 4:
1541 stl_p(qemu_get_ram_ptr(ram_addr), val);
1542 break;
1543 default:
1544 abort();
1545 }
1546 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1547 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1548 /* we remove the notdirty callback only if the code has been
1549 flushed */
1550 if (!cpu_physical_memory_is_clean(ram_addr)) {
1551 CPUArchState *env = current_cpu->env_ptr;
1552 tlb_set_dirty(env, env->mem_io_vaddr);
1553 }
1554}
1555
1556static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1557 unsigned size, bool is_write)
1558{
1559 return is_write;
1560}
1561
1562static const MemoryRegionOps notdirty_mem_ops = {
1563 .write = notdirty_mem_write,
1564 .valid.accepts = notdirty_mem_accepts,
1565 .endianness = DEVICE_NATIVE_ENDIAN,
1566};
1567
1568/* Generate a debug exception if a watchpoint has been hit. */
1569static void check_watchpoint(int offset, int len_mask, int flags)
1570{
1571 CPUArchState *env = current_cpu->env_ptr;
1572 target_ulong pc, cs_base;
1573 target_ulong vaddr;
1574 CPUWatchpoint *wp;
1575 int cpu_flags;
1576
1577 if (env->watchpoint_hit) {
1578 /* We re-entered the check after replacing the TB. Now raise
 1579 * the debug interrupt so that it will trigger after the
1580 * current instruction. */
1581 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1582 return;
1583 }
1584 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1585 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1586 if ((vaddr == (wp->vaddr & len_mask) ||
1587 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1588 wp->flags |= BP_WATCHPOINT_HIT;
1589 if (!env->watchpoint_hit) {
1590 env->watchpoint_hit = wp;
1591 tb_check_watchpoint(env);
1592 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1593 env->exception_index = EXCP_DEBUG;
1594 cpu_loop_exit(env);
1595 } else {
1596 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1597 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1598 cpu_resume_from_signal(env, NULL);
1599 }
1600 }
1601 } else {
1602 wp->flags &= ~BP_WATCHPOINT_HIT;
1603 }
1604 }
1605}
1606
1607/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1608 so these check for a hit then pass through to the normal out-of-line
1609 phys routines. */
1610static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1611 unsigned size)
1612{
1613 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1614 switch (size) {
1615 case 1: return ldub_phys(&address_space_memory, addr);
1616 case 2: return lduw_phys(&address_space_memory, addr);
1617 case 4: return ldl_phys(&address_space_memory, addr);
1618 default: abort();
1619 }
1620}
1621
1622static void watch_mem_write(void *opaque, hwaddr addr,
1623 uint64_t val, unsigned size)
1624{
1625 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1626 switch (size) {
1627 case 1:
1628 stb_phys(addr, val);
1629 break;
1630 case 2:
1631 stw_phys(addr, val);
1632 break;
1633 case 4:
1634 stl_phys(&address_space_memory, addr, val);
1635 break;
1636 default: abort();
1637 }
1638}
1639
1640static const MemoryRegionOps watch_mem_ops = {
1641 .read = watch_mem_read,
1642 .write = watch_mem_write,
1643 .endianness = DEVICE_NATIVE_ENDIAN,
1644};
1645
1646static uint64_t subpage_read(void *opaque, hwaddr addr,
1647 unsigned len)
1648{
1649 subpage_t *subpage = opaque;
1650 uint8_t buf[4];
1651
1652#if defined(DEBUG_SUBPAGE)
1653 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1654 subpage, len, addr);
1655#endif
1656 address_space_read(subpage->as, addr + subpage->base, buf, len);
1657 switch (len) {
1658 case 1:
1659 return ldub_p(buf);
1660 case 2:
1661 return lduw_p(buf);
1662 case 4:
1663 return ldl_p(buf);
1664 default:
1665 abort();
1666 }
1667}
1668
1669static void subpage_write(void *opaque, hwaddr addr,
1670 uint64_t value, unsigned len)
1671{
1672 subpage_t *subpage = opaque;
1673 uint8_t buf[4];
1674
1675#if defined(DEBUG_SUBPAGE)
1676 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1677 " value %"PRIx64"\n",
1678 __func__, subpage, len, addr, value);
1679#endif
1680 switch (len) {
1681 case 1:
1682 stb_p(buf, value);
1683 break;
1684 case 2:
1685 stw_p(buf, value);
1686 break;
1687 case 4:
1688 stl_p(buf, value);
1689 break;
1690 default:
1691 abort();
1692 }
1693 address_space_write(subpage->as, addr + subpage->base, buf, len);
1694}
1695
1696static bool subpage_accepts(void *opaque, hwaddr addr,
1697 unsigned len, bool is_write)
1698{
1699 subpage_t *subpage = opaque;
1700#if defined(DEBUG_SUBPAGE)
1701 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1702 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1703#endif
1704
1705 return address_space_access_valid(subpage->as, addr + subpage->base,
1706 len, is_write);
1707}
1708
1709static const MemoryRegionOps subpage_ops = {
1710 .read = subpage_read,
1711 .write = subpage_write,
1712 .valid.accepts = subpage_accepts,
1713 .endianness = DEVICE_NATIVE_ENDIAN,
1714};
1715
1716static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1717 uint16_t section)
1718{
1719 int idx, eidx;
1720
1721 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1722 return -1;
1723 idx = SUBPAGE_IDX(start);
1724 eidx = SUBPAGE_IDX(end);
1725#if defined(DEBUG_SUBPAGE)
1726 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1727 __func__, mmio, start, end, idx, eidx, section);
1728#endif
1729 for (; idx <= eidx; idx++) {
1730 mmio->sub_section[idx] = section;
1731 }
1732
1733 return 0;
1734}
1735
1736static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1737{
1738 subpage_t *mmio;
1739
1740 mmio = g_malloc0(sizeof(subpage_t));
1741
1742 mmio->as = as;
1743 mmio->base = base;
1744 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1745 "subpage", TARGET_PAGE_SIZE);
1746 mmio->iomem.subpage = true;
1747#if defined(DEBUG_SUBPAGE)
1748 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1749 mmio, base, TARGET_PAGE_SIZE);
1750#endif
1751 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1752
1753 return mmio;
1754}
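/*
 * How the subpage indirection fits together (sketch): register_subpage()
 * above creates one subpage_t per partially covered target page, installs
 * its iomem as that page's section, and records the real section number
 * for each byte offset in sub_section[].  Later accesses to the page land
 * in subpage_read()/subpage_write(), which simply forward to
 * address_space_read()/address_space_write() on the parent AddressSpace
 * at base + addr.
 */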
1755
1756static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1757{
1758 MemoryRegionSection section = {
1759 .address_space = &address_space_memory,
1760 .mr = mr,
1761 .offset_within_address_space = 0,
1762 .offset_within_region = 0,
1763 .size = int128_2_64(),
1764 };
1765
1766 return phys_section_add(map, &section);
1767}
1768
1769MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1770{
1771 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1772}
1773
1774static void io_mem_init(void)
1775{
1776 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1777 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1778 "unassigned", UINT64_MAX);
1779 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1780 "notdirty", UINT64_MAX);
1781 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1782 "watch", UINT64_MAX);
1783}
1784
1785static void mem_begin(MemoryListener *listener)
1786{
1787 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1788 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1789 uint16_t n;
1790
1791 n = dummy_section(&d->map, &io_mem_unassigned);
1792 assert(n == PHYS_SECTION_UNASSIGNED);
1793 n = dummy_section(&d->map, &io_mem_notdirty);
1794 assert(n == PHYS_SECTION_NOTDIRTY);
1795 n = dummy_section(&d->map, &io_mem_rom);
1796 assert(n == PHYS_SECTION_ROM);
1797 n = dummy_section(&d->map, &io_mem_watch);
1798 assert(n == PHYS_SECTION_WATCH);
1799
1800 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1801 d->as = as;
1802 as->next_dispatch = d;
1803}
1804
1805static void mem_commit(MemoryListener *listener)
1806{
1807 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1808 AddressSpaceDispatch *cur = as->dispatch;
1809 AddressSpaceDispatch *next = as->next_dispatch;
1810
1811 phys_page_compact_all(next, next->map.nodes_nb);
1812
1813 as->dispatch = next;
1814
1815 if (cur) {
1816 phys_sections_free(&cur->map);
1817 g_free(cur);
1818 }
1819}
1820
1821static void tcg_commit(MemoryListener *listener)
1822{
1823 CPUState *cpu;
1824
1825 /* since each CPU stores ram addresses in its TLB cache, we must
1826 reset the modified entries */
1827 /* XXX: slow ! */
1828 CPU_FOREACH(cpu) {
1829 CPUArchState *env = cpu->env_ptr;
1830
1831 /* FIXME: Disentangle the cpu.h circular files deps so we can
1832 directly get the right CPU from listener. */
1833 if (cpu->tcg_as_listener != listener) {
1834 continue;
1835 }
1836 tlb_flush(env, 1);
1837 }
1838}
1839
1840static void core_log_global_start(MemoryListener *listener)
1841{
1842 cpu_physical_memory_set_dirty_tracking(true);
1843}
1844
1845static void core_log_global_stop(MemoryListener *listener)
1846{
1847 cpu_physical_memory_set_dirty_tracking(false);
1848}
1849
1850static MemoryListener core_memory_listener = {
1851 .log_global_start = core_log_global_start,
1852 .log_global_stop = core_log_global_stop,
1853 .priority = 1,
1854};
1855
1856void address_space_init_dispatch(AddressSpace *as)
1857{
1858 as->dispatch = NULL;
1859 as->dispatch_listener = (MemoryListener) {
1860 .begin = mem_begin,
1861 .commit = mem_commit,
1862 .region_add = mem_add,
1863 .region_nop = mem_add,
1864 .priority = 0,
1865 };
1866 memory_listener_register(&as->dispatch_listener, as);
1867}
1868
1869void address_space_destroy_dispatch(AddressSpace *as)
1870{
1871 AddressSpaceDispatch *d = as->dispatch;
1872
1873 memory_listener_unregister(&as->dispatch_listener);
1874 g_free(d);
1875 as->dispatch = NULL;
1876}
1877
1878static void memory_map_init(void)
1879{
1880 system_memory = g_malloc(sizeof(*system_memory));
1881
1882 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1883 address_space_init(&address_space_memory, system_memory, "memory");
1884
1885 system_io = g_malloc(sizeof(*system_io));
1886 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1887 65536);
1888 address_space_init(&address_space_io, system_io, "I/O");
1889
1890 memory_listener_register(&core_memory_listener, &address_space_memory);
1891}
1892
1893MemoryRegion *get_system_memory(void)
1894{
1895 return system_memory;
1896}
1897
1898MemoryRegion *get_system_io(void)
1899{
1900 return system_io;
1901}
1902
1903#endif /* !defined(CONFIG_USER_ONLY) */
1904
1905/* physical memory access (slow version, mainly for debug) */
1906#if defined(CONFIG_USER_ONLY)
1907int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1908 uint8_t *buf, int len, int is_write)
1909{
1910 int l, flags;
1911 target_ulong page;
1912 void * p;
1913
1914 while (len > 0) {
1915 page = addr & TARGET_PAGE_MASK;
1916 l = (page + TARGET_PAGE_SIZE) - addr;
1917 if (l > len)
1918 l = len;
1919 flags = page_get_flags(page);
1920 if (!(flags & PAGE_VALID))
1921 return -1;
1922 if (is_write) {
1923 if (!(flags & PAGE_WRITE))
1924 return -1;
1925 /* XXX: this code should not depend on lock_user */
1926 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1927 return -1;
1928 memcpy(p, buf, l);
1929 unlock_user(p, addr, l);
1930 } else {
1931 if (!(flags & PAGE_READ))
1932 return -1;
1933 /* XXX: this code should not depend on lock_user */
1934 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1935 return -1;
1936 memcpy(buf, p, l);
1937 unlock_user(p, addr, 0);
1938 }
1939 len -= l;
1940 buf += l;
1941 addr += l;
1942 }
1943 return 0;
1944}
1945
1946#else
1947
1948static void invalidate_and_set_dirty(hwaddr addr,
1949 hwaddr length)
1950{
1951 if (cpu_physical_memory_is_clean(addr)) {
1952 /* invalidate code */
1953 tb_invalidate_phys_page_range(addr, addr + length, 0);
1954 /* set dirty bit */
1955 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1956 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1957 }
1958 xen_modified_memory(addr, length);
1959}
1960
1961static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1962{
1963 unsigned access_size_max = mr->ops->valid.max_access_size;
1964
1965 /* Regions are assumed to support 1-4 byte accesses unless
1966 otherwise specified. */
1967 if (access_size_max == 0) {
1968 access_size_max = 4;
1969 }
1970
1971 /* Bound the maximum access by the alignment of the address. */
1972 if (!mr->ops->impl.unaligned) {
1973 unsigned align_size_max = addr & -addr;
1974 if (align_size_max != 0 && align_size_max < access_size_max) {
1975 access_size_max = align_size_max;
1976 }
1977 }
1978
1979 /* Don't attempt accesses larger than the maximum. */
1980 if (l > access_size_max) {
1981 l = access_size_max;
1982 }
1983 if (l & (l - 1)) {
1984 l = 1 << (qemu_fls(l) - 1);
1985 }
1986
1987 return l;
1988}
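/*
 * Example (illustrative numbers): for a region with max_access_size == 4,
 * a 7-byte request at an address ending in ...002 is limited by the
 * address alignment (addr & -addr == 2) to a 2-byte access;
 * address_space_rw() then loops for the remaining bytes.
 */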
1989
1990bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1991 int len, bool is_write)
1992{
1993 hwaddr l;
1994 uint8_t *ptr;
1995 uint64_t val;
1996 hwaddr addr1;
1997 MemoryRegion *mr;
1998 bool error = false;
1999
2000 while (len > 0) {
2001 l = len;
2002 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2003
2004 if (is_write) {
2005 if (!memory_access_is_direct(mr, is_write)) {
2006 l = memory_access_size(mr, l, addr1);
2007 /* XXX: could force current_cpu to NULL to avoid
2008 potential bugs */
2009 switch (l) {
2010 case 8:
2011 /* 64 bit write access */
2012 val = ldq_p(buf);
2013 error |= io_mem_write(mr, addr1, val, 8);
2014 break;
2015 case 4:
2016 /* 32 bit write access */
2017 val = ldl_p(buf);
2018 error |= io_mem_write(mr, addr1, val, 4);
2019 break;
2020 case 2:
2021 /* 16 bit write access */
2022 val = lduw_p(buf);
2023 error |= io_mem_write(mr, addr1, val, 2);
2024 break;
2025 case 1:
2026 /* 8 bit write access */
2027 val = ldub_p(buf);
2028 error |= io_mem_write(mr, addr1, val, 1);
2029 break;
2030 default:
2031 abort();
2032 }
2033 } else {
2034 addr1 += memory_region_get_ram_addr(mr);
2035 /* RAM case */
2036 ptr = qemu_get_ram_ptr(addr1);
2037 memcpy(ptr, buf, l);
2038 invalidate_and_set_dirty(addr1, l);
2039 }
2040 } else {
2041 if (!memory_access_is_direct(mr, is_write)) {
2042 /* I/O case */
2043 l = memory_access_size(mr, l, addr1);
2044 switch (l) {
2045 case 8:
2046 /* 64 bit read access */
2047 error |= io_mem_read(mr, addr1, &val, 8);
2048 stq_p(buf, val);
2049 break;
2050 case 4:
2051 /* 32 bit read access */
2052 error |= io_mem_read(mr, addr1, &val, 4);
2053 stl_p(buf, val);
2054 break;
2055 case 2:
2056 /* 16 bit read access */
2057 error |= io_mem_read(mr, addr1, &val, 2);
2058 stw_p(buf, val);
2059 break;
2060 case 1:
2061 /* 8 bit read access */
2062 error |= io_mem_read(mr, addr1, &val, 1);
2063 stb_p(buf, val);
2064 break;
2065 default:
2066 abort();
2067 }
2068 } else {
2069 /* RAM case */
2070 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2071 memcpy(buf, ptr, l);
2072 }
2073 }
2074 len -= l;
2075 buf += l;
2076 addr += l;
2077 }
2078
2079 return error;
2080}
2081
2082bool address_space_write(AddressSpace *as, hwaddr addr,
2083 const uint8_t *buf, int len)
2084{
2085 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2086}
2087
2088bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2089{
2090 return address_space_rw(as, addr, buf, len, false);
2091}
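
/*
 * Illustrative sketch: copying a block of guest-physical memory through
 * the address_space_rw() dispatcher above.  The helper name and the 4 KiB
 * chunk size are hypothetical; nothing else in this file uses it.
 */
static inline bool example_guest_phys_copy(AddressSpace *as, hwaddr dst,
                                           hwaddr src, int len)
{
    uint8_t chunk[4096];
    bool error = false;

    while (len > 0) {
        int l = len < (int)sizeof(chunk) ? len : (int)sizeof(chunk);

        /* Each call takes the RAM fast path or the MMIO switch above. */
        error |= address_space_read(as, src, chunk, l);
        error |= address_space_write(as, dst, chunk, l);
        src += l;
        dst += l;
        len -= l;
    }
    return error;
}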
2092
2093
2094void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2095 int len, int is_write)
2096{
2097 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2098}
2099
2100enum write_rom_type {
2101 WRITE_DATA,
2102 FLUSH_CACHE,
2103};
2104
2105static inline void cpu_physical_memory_write_rom_internal(
2106 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2107{
2108 hwaddr l;
2109 uint8_t *ptr;
2110 hwaddr addr1;
2111 MemoryRegion *mr;
2112
2113 while (len > 0) {
2114 l = len;
2115 mr = address_space_translate(&address_space_memory,
2116 addr, &addr1, &l, true);
2117
2118 if (!(memory_region_is_ram(mr) ||
2119 memory_region_is_romd(mr))) {
2120 /* do nothing */
2121 } else {
2122 addr1 += memory_region_get_ram_addr(mr);
2123 /* ROM/RAM case */
2124 ptr = qemu_get_ram_ptr(addr1);
2125 switch (type) {
2126 case WRITE_DATA:
2127 memcpy(ptr, buf, l);
2128 invalidate_and_set_dirty(addr1, l);
2129 break;
2130 case FLUSH_CACHE:
2131 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2132 break;
2133 }
2134 }
2135 len -= l;
2136 buf += l;
2137 addr += l;
2138 }
2139}
2140
2141/* used for ROM loading: can write in RAM and ROM */
2142void cpu_physical_memory_write_rom(hwaddr addr,
2143 const uint8_t *buf, int len)
2144{
2145 cpu_physical_memory_write_rom_internal(addr, buf, len, WRITE_DATA);
2146}
2147
2148void cpu_flush_icache_range(hwaddr start, int len)
2149{
2150 /*
2151 * This function should do the same thing as an icache flush that was
2152 * triggered from within the guest. For TCG we are always cache coherent,
2153 * so there is no need to flush anything. For KVM / Xen we need to flush
2154 * the host's instruction cache at least.
2155 */
2156 if (tcg_enabled()) {
2157 return;
2158 }
2159
2160 cpu_physical_memory_write_rom_internal(start, NULL, len, FLUSH_CACHE);
2161}
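
/*
 * Illustrative sketch of the pattern the two helpers above serve: a loader
 * writes an instruction blob into guest memory, bypassing ROM write
 * protection, and then flushes the host icache so KVM/Xen guests observe
 * the new code (a no-op under TCG, as noted above).  The helper name is
 * hypothetical.
 */
static inline void example_load_code_blob(hwaddr dest, const uint8_t *blob,
                                          int size)
{
    cpu_physical_memory_write_rom(dest, blob, size);
    cpu_flush_icache_range(dest, size);
}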
2162
2163typedef struct {
2164 MemoryRegion *mr;
2165 void *buffer;
2166 hwaddr addr;
2167 hwaddr len;
2168} BounceBuffer;
2169
2170static BounceBuffer bounce;
2171
2172typedef struct MapClient {
2173 void *opaque;
2174 void (*callback)(void *opaque);
2175 QLIST_ENTRY(MapClient) link;
2176} MapClient;
2177
2178static QLIST_HEAD(map_client_list, MapClient) map_client_list
2179 = QLIST_HEAD_INITIALIZER(map_client_list);
2180
2181void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2182{
2183 MapClient *client = g_malloc(sizeof(*client));
2184
2185 client->opaque = opaque;
2186 client->callback = callback;
2187 QLIST_INSERT_HEAD(&map_client_list, client, link);
2188 return client;
2189}
2190
2191static void cpu_unregister_map_client(void *_client)
2192{
2193 MapClient *client = (MapClient *)_client;
2194
2195 QLIST_REMOVE(client, link);
2196 g_free(client);
2197}
2198
2199static void cpu_notify_map_clients(void)
2200{
2201 MapClient *client;
2202
2203 while (!QLIST_EMPTY(&map_client_list)) {
2204 client = QLIST_FIRST(&map_client_list);
2205 client->callback(client->opaque);
2206 cpu_unregister_map_client(client);
2207 }
2208}
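
/*
 * Illustrative sketch of how a caller might use cpu_register_map_client():
 * when address_space_map() fails because the single bounce buffer above is
 * busy, register a callback that retries once the buffer is released (the
 * client is unregistered automatically after it has been notified).  The
 * request structure, helper names and write-only direction are
 * hypothetical.
 */
typedef struct ExampleMapRequest {
    AddressSpace *as;
    hwaddr addr;
    hwaddr len;
} ExampleMapRequest;

static inline void example_map_retry(void *opaque)
{
    ExampleMapRequest *req = opaque;
    hwaddr len = req->len;
    void *p = address_space_map(req->as, req->addr, &len, true);

    if (p) {
        /* ... perform the deferred transfer ... */
        address_space_unmap(req->as, p, len, 1, len);
    }
}

static inline void example_map_or_defer(ExampleMapRequest *req)
{
    hwaddr len = req->len;
    void *p = address_space_map(req->as, req->addr, &len, true);

    if (!p) {
        cpu_register_map_client(req, example_map_retry);
        return;
    }
    /* ... use the mapping ... */
    address_space_unmap(req->as, p, len, 1, len);
}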
2209
2210bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2211{
2212 MemoryRegion *mr;
2213 hwaddr l, xlat;
2214
2215 while (len > 0) {
2216 l = len;
2217 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2218 if (!memory_access_is_direct(mr, is_write)) {
2219 l = memory_access_size(mr, l, addr);
2220 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2221 return false;
2222 }
2223 }
2224
2225 len -= l;
2226 addr += l;
2227 }
2228 return true;
2229}
2230
2231/* Map a physical memory region into a host virtual address.
2232 * May map a subset of the requested range, given by and returned in *plen.
2233 * May return NULL if resources needed to perform the mapping are exhausted.
2234 * Use only for reads OR writes - not for read-modify-write operations.
2235 * Use cpu_register_map_client() to know when retrying the map operation is
2236 * likely to succeed.
2237 */
2238void *address_space_map(AddressSpace *as,
2239 hwaddr addr,
2240 hwaddr *plen,
2241 bool is_write)
2242{
2243 hwaddr len = *plen;
2244 hwaddr done = 0;
2245 hwaddr l, xlat, base;
2246 MemoryRegion *mr, *this_mr;
2247 ram_addr_t raddr;
2248
2249 if (len == 0) {
2250 return NULL;
2251 }
2252
2253 l = len;
2254 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2255 if (!memory_access_is_direct(mr, is_write)) {
2256 if (bounce.buffer) {
2257 return NULL;
2258 }
2259 /* Avoid unbounded allocations */
2260 l = MIN(l, TARGET_PAGE_SIZE);
2261 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2262 bounce.addr = addr;
2263 bounce.len = l;
2264
2265 memory_region_ref(mr);
2266 bounce.mr = mr;
2267 if (!is_write) {
2268 address_space_read(as, addr, bounce.buffer, l);
2269 }
2270
2271 *plen = l;
2272 return bounce.buffer;
2273 }
2274
2275 base = xlat;
2276 raddr = memory_region_get_ram_addr(mr);
2277
2278 for (;;) {
2279 len -= l;
2280 addr += l;
2281 done += l;
2282 if (len == 0) {
2283 break;
2284 }
2285
2286 l = len;
2287 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2288 if (this_mr != mr || xlat != base + done) {
2289 break;
2290 }
2291 }
2292
2293 memory_region_ref(mr);
2294 *plen = done;
2295 return qemu_ram_ptr_length(raddr + base, plen);
2296}
2297
2298/* Unmaps a memory region previously mapped by address_space_map().
2299 * Will also mark the memory as dirty if is_write == 1. access_len gives
2300 * the amount of memory that was actually read or written by the caller.
2301 */
2302void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2303 int is_write, hwaddr access_len)
2304{
2305 if (buffer != bounce.buffer) {
2306 MemoryRegion *mr;
2307 ram_addr_t addr1;
2308
2309 mr = qemu_ram_addr_from_host(buffer, &addr1);
2310 assert(mr != NULL);
2311 if (is_write) {
2312 while (access_len) {
2313 unsigned l;
2314 l = TARGET_PAGE_SIZE;
2315 if (l > access_len)
2316 l = access_len;
2317 invalidate_and_set_dirty(addr1, l);
2318 addr1 += l;
2319 access_len -= l;
2320 }
2321 }
2322 if (xen_enabled()) {
2323 xen_invalidate_map_cache_entry(buffer);
2324 }
2325 memory_region_unref(mr);
2326 return;
2327 }
2328 if (is_write) {
2329 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2330 }
2331 qemu_vfree(bounce.buffer);
2332 bounce.buffer = NULL;
2333 memory_region_unref(bounce.mr);
2334 cpu_notify_map_clients();
2335}
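
/*
 * Illustrative sketch of the zero-copy pattern described by the two
 * comments above: map a guest-physical range (possibly getting back less
 * than requested, or NULL when resources are exhausted), touch it through
 * the host pointer, and unmap with the length actually written so the
 * pages are marked dirty.  The helper name and fill pattern are
 * hypothetical.
 */
static inline bool example_fill_guest_range(AddressSpace *as, hwaddr addr,
                                            hwaddr len, uint8_t pattern)
{
    while (len > 0) {
        hwaddr maplen = len;
        void *p = address_space_map(as, addr, &maplen, true);

        if (!p) {
            return false;   /* bounce buffer busy or nothing mapped */
        }
        memset(p, pattern, maplen);
        address_space_unmap(as, p, maplen, 1, maplen);
        addr += maplen;
        len -= maplen;
    }
    return true;
}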
2336
2337void *cpu_physical_memory_map(hwaddr addr,
2338 hwaddr *plen,
2339 int is_write)
2340{
2341 return address_space_map(&address_space_memory, addr, plen, is_write);
2342}
2343
2344void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2345 int is_write, hwaddr access_len)
2346{
2347 address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2348}
2349
2350/* warning: addr must be aligned */
2351static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2352 enum device_endian endian)
2353{
2354 uint8_t *ptr;
2355 uint64_t val;
2356 MemoryRegion *mr;
2357 hwaddr l = 4;
2358 hwaddr addr1;
2359
2360 mr = address_space_translate(as, addr, &addr1, &l, false);
2361 if (l < 4 || !memory_access_is_direct(mr, false)) {
2362 /* I/O case */
2363 io_mem_read(mr, addr1, &val, 4);
2364#if defined(TARGET_WORDS_BIGENDIAN)
2365 if (endian == DEVICE_LITTLE_ENDIAN) {
2366 val = bswap32(val);
2367 }
2368#else
2369 if (endian == DEVICE_BIG_ENDIAN) {
2370 val = bswap32(val);
2371 }
2372#endif
2373 } else {
2374 /* RAM case */
2375 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2376 & TARGET_PAGE_MASK)
2377 + addr1);
2378 switch (endian) {
2379 case DEVICE_LITTLE_ENDIAN:
2380 val = ldl_le_p(ptr);
2381 break;
2382 case DEVICE_BIG_ENDIAN:
2383 val = ldl_be_p(ptr);
2384 break;
2385 default:
2386 val = ldl_p(ptr);
2387 break;
2388 }
2389 }
2390 return val;
2391}
2392
2393uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2394{
2395 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2396}
2397
2398uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2399{
2400 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2401}
2402
2403uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2404{
2405 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2406}
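
/*
 * Illustrative sketch: the _le/_be variants above let a device model state
 * the device's byte order once instead of the guest's, e.g. reading a
 * little-endian 32-bit register regardless of TARGET_WORDS_BIGENDIAN.  The
 * register address is hypothetical.
 */
static inline uint32_t example_read_le_reg(AddressSpace *as)
{
    const hwaddr example_reg_addr = 0x10000000;

    return ldl_le_phys(as, example_reg_addr);
}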
2407
2408/* warning: addr must be aligned */
2409static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2410 enum device_endian endian)
2411{
2412 uint8_t *ptr;
2413 uint64_t val;
2414 MemoryRegion *mr;
2415 hwaddr l = 8;
2416 hwaddr addr1;
2417
2418 mr = address_space_translate(as, addr, &addr1, &l,
2419 false);
2420 if (l < 8 || !memory_access_is_direct(mr, false)) {
2421 /* I/O case */
2422 io_mem_read(mr, addr1, &val, 8);
2423#if defined(TARGET_WORDS_BIGENDIAN)
2424 if (endian == DEVICE_LITTLE_ENDIAN) {
2425 val = bswap64(val);
2426 }
2427#else
2428 if (endian == DEVICE_BIG_ENDIAN) {
2429 val = bswap64(val);
2430 }
2431#endif
2432 } else {
2433 /* RAM case */
2434 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2435 & TARGET_PAGE_MASK)
2436 + addr1);
2437 switch (endian) {
2438 case DEVICE_LITTLE_ENDIAN:
2439 val = ldq_le_p(ptr);
2440 break;
2441 case DEVICE_BIG_ENDIAN:
2442 val = ldq_be_p(ptr);
2443 break;
2444 default:
2445 val = ldq_p(ptr);
2446 break;
2447 }
2448 }
2449 return val;
2450}
2451
2452uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2453{
2454 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2455}
2456
2457uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2458{
2459 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2460}
2461
2462uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2463{
2464 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2465}
2466
2467/* XXX: optimize */
2468uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2469{
2470 uint8_t val;
2471 address_space_rw(as, addr, &val, 1, 0);
2472 return val;
2473}
2474
2475/* warning: addr must be aligned */
2476static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2477 enum device_endian endian)
2478{
2479 uint8_t *ptr;
2480 uint64_t val;
2481 MemoryRegion *mr;
2482 hwaddr l = 2;
2483 hwaddr addr1;
2484
2485 mr = address_space_translate(as, addr, &addr1, &l,
2486 false);
2487 if (l < 2 || !memory_access_is_direct(mr, false)) {
2488 /* I/O case */
2489 io_mem_read(mr, addr1, &val, 2);
2490#if defined(TARGET_WORDS_BIGENDIAN)
2491 if (endian == DEVICE_LITTLE_ENDIAN) {
2492 val = bswap16(val);
2493 }
2494#else
2495 if (endian == DEVICE_BIG_ENDIAN) {
2496 val = bswap16(val);
2497 }
2498#endif
2499 } else {
2500 /* RAM case */
2501 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2502 & TARGET_PAGE_MASK)
2503 + addr1);
2504 switch (endian) {
2505 case DEVICE_LITTLE_ENDIAN:
2506 val = lduw_le_p(ptr);
2507 break;
2508 case DEVICE_BIG_ENDIAN:
2509 val = lduw_be_p(ptr);
2510 break;
2511 default:
2512 val = lduw_p(ptr);
2513 break;
2514 }
2515 }
2516 return val;
2517}
2518
2519uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2520{
2521 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2522}
2523
2524uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2525{
2526 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2527}
2528
2529uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2530{
2531 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2532}
2533
2534/* warning: addr must be aligned. The RAM page is not marked as dirty
2535 and the code inside is not invalidated. It is useful when the dirty
2536 bits are used to track modified PTEs. */
2537void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2538{
2539 uint8_t *ptr;
2540 MemoryRegion *mr;
2541 hwaddr l = 4;
2542 hwaddr addr1;
2543
2544 mr = address_space_translate(as, addr, &addr1, &l,
2545 true);
2546 if (l < 4 || !memory_access_is_direct(mr, true)) {
2547 io_mem_write(mr, addr1, val, 4);
2548 } else {
2549 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2550 ptr = qemu_get_ram_ptr(addr1);
2551 stl_p(ptr, val);
2552
2553 if (unlikely(in_migration)) {
2554 if (cpu_physical_memory_is_clean(addr1)) {
2555 /* invalidate code */
2556 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2557 /* set dirty bit */
2558 cpu_physical_memory_set_dirty_flag(addr1,
2559 DIRTY_MEMORY_MIGRATION);
2560 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2561 }
2562 }
2563 }
2564}
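
/*
 * Illustrative sketch of the use case the comment above hints at: a
 * target's software page-table walker can set an accessed/dirty flag in a
 * guest PTE with stl_phys_notdirty() so that live migration and VGA dirty
 * tracking are not polluted by bookkeeping writes.  The PTE layout and bit
 * value are hypothetical.
 */
static inline void example_set_pte_accessed(AddressSpace *as, hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(as, pte_addr);

    pte |= 0x20;    /* hypothetical "accessed" bit */
    stl_phys_notdirty(as, pte_addr, pte);
}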
2565
2566/* warning: addr must be aligned */
2567static inline void stl_phys_internal(AddressSpace *as,
2568 hwaddr addr, uint32_t val,
2569 enum device_endian endian)
2570{
2571 uint8_t *ptr;
2572 MemoryRegion *mr;
2573 hwaddr l = 4;
2574 hwaddr addr1;
2575
2576 mr = address_space_translate(as, addr, &addr1, &l,
2577 true);
2578 if (l < 4 || !memory_access_is_direct(mr, true)) {
2579#if defined(TARGET_WORDS_BIGENDIAN)
2580 if (endian == DEVICE_LITTLE_ENDIAN) {
2581 val = bswap32(val);
2582 }
2583#else
2584 if (endian == DEVICE_BIG_ENDIAN) {
2585 val = bswap32(val);
2586 }
2587#endif
2588 io_mem_write(mr, addr1, val, 4);
2589 } else {
2590 /* RAM case */
2591 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2592 ptr = qemu_get_ram_ptr(addr1);
2593 switch (endian) {
2594 case DEVICE_LITTLE_ENDIAN:
2595 stl_le_p(ptr, val);
2596 break;
2597 case DEVICE_BIG_ENDIAN:
2598 stl_be_p(ptr, val);
2599 break;
2600 default:
2601 stl_p(ptr, val);
2602 break;
2603 }
2604 invalidate_and_set_dirty(addr1, 4);
2605 }
2606}
2607
2608void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2609{
2610 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2611}
2612
2613void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2614{
2615 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2616}
2617
2618void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2619{
2620 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2621}
2622
2623/* XXX: optimize */
2624void stb_phys(hwaddr addr, uint32_t val)
2625{
2626 uint8_t v = val;
2627 cpu_physical_memory_write(addr, &v, 1);
2628}
2629
2630/* warning: addr must be aligned */
2631static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2632 enum device_endian endian)
2633{
2634 uint8_t *ptr;
2635 MemoryRegion *mr;
2636 hwaddr l = 2;
2637 hwaddr addr1;
2638
2639 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2640 true);
2641 if (l < 2 || !memory_access_is_direct(mr, true)) {
2642#if defined(TARGET_WORDS_BIGENDIAN)
2643 if (endian == DEVICE_LITTLE_ENDIAN) {
2644 val = bswap16(val);
2645 }
2646#else
2647 if (endian == DEVICE_BIG_ENDIAN) {
2648 val = bswap16(val);
2649 }
2650#endif
2651 io_mem_write(mr, addr1, val, 2);
2652 } else {
2653 /* RAM case */
2654 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2655 ptr = qemu_get_ram_ptr(addr1);
2656 switch (endian) {
2657 case DEVICE_LITTLE_ENDIAN:
2658 stw_le_p(ptr, val);
2659 break;
2660 case DEVICE_BIG_ENDIAN:
2661 stw_be_p(ptr, val);
2662 break;
2663 default:
2664 stw_p(ptr, val);
2665 break;
2666 }
2667 invalidate_and_set_dirty(addr1, 2);
2668 }
2669}
2670
2671void stw_phys(hwaddr addr, uint32_t val)
2672{
2673 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2674}
2675
2676void stw_le_phys(hwaddr addr, uint32_t val)
2677{
2678 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2679}
2680
2681void stw_be_phys(hwaddr addr, uint32_t val)
2682{
2683 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2684}
2685
2686/* XXX: optimize */
2687void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2688{
2689 val = tswap64(val);
2690 address_space_rw(as, addr, (void *) &val, 8, 1);
2691}
2692
2693void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2694{
2695 val = cpu_to_le64(val);
2696 address_space_rw(as, addr, (void *) &val, 8, 1);
2697}
2698
2699void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2700{
2701 val = cpu_to_be64(val);
2702 address_space_rw(as, addr, (void *) &val, 8, 1);
2703}
2704
2705/* virtual memory access for debug (includes writing to ROM) */
2706int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2707 uint8_t *buf, int len, int is_write)
2708{
2709 int l;
2710 hwaddr phys_addr;
2711 target_ulong page;
2712
2713 while (len > 0) {
2714 page = addr & TARGET_PAGE_MASK;
2715 phys_addr = cpu_get_phys_page_debug(cpu, page);
2716 /* if no physical page mapped, return an error */
2717 if (phys_addr == -1)
2718 return -1;
2719 l = (page + TARGET_PAGE_SIZE) - addr;
2720 if (l > len)
2721 l = len;
2722 phys_addr += (addr & ~TARGET_PAGE_MASK);
2723 if (is_write)
2724 cpu_physical_memory_write_rom(phys_addr, buf, l);
2725 else
2726 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2727 len -= l;
2728 buf += l;
2729 addr += l;
2730 }
2731 return 0;
2732}
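
/*
 * Illustrative sketch: how a debugger front end (such as the gdbstub)
 * could read guest-virtual memory through the page walker above.  The
 * helper name is hypothetical.
 */
static inline bool example_debug_read(CPUState *cpu, target_ulong vaddr,
                                      void *buf, int len)
{
    /* cpu_memory_rw_debug() returns -1 if any page is unmapped. */
    return cpu_memory_rw_debug(cpu, vaddr, buf, len, 0) == 0;
}
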
2733#endif
2734
2735#if !defined(CONFIG_USER_ONLY)
2736
2737/*
2738 * A helper function for the _utterly broken_ virtio device model to find out if
2739 * it's running on a big-endian machine. Don't do this at home, kids!
2740 */
2741bool virtio_is_big_endian(void);
2742bool virtio_is_big_endian(void)
2743{
2744#if defined(TARGET_WORDS_BIGENDIAN)
2745 return true;
2746#else
2747 return false;
2748#endif
2749}
2750
2751#endif
2752
2753#ifndef CONFIG_USER_ONLY
2754bool cpu_physical_memory_is_io(hwaddr phys_addr)
2755{
2756 MemoryRegion *mr;
2757 hwaddr l = 1;
2758
2759 mr = address_space_translate(&address_space_memory,
2760 phys_addr, &phys_addr, &l, false);
2761
2762 return !(memory_region_is_ram(mr) ||
2763 memory_region_is_romd(mr));
2764}
2765
2766void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2767{
2768 RAMBlock *block;
2769
2770 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2771 func(block->host, block->offset, block->length, opaque);
2772 }
2773}
2774#endif