[mirror_qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
54
55 #include "qemu/range.h"
56
57 //#define DEBUG_SUBPAGE
58
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
61
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
66
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
69
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
72
73 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
74 #define RAM_PREALLOC (1 << 0)
75
76 /* RAM is mmap-ed with MAP_SHARED */
77 #define RAM_SHARED (1 << 1)
78
79 #endif
80
81 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
82 /* current CPU in the current thread. It is only valid inside
83 cpu_exec() */
84 DEFINE_TLS(CPUState *, current_cpu);
85 /* 0 = Do not count executed instructions.
86 1 = Precise instruction counting.
87 2 = Adaptive rate instruction counting. */
88 int use_icount;
89
90 #if !defined(CONFIG_USER_ONLY)
91
92 typedef struct PhysPageEntry PhysPageEntry;
93
94 struct PhysPageEntry {
95     /* How many bits to skip to the next level (in units of P_L2_SIZE). 0 for a leaf. */
96 uint32_t skip : 6;
97 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
98 uint32_t ptr : 26;
99 };
100
101 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
102
103 /* Size of the L2 (and L3, etc) page tables. */
104 #define ADDR_SPACE_BITS 64
105
106 #define P_L2_BITS 9
107 #define P_L2_SIZE (1 << P_L2_BITS)
108
109 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
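/* Worked example (informal): with ADDR_SPACE_BITS == 64 and a typical
 * TARGET_PAGE_BITS of 12, P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6, i.e.
 * the radix tree has six levels of 9 bits each above the page offset.
 * Targets with a larger page size end up with a shallower tree.
 */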
110
111 typedef PhysPageEntry Node[P_L2_SIZE];
112
113 typedef struct PhysPageMap {
114 unsigned sections_nb;
115 unsigned sections_nb_alloc;
116 unsigned nodes_nb;
117 unsigned nodes_nb_alloc;
118 Node *nodes;
119 MemoryRegionSection *sections;
120 } PhysPageMap;
121
122 struct AddressSpaceDispatch {
123 /* This is a multi-level map on the physical address space.
124 * The bottom level has pointers to MemoryRegionSections.
125 */
126 PhysPageEntry phys_map;
127 PhysPageMap map;
128 AddressSpace *as;
129 };
130
131 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
132 typedef struct subpage_t {
133 MemoryRegion iomem;
134 AddressSpace *as;
135 hwaddr base;
136 uint16_t sub_section[TARGET_PAGE_SIZE];
137 } subpage_t;
138
139 #define PHYS_SECTION_UNASSIGNED 0
140 #define PHYS_SECTION_NOTDIRTY 1
141 #define PHYS_SECTION_ROM 2
142 #define PHYS_SECTION_WATCH 3
143
144 static void io_mem_init(void);
145 static void memory_map_init(void);
146 static void tcg_commit(MemoryListener *listener);
147
148 static MemoryRegion io_mem_watch;
149 #endif
150
151 #if !defined(CONFIG_USER_ONLY)
152
153 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
154 {
155 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
156 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
157 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
158 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
159 }
160 }
161
162 static uint32_t phys_map_node_alloc(PhysPageMap *map)
163 {
164 unsigned i;
165 uint32_t ret;
166
167 ret = map->nodes_nb++;
168 assert(ret != PHYS_MAP_NODE_NIL);
169 assert(ret != map->nodes_nb_alloc);
170 for (i = 0; i < P_L2_SIZE; ++i) {
171 map->nodes[ret][i].skip = 1;
172 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
173 }
174 return ret;
175 }
176
177 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
178 hwaddr *index, hwaddr *nb, uint16_t leaf,
179 int level)
180 {
181 PhysPageEntry *p;
182 int i;
183 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
184
185 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
186 lp->ptr = phys_map_node_alloc(map);
187 p = map->nodes[lp->ptr];
188 if (level == 0) {
189 for (i = 0; i < P_L2_SIZE; i++) {
190 p[i].skip = 0;
191 p[i].ptr = PHYS_SECTION_UNASSIGNED;
192 }
193 }
194 } else {
195 p = map->nodes[lp->ptr];
196 }
197 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
198
199 while (*nb && lp < &p[P_L2_SIZE]) {
200 if ((*index & (step - 1)) == 0 && *nb >= step) {
201 lp->skip = 0;
202 lp->ptr = leaf;
203 *index += step;
204 *nb -= step;
205 } else {
206 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
207 }
208 ++lp;
209 }
210 }
211
212 static void phys_page_set(AddressSpaceDispatch *d,
213 hwaddr index, hwaddr nb,
214 uint16_t leaf)
215 {
216 /* Wildly overreserve - it doesn't matter much. */
217 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
218
219 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
220 }
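/* Callers in this file (register_subpage() and register_multipage() below)
 * use phys_page_set() to install a MemoryRegionSection, e.g.
 * phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index).
 */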
221
222 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
223 * and update our entry so we can skip it and go directly to the destination.
224 */
225 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
226 {
227 unsigned valid_ptr = P_L2_SIZE;
228 int valid = 0;
229 PhysPageEntry *p;
230 int i;
231
232 if (lp->ptr == PHYS_MAP_NODE_NIL) {
233 return;
234 }
235
236 p = nodes[lp->ptr];
237 for (i = 0; i < P_L2_SIZE; i++) {
238 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
239 continue;
240 }
241
242 valid_ptr = i;
243 valid++;
244 if (p[i].skip) {
245 phys_page_compact(&p[i], nodes, compacted);
246 }
247 }
248
249 /* We can only compress if there's only one child. */
250 if (valid != 1) {
251 return;
252 }
253
254 assert(valid_ptr < P_L2_SIZE);
255
256 /* Don't compress if it won't fit in the # of bits we have. */
257 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
258 return;
259 }
260
261 lp->ptr = p[valid_ptr].ptr;
262 if (!p[valid_ptr].skip) {
263 /* If our only child is a leaf, make this a leaf. */
264 /* By design, we should have made this node a leaf to begin with so we
265 * should never reach here.
266 * But since it's so simple to handle this, let's do it just in case we
267 * change this rule.
268 */
269 lp->skip = 0;
270 } else {
271 lp->skip += p[valid_ptr].skip;
272 }
273 }
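/* After compaction a chain of single-child nodes collapses into one entry
 * whose skip field spans several levels at once; phys_page_find() below
 * accounts for this by decrementing its level counter by lp.skip per step.
 */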
274
275 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
276 {
277 DECLARE_BITMAP(compacted, nodes_nb);
278
279 if (d->phys_map.skip) {
280 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
281 }
282 }
283
284 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
285 Node *nodes, MemoryRegionSection *sections)
286 {
287 PhysPageEntry *p;
288 hwaddr index = addr >> TARGET_PAGE_BITS;
289 int i;
290
291 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
292 if (lp.ptr == PHYS_MAP_NODE_NIL) {
293 return &sections[PHYS_SECTION_UNASSIGNED];
294 }
295 p = nodes[lp.ptr];
296 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
297 }
298
299 if (sections[lp.ptr].size.hi ||
300 range_covers_byte(sections[lp.ptr].offset_within_address_space,
301 sections[lp.ptr].size.lo, addr)) {
302 return &sections[lp.ptr];
303 } else {
304 return &sections[PHYS_SECTION_UNASSIGNED];
305 }
306 }
307
308 bool memory_region_is_unassigned(MemoryRegion *mr)
309 {
310 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
311 && mr != &io_mem_watch;
312 }
313
314 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
315 hwaddr addr,
316 bool resolve_subpage)
317 {
318 MemoryRegionSection *section;
319 subpage_t *subpage;
320
321 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
322 if (resolve_subpage && section->mr->subpage) {
323 subpage = container_of(section->mr, subpage_t, iomem);
324 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
325 }
326 return section;
327 }
328
329 static MemoryRegionSection *
330 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
331 hwaddr *plen, bool resolve_subpage)
332 {
333 MemoryRegionSection *section;
334 Int128 diff;
335
336 section = address_space_lookup_region(d, addr, resolve_subpage);
337 /* Compute offset within MemoryRegionSection */
338 addr -= section->offset_within_address_space;
339
340 /* Compute offset within MemoryRegion */
341 *xlat = addr + section->offset_within_region;
342
343 diff = int128_sub(section->mr->size, int128_make64(addr));
344 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
345 return section;
346 }
347
348 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
349 {
350 if (memory_region_is_ram(mr)) {
351 return !(is_write && mr->readonly);
352 }
353 if (memory_region_is_romd(mr)) {
354 return !is_write;
355 }
356
357 return false;
358 }
359
360 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
361 hwaddr *xlat, hwaddr *plen,
362 bool is_write)
363 {
364 IOMMUTLBEntry iotlb;
365 MemoryRegionSection *section;
366 MemoryRegion *mr;
367 hwaddr len = *plen;
368
369 for (;;) {
370 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
371 mr = section->mr;
372
373 if (!mr->iommu_ops) {
374 break;
375 }
376
377 iotlb = mr->iommu_ops->translate(mr, addr);
378 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
379 | (addr & iotlb.addr_mask));
380 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
381 if (!(iotlb.perm & (1 << is_write))) {
382 mr = &io_mem_unassigned;
383 break;
384 }
385
386 as = iotlb.target_as;
387 }
388
389 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
390 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
391 len = MIN(page, len);
392 }
393
394 *plen = len;
395 *xlat = addr;
396 return mr;
397 }
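/* Typical use (see address_space_rw() below): translate, then either access
 * the returned MemoryRegion's RAM directly when memory_access_is_direct()
 * says so, or go through io_mem_read()/io_mem_write() at offset *xlat,
 * looping when *plen comes back shorter than the requested length.
 */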
398
399 MemoryRegionSection *
400 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
401 hwaddr *plen)
402 {
403 MemoryRegionSection *section;
404 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
405
406 assert(!section->mr->iommu_ops);
407 return section;
408 }
409 #endif
410
411 void cpu_exec_init_all(void)
412 {
413 #if !defined(CONFIG_USER_ONLY)
414 qemu_mutex_init(&ram_list.mutex);
415 memory_map_init();
416 io_mem_init();
417 #endif
418 }
419
420 #if !defined(CONFIG_USER_ONLY)
421
422 static int cpu_common_post_load(void *opaque, int version_id)
423 {
424 CPUState *cpu = opaque;
425
426 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
427 version_id is increased. */
428 cpu->interrupt_request &= ~0x01;
429 tlb_flush(cpu, 1);
430
431 return 0;
432 }
433
434 const VMStateDescription vmstate_cpu_common = {
435 .name = "cpu_common",
436 .version_id = 1,
437 .minimum_version_id = 1,
438 .post_load = cpu_common_post_load,
439 .fields = (VMStateField[]) {
440 VMSTATE_UINT32(halted, CPUState),
441 VMSTATE_UINT32(interrupt_request, CPUState),
442 VMSTATE_END_OF_LIST()
443 }
444 };
445
446 #endif
447
448 CPUState *qemu_get_cpu(int index)
449 {
450 CPUState *cpu;
451
452 CPU_FOREACH(cpu) {
453 if (cpu->cpu_index == index) {
454 return cpu;
455 }
456 }
457
458 return NULL;
459 }
460
461 #if !defined(CONFIG_USER_ONLY)
462 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
463 {
464 /* We only support one address space per cpu at the moment. */
465 assert(cpu->as == as);
466
467 if (cpu->tcg_as_listener) {
468 memory_listener_unregister(cpu->tcg_as_listener);
469 } else {
470 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
471 }
472 cpu->tcg_as_listener->commit = tcg_commit;
473 memory_listener_register(cpu->tcg_as_listener, as);
474 }
475 #endif
476
477 void cpu_exec_init(CPUArchState *env)
478 {
479 CPUState *cpu = ENV_GET_CPU(env);
480 CPUClass *cc = CPU_GET_CLASS(cpu);
481 CPUState *some_cpu;
482 int cpu_index;
483
484 #if defined(CONFIG_USER_ONLY)
485 cpu_list_lock();
486 #endif
487 cpu_index = 0;
488 CPU_FOREACH(some_cpu) {
489 cpu_index++;
490 }
491 cpu->cpu_index = cpu_index;
492 cpu->numa_node = 0;
493 QTAILQ_INIT(&cpu->breakpoints);
494 QTAILQ_INIT(&cpu->watchpoints);
495 #ifndef CONFIG_USER_ONLY
496 cpu->as = &address_space_memory;
497 cpu->thread_id = qemu_get_thread_id();
498 #endif
499 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
500 #if defined(CONFIG_USER_ONLY)
501 cpu_list_unlock();
502 #endif
503 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
504 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
505 }
506 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
507 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
508 cpu_save, cpu_load, env);
509 assert(cc->vmsd == NULL);
510 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
511 #endif
512 if (cc->vmsd != NULL) {
513 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
514 }
515 }
516
517 #if defined(TARGET_HAS_ICE)
518 #if defined(CONFIG_USER_ONLY)
519 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
520 {
521 tb_invalidate_phys_page_range(pc, pc + 1, 0);
522 }
523 #else
524 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
525 {
526 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
527 if (phys != -1) {
528 tb_invalidate_phys_addr(cpu->as,
529 phys | (pc & ~TARGET_PAGE_MASK));
530 }
531 }
532 #endif
533 #endif /* TARGET_HAS_ICE */
534
535 #if defined(CONFIG_USER_ONLY)
536 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
537
538 {
540
541 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
542 int flags, CPUWatchpoint **watchpoint)
543 {
544 return -ENOSYS;
545 }
546 #else
547 /* Add a watchpoint. */
548 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
549 int flags, CPUWatchpoint **watchpoint)
550 {
551 vaddr len_mask = ~(len - 1);
552 CPUWatchpoint *wp;
553
554 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
555 if ((len & (len - 1)) || (addr & ~len_mask) ||
556 len == 0 || len > TARGET_PAGE_SIZE) {
557 error_report("tried to set invalid watchpoint at %"
558 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
559 return -EINVAL;
560 }
561 wp = g_malloc(sizeof(*wp));
562
563 wp->vaddr = addr;
564 wp->len_mask = len_mask;
565 wp->flags = flags;
566
567 /* keep all GDB-injected watchpoints in front */
568 if (flags & BP_GDB) {
569 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
570 } else {
571 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
572 }
573
574 tlb_flush_page(cpu, addr);
575
576 if (watchpoint)
577 *watchpoint = wp;
578 return 0;
579 }
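/* Informal usage sketch (not taken from this file): a debug stub could set a
 * 4-byte write watchpoint with
 *
 *     CPUWatchpoint *wp;
 *     cpu_watchpoint_insert(cpu, addr, 4, BP_MEM_WRITE | BP_GDB, &wp);
 *
 * subject to the checks above: len must be a power of two no larger than
 * TARGET_PAGE_SIZE, and addr must be aligned to len.
 */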
580
581 /* Remove a specific watchpoint. */
582 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
583 int flags)
584 {
585 vaddr len_mask = ~(len - 1);
586 CPUWatchpoint *wp;
587
588 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
589 if (addr == wp->vaddr && len_mask == wp->len_mask
590 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
591 cpu_watchpoint_remove_by_ref(cpu, wp);
592 return 0;
593 }
594 }
595 return -ENOENT;
596 }
597
598 /* Remove a specific watchpoint by reference. */
599 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
600 {
601 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
602
603 tlb_flush_page(cpu, watchpoint->vaddr);
604
605 g_free(watchpoint);
606 }
607
608 /* Remove all matching watchpoints. */
609 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
610 {
611 CPUWatchpoint *wp, *next;
612
613 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
614 if (wp->flags & mask) {
615 cpu_watchpoint_remove_by_ref(cpu, wp);
616 }
617 }
618 }
619 #endif
620
621 /* Add a breakpoint. */
622 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
623 CPUBreakpoint **breakpoint)
624 {
625 #if defined(TARGET_HAS_ICE)
626 CPUBreakpoint *bp;
627
628 bp = g_malloc(sizeof(*bp));
629
630 bp->pc = pc;
631 bp->flags = flags;
632
633 /* keep all GDB-injected breakpoints in front */
634 if (flags & BP_GDB) {
635 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
636 } else {
637 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
638 }
639
640 breakpoint_invalidate(cpu, pc);
641
642 if (breakpoint) {
643 *breakpoint = bp;
644 }
645 return 0;
646 #else
647 return -ENOSYS;
648 #endif
649 }
650
651 /* Remove a specific breakpoint. */
652 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
653 {
654 #if defined(TARGET_HAS_ICE)
655 CPUBreakpoint *bp;
656
657 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
658 if (bp->pc == pc && bp->flags == flags) {
659 cpu_breakpoint_remove_by_ref(cpu, bp);
660 return 0;
661 }
662 }
663 return -ENOENT;
664 #else
665 return -ENOSYS;
666 #endif
667 }
668
669 /* Remove a specific breakpoint by reference. */
670 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
671 {
672 #if defined(TARGET_HAS_ICE)
673 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
674
675 breakpoint_invalidate(cpu, breakpoint->pc);
676
677 g_free(breakpoint);
678 #endif
679 }
680
681 /* Remove all matching breakpoints. */
682 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
683 {
684 #if defined(TARGET_HAS_ICE)
685 CPUBreakpoint *bp, *next;
686
687 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
688 if (bp->flags & mask) {
689 cpu_breakpoint_remove_by_ref(cpu, bp);
690 }
691 }
692 #endif
693 }
694
695 /* Enable or disable single-step mode. EXCP_DEBUG is returned by the
696 CPU loop after each instruction */
697 void cpu_single_step(CPUState *cpu, int enabled)
698 {
699 #if defined(TARGET_HAS_ICE)
700 if (cpu->singlestep_enabled != enabled) {
701 cpu->singlestep_enabled = enabled;
702 if (kvm_enabled()) {
703 kvm_update_guest_debug(cpu, 0);
704 } else {
705 /* must flush all the translated code to avoid inconsistencies */
706 /* XXX: only flush what is necessary */
707 CPUArchState *env = cpu->env_ptr;
708 tb_flush(env);
709 }
710 }
711 #endif
712 }
713
714 void cpu_abort(CPUState *cpu, const char *fmt, ...)
715 {
716 va_list ap;
717 va_list ap2;
718
719 va_start(ap, fmt);
720 va_copy(ap2, ap);
721 fprintf(stderr, "qemu: fatal: ");
722 vfprintf(stderr, fmt, ap);
723 fprintf(stderr, "\n");
724 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
725 if (qemu_log_enabled()) {
726 qemu_log("qemu: fatal: ");
727 qemu_log_vprintf(fmt, ap2);
728 qemu_log("\n");
729 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
730 qemu_log_flush();
731 qemu_log_close();
732 }
733 va_end(ap2);
734 va_end(ap);
735 #if defined(CONFIG_USER_ONLY)
736 {
737 struct sigaction act;
738 sigfillset(&act.sa_mask);
739 act.sa_handler = SIG_DFL;
740 sigaction(SIGABRT, &act, NULL);
741 }
742 #endif
743 abort();
744 }
745
746 #if !defined(CONFIG_USER_ONLY)
747 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
748 {
749 RAMBlock *block;
750
751 /* The list is protected by the iothread lock here. */
752 block = ram_list.mru_block;
753 if (block && addr - block->offset < block->length) {
754 goto found;
755 }
756 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
757 if (addr - block->offset < block->length) {
758 goto found;
759 }
760 }
761
762 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
763 abort();
764
765 found:
766 ram_list.mru_block = block;
767 return block;
768 }
769
770 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
771 {
772 ram_addr_t start1;
773 RAMBlock *block;
774 ram_addr_t end;
775
776 end = TARGET_PAGE_ALIGN(start + length);
777 start &= TARGET_PAGE_MASK;
778
779 block = qemu_get_ram_block(start);
780 assert(block == qemu_get_ram_block(end - 1));
781 start1 = (uintptr_t)block->host + (start - block->offset);
782 cpu_tlb_reset_dirty_all(start1, length);
783 }
784
785 /* Note: start and end must be within the same ram block. */
786 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
787 unsigned client)
788 {
789 if (length == 0)
790 return;
791 cpu_physical_memory_clear_dirty_range(start, length, client);
792
793 if (tcg_enabled()) {
794 tlb_reset_dirty_range_all(start, length);
795 }
796 }
797
798 static void cpu_physical_memory_set_dirty_tracking(bool enable)
799 {
800 in_migration = enable;
801 }
802
803 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
804 MemoryRegionSection *section,
805 target_ulong vaddr,
806 hwaddr paddr, hwaddr xlat,
807 int prot,
808 target_ulong *address)
809 {
810 hwaddr iotlb;
811 CPUWatchpoint *wp;
812
813 if (memory_region_is_ram(section->mr)) {
814 /* Normal RAM. */
815 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
816 + xlat;
817 if (!section->readonly) {
818 iotlb |= PHYS_SECTION_NOTDIRTY;
819 } else {
820 iotlb |= PHYS_SECTION_ROM;
821 }
822 } else {
823 iotlb = section - section->address_space->dispatch->map.sections;
824 iotlb += xlat;
825 }
826
827 /* Make accesses to pages with watchpoints go via the
828 watchpoint trap routines. */
829 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
830 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
831 /* Avoid trapping reads of pages with a write breakpoint. */
832 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
833 iotlb = PHYS_SECTION_WATCH + paddr;
834 *address |= TLB_MMIO;
835 break;
836 }
837 }
838 }
839
840 return iotlb;
841 }
842 #endif /* defined(CONFIG_USER_ONLY) */
843
844 #if !defined(CONFIG_USER_ONLY)
845
846 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
847 uint16_t section);
848 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
849
850 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
851
852 /*
853  * Set a custom physical guest memory allocator.
854 * Accelerators with unusual needs may need this. Hopefully, we can
855 * get rid of it eventually.
856 */
857 void phys_mem_set_alloc(void *(*alloc)(size_t))
858 {
859 phys_mem_alloc = alloc;
860 }
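/* At the time of writing the only expected user is believed to be s390x KVM,
 * which installs its own allocator to satisfy host kernel alignment
 * constraints; boards should not rely on this hook.
 */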
861
862 static uint16_t phys_section_add(PhysPageMap *map,
863 MemoryRegionSection *section)
864 {
865 /* The physical section number is ORed with a page-aligned
866 * pointer to produce the iotlb entries. Thus it should
867 * never overflow into the page-aligned value.
868 */
869 assert(map->sections_nb < TARGET_PAGE_SIZE);
870
871 if (map->sections_nb == map->sections_nb_alloc) {
872 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
873 map->sections = g_renew(MemoryRegionSection, map->sections,
874 map->sections_nb_alloc);
875 }
876 map->sections[map->sections_nb] = *section;
877 memory_region_ref(section->mr);
878 return map->sections_nb++;
879 }
880
881 static void phys_section_destroy(MemoryRegion *mr)
882 {
883 memory_region_unref(mr);
884
885 if (mr->subpage) {
886 subpage_t *subpage = container_of(mr, subpage_t, iomem);
887 memory_region_destroy(&subpage->iomem);
888 g_free(subpage);
889 }
890 }
891
892 static void phys_sections_free(PhysPageMap *map)
893 {
894 while (map->sections_nb > 0) {
895 MemoryRegionSection *section = &map->sections[--map->sections_nb];
896 phys_section_destroy(section->mr);
897 }
898 g_free(map->sections);
899 g_free(map->nodes);
900 }
901
902 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
903 {
904 subpage_t *subpage;
905 hwaddr base = section->offset_within_address_space
906 & TARGET_PAGE_MASK;
907 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
908 d->map.nodes, d->map.sections);
909 MemoryRegionSection subsection = {
910 .offset_within_address_space = base,
911 .size = int128_make64(TARGET_PAGE_SIZE),
912 };
913 hwaddr start, end;
914
915 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
916
917 if (!(existing->mr->subpage)) {
918 subpage = subpage_init(d->as, base);
919 subsection.address_space = d->as;
920 subsection.mr = &subpage->iomem;
921 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
922 phys_section_add(&d->map, &subsection));
923 } else {
924 subpage = container_of(existing->mr, subpage_t, iomem);
925 }
926 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
927 end = start + int128_get64(section->size) - 1;
928 subpage_register(subpage, start, end,
929 phys_section_add(&d->map, section));
930 }
931
932
933 static void register_multipage(AddressSpaceDispatch *d,
934 MemoryRegionSection *section)
935 {
936 hwaddr start_addr = section->offset_within_address_space;
937 uint16_t section_index = phys_section_add(&d->map, section);
938 uint64_t num_pages = int128_get64(int128_rshift(section->size,
939 TARGET_PAGE_BITS));
940
941 assert(num_pages);
942 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
943 }
944
945 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
946 {
947 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
948 AddressSpaceDispatch *d = as->next_dispatch;
949 MemoryRegionSection now = *section, remain = *section;
950 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
951
952 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
953 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
954 - now.offset_within_address_space;
955
956 now.size = int128_min(int128_make64(left), now.size);
957 register_subpage(d, &now);
958 } else {
959 now.size = int128_zero();
960 }
961 while (int128_ne(remain.size, now.size)) {
962 remain.size = int128_sub(remain.size, now.size);
963 remain.offset_within_address_space += int128_get64(now.size);
964 remain.offset_within_region += int128_get64(now.size);
965 now = remain;
966 if (int128_lt(remain.size, page_size)) {
967 register_subpage(d, &now);
968 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
969 now.size = page_size;
970 register_subpage(d, &now);
971 } else {
972 now.size = int128_and(now.size, int128_neg(page_size));
973 register_multipage(d, &now);
974 }
975 }
976 }
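/* Example of the splitting above: with 4 KiB pages, a section covering
 * [0x1800, 0x3800) is registered as a subpage piece [0x1800, 0x2000), a
 * page-aligned piece [0x2000, 0x3000) via register_multipage(), and a
 * trailing subpage piece [0x3000, 0x3800).
 */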
977
978 void qemu_flush_coalesced_mmio_buffer(void)
979 {
980 if (kvm_enabled())
981 kvm_flush_coalesced_mmio_buffer();
982 }
983
984 void qemu_mutex_lock_ramlist(void)
985 {
986 qemu_mutex_lock(&ram_list.mutex);
987 }
988
989 void qemu_mutex_unlock_ramlist(void)
990 {
991 qemu_mutex_unlock(&ram_list.mutex);
992 }
993
994 #ifdef __linux__
995
996 #include <sys/vfs.h>
997
998 #define HUGETLBFS_MAGIC 0x958458f6
999
1000 static long gethugepagesize(const char *path)
1001 {
1002 struct statfs fs;
1003 int ret;
1004
1005 do {
1006 ret = statfs(path, &fs);
1007 } while (ret != 0 && errno == EINTR);
1008
1009 if (ret != 0) {
1010 perror(path);
1011 return 0;
1012 }
1013
1014 if (fs.f_type != HUGETLBFS_MAGIC)
1015 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1016
1017 return fs.f_bsize;
1018 }
1019
1020 static void *file_ram_alloc(RAMBlock *block,
1021 ram_addr_t memory,
1022 const char *path,
1023 Error **errp)
1024 {
1025 char *filename;
1026 char *sanitized_name;
1027 char *c;
1028 void *area;
1029 int fd;
1030 unsigned long hpagesize;
1031
1032 hpagesize = gethugepagesize(path);
1033 if (!hpagesize) {
1034 goto error;
1035 }
1036
1037 if (memory < hpagesize) {
1038 return NULL;
1039 }
1040
1041 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1042 error_setg(errp,
1043 "host lacks kvm mmu notifiers, -mem-path unsupported");
1044 goto error;
1045 }
1046
1047 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1048 sanitized_name = g_strdup(block->mr->name);
1049 for (c = sanitized_name; *c != '\0'; c++) {
1050 if (*c == '/')
1051 *c = '_';
1052 }
1053
1054 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1055 sanitized_name);
1056 g_free(sanitized_name);
1057
1058 fd = mkstemp(filename);
1059 if (fd < 0) {
1060 error_setg_errno(errp, errno,
1061 "unable to create backing store for hugepages");
1062 g_free(filename);
1063 goto error;
1064 }
1065 unlink(filename);
1066 g_free(filename);
1067
1068 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1069
1070 /*
1071 * ftruncate is not supported by hugetlbfs in older
1072 * hosts, so don't bother bailing out on errors.
1073 * If anything goes wrong with it under other filesystems,
1074 * mmap will fail.
1075 */
1076 if (ftruncate(fd, memory)) {
1077 perror("ftruncate");
1078 }
1079
1080 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1081 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1082 fd, 0);
1083 if (area == MAP_FAILED) {
1084 error_setg_errno(errp, errno,
1085 "unable to map backing store for hugepages");
1086 close(fd);
1087 goto error;
1088 }
1089
1090 if (mem_prealloc) {
1091 os_mem_prealloc(fd, area, memory);
1092 }
1093
1094 block->fd = fd;
1095 return area;
1096
1097 error:
1098 if (mem_prealloc) {
1099 exit(1);
1100 }
1101 return NULL;
1102 }
1103 #endif
1104
1105 static ram_addr_t find_ram_offset(ram_addr_t size)
1106 {
1107 RAMBlock *block, *next_block;
1108 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1109
1110     assert(size != 0); /* it would hand out the same offset multiple times */
1111
1112 if (QTAILQ_EMPTY(&ram_list.blocks))
1113 return 0;
1114
1115 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1116 ram_addr_t end, next = RAM_ADDR_MAX;
1117
1118 end = block->offset + block->length;
1119
1120 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1121 if (next_block->offset >= end) {
1122 next = MIN(next, next_block->offset);
1123 }
1124 }
1125 if (next - end >= size && next - end < mingap) {
1126 offset = end;
1127 mingap = next - end;
1128 }
1129 }
1130
1131 if (offset == RAM_ADDR_MAX) {
1132 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1133 (uint64_t)size);
1134 abort();
1135 }
1136
1137 return offset;
1138 }
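/* This is a best-fit search over the gaps between existing blocks: e.g. with
 * blocks at [0, 0x100000) and [0x300000, 0x400000), a request for 0x100000
 * bytes returns 0x100000, the start of the smallest gap that can hold it.
 */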
1139
1140 ram_addr_t last_ram_offset(void)
1141 {
1142 RAMBlock *block;
1143 ram_addr_t last = 0;
1144
1145 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1146 last = MAX(last, block->offset + block->length);
1147
1148 return last;
1149 }
1150
1151 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1152 {
1153 int ret;
1154
1155     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1156 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1157 "dump-guest-core", true)) {
1158 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1159 if (ret) {
1160 perror("qemu_madvise");
1161 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1162 "but dump_guest_core=off specified\n");
1163 }
1164 }
1165 }
1166
1167 static RAMBlock *find_ram_block(ram_addr_t addr)
1168 {
1169 RAMBlock *block;
1170
1171 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1172 if (block->offset == addr) {
1173 return block;
1174 }
1175 }
1176
1177 return NULL;
1178 }
1179
1180 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1181 {
1182 RAMBlock *new_block = find_ram_block(addr);
1183 RAMBlock *block;
1184
1185 assert(new_block);
1186 assert(!new_block->idstr[0]);
1187
1188 if (dev) {
1189 char *id = qdev_get_dev_path(dev);
1190 if (id) {
1191 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1192 g_free(id);
1193 }
1194 }
1195 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1196
1197 /* This assumes the iothread lock is taken here too. */
1198 qemu_mutex_lock_ramlist();
1199 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1200 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1201 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1202 new_block->idstr);
1203 abort();
1204 }
1205 }
1206 qemu_mutex_unlock_ramlist();
1207 }
1208
1209 void qemu_ram_unset_idstr(ram_addr_t addr)
1210 {
1211 RAMBlock *block = find_ram_block(addr);
1212
1213 if (block) {
1214 memset(block->idstr, 0, sizeof(block->idstr));
1215 }
1216 }
1217
1218 static int memory_try_enable_merging(void *addr, size_t len)
1219 {
1220 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1221 /* disabled by the user */
1222 return 0;
1223 }
1224
1225 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1226 }
1227
1228 static ram_addr_t ram_block_add(RAMBlock *new_block)
1229 {
1230 RAMBlock *block;
1231 ram_addr_t old_ram_size, new_ram_size;
1232
1233 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1234
1235 /* This assumes the iothread lock is taken here too. */
1236 qemu_mutex_lock_ramlist();
1237 new_block->offset = find_ram_offset(new_block->length);
1238
1239 if (!new_block->host) {
1240 if (xen_enabled()) {
1241 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1242 } else {
1243 new_block->host = phys_mem_alloc(new_block->length);
1244 if (!new_block->host) {
1245 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1246 new_block->mr->name, strerror(errno));
1247 exit(1);
1248 }
1249 memory_try_enable_merging(new_block->host, new_block->length);
1250 }
1251 }
1252
1253 /* Keep the list sorted from biggest to smallest block. */
1254 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1255 if (block->length < new_block->length) {
1256 break;
1257 }
1258 }
1259 if (block) {
1260 QTAILQ_INSERT_BEFORE(block, new_block, next);
1261 } else {
1262 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1263 }
1264 ram_list.mru_block = NULL;
1265
1266 ram_list.version++;
1267 qemu_mutex_unlock_ramlist();
1268
1269 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1270
1271 if (new_ram_size > old_ram_size) {
1272 int i;
1273 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1274 ram_list.dirty_memory[i] =
1275 bitmap_zero_extend(ram_list.dirty_memory[i],
1276 old_ram_size, new_ram_size);
1277 }
1278 }
1279 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1280
1281 qemu_ram_setup_dump(new_block->host, new_block->length);
1282 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1283 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1284
1285 if (kvm_enabled()) {
1286 kvm_setup_guest_memory(new_block->host, new_block->length);
1287 }
1288
1289 return new_block->offset;
1290 }
1291
1292 #ifdef __linux__
1293 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1294 bool share, const char *mem_path,
1295 Error **errp)
1296 {
1297 RAMBlock *new_block;
1298
1299 if (xen_enabled()) {
1300 error_setg(errp, "-mem-path not supported with Xen");
1301 return -1;
1302 }
1303
1304 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1305 /*
1306 * file_ram_alloc() needs to allocate just like
1307 * phys_mem_alloc, but we haven't bothered to provide
1308 * a hook there.
1309 */
1310 error_setg(errp,
1311 "-mem-path not supported with this accelerator");
1312 return -1;
1313 }
1314
1315 size = TARGET_PAGE_ALIGN(size);
1316 new_block = g_malloc0(sizeof(*new_block));
1317 new_block->mr = mr;
1318 new_block->length = size;
1319 new_block->flags = share ? RAM_SHARED : 0;
1320 new_block->host = file_ram_alloc(new_block, size,
1321 mem_path, errp);
1322 if (!new_block->host) {
1323 g_free(new_block);
1324 return -1;
1325 }
1326
1327 return ram_block_add(new_block);
1328 }
1329 #endif
1330
1331 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1332 MemoryRegion *mr)
1333 {
1334 RAMBlock *new_block;
1335
1336 size = TARGET_PAGE_ALIGN(size);
1337 new_block = g_malloc0(sizeof(*new_block));
1338 new_block->mr = mr;
1339 new_block->length = size;
1340 new_block->fd = -1;
1341 new_block->host = host;
1342 if (host) {
1343 new_block->flags |= RAM_PREALLOC;
1344 }
1345 return ram_block_add(new_block);
1346 }
1347
1348 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1349 {
1350 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1351 }
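/* Board code normally reaches these allocators through the memory API
 * (e.g. memory_region_init_ram()) rather than calling them directly.
 */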
1352
1353 void qemu_ram_free_from_ptr(ram_addr_t addr)
1354 {
1355 RAMBlock *block;
1356
1357 /* This assumes the iothread lock is taken here too. */
1358 qemu_mutex_lock_ramlist();
1359 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1360 if (addr == block->offset) {
1361 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1362 ram_list.mru_block = NULL;
1363 ram_list.version++;
1364 g_free(block);
1365 break;
1366 }
1367 }
1368 qemu_mutex_unlock_ramlist();
1369 }
1370
1371 void qemu_ram_free(ram_addr_t addr)
1372 {
1373 RAMBlock *block;
1374
1375 /* This assumes the iothread lock is taken here too. */
1376 qemu_mutex_lock_ramlist();
1377 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1378 if (addr == block->offset) {
1379 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1380 ram_list.mru_block = NULL;
1381 ram_list.version++;
1382 if (block->flags & RAM_PREALLOC) {
1383 ;
1384 } else if (xen_enabled()) {
1385 xen_invalidate_map_cache_entry(block->host);
1386 #ifndef _WIN32
1387 } else if (block->fd >= 0) {
1388 munmap(block->host, block->length);
1389 close(block->fd);
1390 #endif
1391 } else {
1392 qemu_anon_ram_free(block->host, block->length);
1393 }
1394 g_free(block);
1395 break;
1396 }
1397 }
1398 qemu_mutex_unlock_ramlist();
1399
1400 }
1401
1402 #ifndef _WIN32
1403 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1404 {
1405 RAMBlock *block;
1406 ram_addr_t offset;
1407 int flags;
1408 void *area, *vaddr;
1409
1410 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1411 offset = addr - block->offset;
1412 if (offset < block->length) {
1413 vaddr = block->host + offset;
1414 if (block->flags & RAM_PREALLOC) {
1415 ;
1416 } else if (xen_enabled()) {
1417 abort();
1418 } else {
1419 flags = MAP_FIXED;
1420 munmap(vaddr, length);
1421 if (block->fd >= 0) {
1422 flags |= (block->flags & RAM_SHARED ?
1423 MAP_SHARED : MAP_PRIVATE);
1424 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1425 flags, block->fd, offset);
1426 } else {
1427 /*
1428 * Remap needs to match alloc. Accelerators that
1429 * set phys_mem_alloc never remap. If they did,
1430 * we'd need a remap hook here.
1431 */
1432 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1433
1434 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1435 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1436 flags, -1, 0);
1437 }
1438 if (area != vaddr) {
1439 fprintf(stderr, "Could not remap addr: "
1440 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1441 length, addr);
1442 exit(1);
1443 }
1444 memory_try_enable_merging(vaddr, length);
1445 qemu_ram_setup_dump(vaddr, length);
1446 }
1447 return;
1448 }
1449 }
1450 }
1451 #endif /* !_WIN32 */
1452
1453 int qemu_get_ram_fd(ram_addr_t addr)
1454 {
1455 RAMBlock *block = qemu_get_ram_block(addr);
1456
1457 return block->fd;
1458 }
1459
1460 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1461 With the exception of the softmmu code in this file, this should
1462 only be used for local memory (e.g. video ram) that the device owns,
1463 and knows it isn't going to access beyond the end of the block.
1464
1465 It should not be used for general purpose DMA.
1466 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1467 */
1468 void *qemu_get_ram_ptr(ram_addr_t addr)
1469 {
1470 RAMBlock *block = qemu_get_ram_block(addr);
1471
1472 if (xen_enabled()) {
1473 /* We need to check if the requested address is in the RAM
1474 * because we don't want to map the entire memory in QEMU.
1475 * In that case just map until the end of the page.
1476 */
1477 if (block->offset == 0) {
1478 return xen_map_cache(addr, 0, 0);
1479 } else if (block->host == NULL) {
1480 block->host =
1481 xen_map_cache(block->offset, block->length, 1);
1482 }
1483 }
1484 return block->host + (addr - block->offset);
1485 }
1486
1487 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1488 * but takes a size argument */
1489 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1490 {
1491 if (*size == 0) {
1492 return NULL;
1493 }
1494 if (xen_enabled()) {
1495 return xen_map_cache(addr, *size, 1);
1496 } else {
1497 RAMBlock *block;
1498
1499 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1500 if (addr - block->offset < block->length) {
1501 if (addr - block->offset + *size > block->length)
1502 *size = block->length - addr + block->offset;
1503 return block->host + (addr - block->offset);
1504 }
1505 }
1506
1507 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1508 abort();
1509 }
1510 }
1511
1512 /* Some of the softmmu routines need to translate from a host pointer
1513 (typically a TLB entry) back to a ram offset. */
1514 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1515 {
1516 RAMBlock *block;
1517 uint8_t *host = ptr;
1518
1519 if (xen_enabled()) {
1520 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1521 return qemu_get_ram_block(*ram_addr)->mr;
1522 }
1523
1524 block = ram_list.mru_block;
1525 if (block && block->host && host - block->host < block->length) {
1526 goto found;
1527 }
1528
1529 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1530         /* This case appears when the block is not mapped. */
1531 if (block->host == NULL) {
1532 continue;
1533 }
1534 if (host - block->host < block->length) {
1535 goto found;
1536 }
1537 }
1538
1539 return NULL;
1540
1541 found:
1542 *ram_addr = block->offset + (host - block->host);
1543 return block->mr;
1544 }
1545
1546 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1547 uint64_t val, unsigned size)
1548 {
1549 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1550 tb_invalidate_phys_page_fast(ram_addr, size);
1551 }
1552 switch (size) {
1553 case 1:
1554 stb_p(qemu_get_ram_ptr(ram_addr), val);
1555 break;
1556 case 2:
1557 stw_p(qemu_get_ram_ptr(ram_addr), val);
1558 break;
1559 case 4:
1560 stl_p(qemu_get_ram_ptr(ram_addr), val);
1561 break;
1562 default:
1563 abort();
1564 }
1565 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1566 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1567 /* we remove the notdirty callback only if the code has been
1568 flushed */
1569 if (!cpu_physical_memory_is_clean(ram_addr)) {
1570 CPUArchState *env = current_cpu->env_ptr;
1571 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1572 }
1573 }
1574
1575 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1576 unsigned size, bool is_write)
1577 {
1578 return is_write;
1579 }
1580
1581 static const MemoryRegionOps notdirty_mem_ops = {
1582 .write = notdirty_mem_write,
1583 .valid.accepts = notdirty_mem_accepts,
1584 .endianness = DEVICE_NATIVE_ENDIAN,
1585 };
1586
1587 /* Generate a debug exception if a watchpoint has been hit. */
1588 static void check_watchpoint(int offset, int len_mask, int flags)
1589 {
1590 CPUState *cpu = current_cpu;
1591 CPUArchState *env = cpu->env_ptr;
1592 target_ulong pc, cs_base;
1593 target_ulong vaddr;
1594 CPUWatchpoint *wp;
1595 int cpu_flags;
1596
1597 if (cpu->watchpoint_hit) {
1598 /* We re-entered the check after replacing the TB. Now raise
1599          * the debug interrupt so that it will trigger after the
1600 * current instruction. */
1601 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1602 return;
1603 }
1604 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1605 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1606 if ((vaddr == (wp->vaddr & len_mask) ||
1607 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1608 wp->flags |= BP_WATCHPOINT_HIT;
1609 if (!cpu->watchpoint_hit) {
1610 cpu->watchpoint_hit = wp;
1611 tb_check_watchpoint(cpu);
1612 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1613 cpu->exception_index = EXCP_DEBUG;
1614 cpu_loop_exit(cpu);
1615 } else {
1616 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1617 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1618 cpu_resume_from_signal(cpu, NULL);
1619 }
1620 }
1621 } else {
1622 wp->flags &= ~BP_WATCHPOINT_HIT;
1623 }
1624 }
1625 }
1626
1627 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1628 so these check for a hit then pass through to the normal out-of-line
1629 phys routines. */
1630 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1631 unsigned size)
1632 {
1633 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1634 switch (size) {
1635 case 1: return ldub_phys(&address_space_memory, addr);
1636 case 2: return lduw_phys(&address_space_memory, addr);
1637 case 4: return ldl_phys(&address_space_memory, addr);
1638 default: abort();
1639 }
1640 }
1641
1642 static void watch_mem_write(void *opaque, hwaddr addr,
1643 uint64_t val, unsigned size)
1644 {
1645 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1646 switch (size) {
1647 case 1:
1648 stb_phys(&address_space_memory, addr, val);
1649 break;
1650 case 2:
1651 stw_phys(&address_space_memory, addr, val);
1652 break;
1653 case 4:
1654 stl_phys(&address_space_memory, addr, val);
1655 break;
1656 default: abort();
1657 }
1658 }
1659
1660 static const MemoryRegionOps watch_mem_ops = {
1661 .read = watch_mem_read,
1662 .write = watch_mem_write,
1663 .endianness = DEVICE_NATIVE_ENDIAN,
1664 };
1665
1666 static uint64_t subpage_read(void *opaque, hwaddr addr,
1667 unsigned len)
1668 {
1669 subpage_t *subpage = opaque;
1670 uint8_t buf[4];
1671
1672 #if defined(DEBUG_SUBPAGE)
1673 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1674 subpage, len, addr);
1675 #endif
1676 address_space_read(subpage->as, addr + subpage->base, buf, len);
1677 switch (len) {
1678 case 1:
1679 return ldub_p(buf);
1680 case 2:
1681 return lduw_p(buf);
1682 case 4:
1683 return ldl_p(buf);
1684 default:
1685 abort();
1686 }
1687 }
1688
1689 static void subpage_write(void *opaque, hwaddr addr,
1690 uint64_t value, unsigned len)
1691 {
1692 subpage_t *subpage = opaque;
1693 uint8_t buf[4];
1694
1695 #if defined(DEBUG_SUBPAGE)
1696 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1697 " value %"PRIx64"\n",
1698 __func__, subpage, len, addr, value);
1699 #endif
1700 switch (len) {
1701 case 1:
1702 stb_p(buf, value);
1703 break;
1704 case 2:
1705 stw_p(buf, value);
1706 break;
1707 case 4:
1708 stl_p(buf, value);
1709 break;
1710 default:
1711 abort();
1712 }
1713 address_space_write(subpage->as, addr + subpage->base, buf, len);
1714 }
1715
1716 static bool subpage_accepts(void *opaque, hwaddr addr,
1717 unsigned len, bool is_write)
1718 {
1719 subpage_t *subpage = opaque;
1720 #if defined(DEBUG_SUBPAGE)
1721 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1722 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1723 #endif
1724
1725 return address_space_access_valid(subpage->as, addr + subpage->base,
1726 len, is_write);
1727 }
1728
1729 static const MemoryRegionOps subpage_ops = {
1730 .read = subpage_read,
1731 .write = subpage_write,
1732 .valid.accepts = subpage_accepts,
1733 .endianness = DEVICE_NATIVE_ENDIAN,
1734 };
1735
1736 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1737 uint16_t section)
1738 {
1739 int idx, eidx;
1740
1741 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1742 return -1;
1743 idx = SUBPAGE_IDX(start);
1744 eidx = SUBPAGE_IDX(end);
1745 #if defined(DEBUG_SUBPAGE)
1746 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1747 __func__, mmio, start, end, idx, eidx, section);
1748 #endif
1749 for (; idx <= eidx; idx++) {
1750 mmio->sub_section[idx] = section;
1751 }
1752
1753 return 0;
1754 }
1755
1756 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1757 {
1758 subpage_t *mmio;
1759
1760 mmio = g_malloc0(sizeof(subpage_t));
1761
1762 mmio->as = as;
1763 mmio->base = base;
1764 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1765 "subpage", TARGET_PAGE_SIZE);
1766 mmio->iomem.subpage = true;
1767 #if defined(DEBUG_SUBPAGE)
1768 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1769 mmio, base, TARGET_PAGE_SIZE);
1770 #endif
1771 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1772
1773 return mmio;
1774 }
1775
1776 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1777 MemoryRegion *mr)
1778 {
1779 assert(as);
1780 MemoryRegionSection section = {
1781 .address_space = as,
1782 .mr = mr,
1783 .offset_within_address_space = 0,
1784 .offset_within_region = 0,
1785 .size = int128_2_64(),
1786 };
1787
1788 return phys_section_add(map, &section);
1789 }
1790
1791 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1792 {
1793 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1794 }
1795
1796 static void io_mem_init(void)
1797 {
1798 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1799 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1800 "unassigned", UINT64_MAX);
1801 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1802 "notdirty", UINT64_MAX);
1803 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1804 "watch", UINT64_MAX);
1805 }
1806
1807 static void mem_begin(MemoryListener *listener)
1808 {
1809 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1810 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1811 uint16_t n;
1812
1813 n = dummy_section(&d->map, as, &io_mem_unassigned);
1814 assert(n == PHYS_SECTION_UNASSIGNED);
1815 n = dummy_section(&d->map, as, &io_mem_notdirty);
1816 assert(n == PHYS_SECTION_NOTDIRTY);
1817 n = dummy_section(&d->map, as, &io_mem_rom);
1818 assert(n == PHYS_SECTION_ROM);
1819 n = dummy_section(&d->map, as, &io_mem_watch);
1820 assert(n == PHYS_SECTION_WATCH);
1821
1822 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1823 d->as = as;
1824 as->next_dispatch = d;
1825 }
1826
1827 static void mem_commit(MemoryListener *listener)
1828 {
1829 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1830 AddressSpaceDispatch *cur = as->dispatch;
1831 AddressSpaceDispatch *next = as->next_dispatch;
1832
1833 phys_page_compact_all(next, next->map.nodes_nb);
1834
1835 as->dispatch = next;
1836
1837 if (cur) {
1838 phys_sections_free(&cur->map);
1839 g_free(cur);
1840 }
1841 }
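/* mem_begin() builds a fresh AddressSpaceDispatch for every topology update
 * and mem_commit() publishes it only once it is complete, freeing the old
 * table afterwards, so lookups never observe a half-built radix tree.
 */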
1842
1843 static void tcg_commit(MemoryListener *listener)
1844 {
1845 CPUState *cpu;
1846
1847 /* since each CPU stores ram addresses in its TLB cache, we must
1848 reset the modified entries */
1849 /* XXX: slow ! */
1850 CPU_FOREACH(cpu) {
1851         /* FIXME: Disentangle the cpu.h circular file deps so we can
1852 directly get the right CPU from listener. */
1853 if (cpu->tcg_as_listener != listener) {
1854 continue;
1855 }
1856 tlb_flush(cpu, 1);
1857 }
1858 }
1859
1860 static void core_log_global_start(MemoryListener *listener)
1861 {
1862 cpu_physical_memory_set_dirty_tracking(true);
1863 }
1864
1865 static void core_log_global_stop(MemoryListener *listener)
1866 {
1867 cpu_physical_memory_set_dirty_tracking(false);
1868 }
1869
1870 static MemoryListener core_memory_listener = {
1871 .log_global_start = core_log_global_start,
1872 .log_global_stop = core_log_global_stop,
1873 .priority = 1,
1874 };
1875
1876 void address_space_init_dispatch(AddressSpace *as)
1877 {
1878 as->dispatch = NULL;
1879 as->dispatch_listener = (MemoryListener) {
1880 .begin = mem_begin,
1881 .commit = mem_commit,
1882 .region_add = mem_add,
1883 .region_nop = mem_add,
1884 .priority = 0,
1885 };
1886 memory_listener_register(&as->dispatch_listener, as);
1887 }
1888
1889 void address_space_destroy_dispatch(AddressSpace *as)
1890 {
1891 AddressSpaceDispatch *d = as->dispatch;
1892
1893 memory_listener_unregister(&as->dispatch_listener);
1894 g_free(d);
1895 as->dispatch = NULL;
1896 }
1897
1898 static void memory_map_init(void)
1899 {
1900 system_memory = g_malloc(sizeof(*system_memory));
1901
1902 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1903 address_space_init(&address_space_memory, system_memory, "memory");
1904
1905 system_io = g_malloc(sizeof(*system_io));
1906 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1907 65536);
1908 address_space_init(&address_space_io, system_io, "I/O");
1909
1910 memory_listener_register(&core_memory_listener, &address_space_memory);
1911 }
1912
1913 MemoryRegion *get_system_memory(void)
1914 {
1915 return system_memory;
1916 }
1917
1918 MemoryRegion *get_system_io(void)
1919 {
1920 return system_io;
1921 }
1922
1923 #endif /* !defined(CONFIG_USER_ONLY) */
1924
1925 /* physical memory access (slow version, mainly for debug) */
1926 #if defined(CONFIG_USER_ONLY)
1927 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1928 uint8_t *buf, int len, int is_write)
1929 {
1930 int l, flags;
1931 target_ulong page;
1932 void * p;
1933
1934 while (len > 0) {
1935 page = addr & TARGET_PAGE_MASK;
1936 l = (page + TARGET_PAGE_SIZE) - addr;
1937 if (l > len)
1938 l = len;
1939 flags = page_get_flags(page);
1940 if (!(flags & PAGE_VALID))
1941 return -1;
1942 if (is_write) {
1943 if (!(flags & PAGE_WRITE))
1944 return -1;
1945 /* XXX: this code should not depend on lock_user */
1946 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1947 return -1;
1948 memcpy(p, buf, l);
1949 unlock_user(p, addr, l);
1950 } else {
1951 if (!(flags & PAGE_READ))
1952 return -1;
1953 /* XXX: this code should not depend on lock_user */
1954 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1955 return -1;
1956 memcpy(buf, p, l);
1957 unlock_user(p, addr, 0);
1958 }
1959 len -= l;
1960 buf += l;
1961 addr += l;
1962 }
1963 return 0;
1964 }
1965
1966 #else
1967
1968 static void invalidate_and_set_dirty(hwaddr addr,
1969 hwaddr length)
1970 {
1971 if (cpu_physical_memory_is_clean(addr)) {
1972 /* invalidate code */
1973 tb_invalidate_phys_page_range(addr, addr + length, 0);
1974 /* set dirty bit */
1975 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1976 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1977 }
1978 xen_modified_memory(addr, length);
1979 }
1980
1981 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1982 {
1983 unsigned access_size_max = mr->ops->valid.max_access_size;
1984
1985 /* Regions are assumed to support 1-4 byte accesses unless
1986 otherwise specified. */
1987 if (access_size_max == 0) {
1988 access_size_max = 4;
1989 }
1990
1991 /* Bound the maximum access by the alignment of the address. */
1992 if (!mr->ops->impl.unaligned) {
1993 unsigned align_size_max = addr & -addr;
1994 if (align_size_max != 0 && align_size_max < access_size_max) {
1995 access_size_max = align_size_max;
1996 }
1997 }
1998
1999 /* Don't attempt accesses larger than the maximum. */
2000 if (l > access_size_max) {
2001 l = access_size_max;
2002 }
2003 if (l & (l - 1)) {
2004 l = 1 << (qemu_fls(l) - 1);
2005 }
2006
2007 return l;
2008 }
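/* Example: a 6-byte request at an address aligned to 2 (but not 4) bytes on
 * a region with valid.max_access_size == 4 is clamped to 2 bytes by the
 * alignment bound; address_space_rw() below then loops until the full
 * length has been transferred.
 */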
2009
2010 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2011 int len, bool is_write)
2012 {
2013 hwaddr l;
2014 uint8_t *ptr;
2015 uint64_t val;
2016 hwaddr addr1;
2017 MemoryRegion *mr;
2018 bool error = false;
2019
2020 while (len > 0) {
2021 l = len;
2022 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2023
2024 if (is_write) {
2025 if (!memory_access_is_direct(mr, is_write)) {
2026 l = memory_access_size(mr, l, addr1);
2027 /* XXX: could force current_cpu to NULL to avoid
2028 potential bugs */
2029 switch (l) {
2030 case 8:
2031 /* 64 bit write access */
2032 val = ldq_p(buf);
2033 error |= io_mem_write(mr, addr1, val, 8);
2034 break;
2035 case 4:
2036 /* 32 bit write access */
2037 val = ldl_p(buf);
2038 error |= io_mem_write(mr, addr1, val, 4);
2039 break;
2040 case 2:
2041 /* 16 bit write access */
2042 val = lduw_p(buf);
2043 error |= io_mem_write(mr, addr1, val, 2);
2044 break;
2045 case 1:
2046 /* 8 bit write access */
2047 val = ldub_p(buf);
2048 error |= io_mem_write(mr, addr1, val, 1);
2049 break;
2050 default:
2051 abort();
2052 }
2053 } else {
2054 addr1 += memory_region_get_ram_addr(mr);
2055 /* RAM case */
2056 ptr = qemu_get_ram_ptr(addr1);
2057 memcpy(ptr, buf, l);
2058 invalidate_and_set_dirty(addr1, l);
2059 }
2060 } else {
2061 if (!memory_access_is_direct(mr, is_write)) {
2062 /* I/O case */
2063 l = memory_access_size(mr, l, addr1);
2064 switch (l) {
2065 case 8:
2066 /* 64 bit read access */
2067 error |= io_mem_read(mr, addr1, &val, 8);
2068 stq_p(buf, val);
2069 break;
2070 case 4:
2071 /* 32 bit read access */
2072 error |= io_mem_read(mr, addr1, &val, 4);
2073 stl_p(buf, val);
2074 break;
2075 case 2:
2076 /* 16 bit read access */
2077 error |= io_mem_read(mr, addr1, &val, 2);
2078 stw_p(buf, val);
2079 break;
2080 case 1:
2081 /* 8 bit read access */
2082 error |= io_mem_read(mr, addr1, &val, 1);
2083 stb_p(buf, val);
2084 break;
2085 default:
2086 abort();
2087 }
2088 } else {
2089 /* RAM case */
2090 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2091 memcpy(buf, ptr, l);
2092 }
2093 }
2094 len -= l;
2095 buf += l;
2096 addr += l;
2097 }
2098
2099 return error;
2100 }
2101
2102 bool address_space_write(AddressSpace *as, hwaddr addr,
2103 const uint8_t *buf, int len)
2104 {
2105 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2106 }
2107
2108 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2109 {
2110 return address_space_rw(as, addr, buf, len, false);
2111 }
2112
2113
2114 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2115 int len, int is_write)
2116 {
2117 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2118 }
2119
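/* Illustrative caller (hypothetical device code, not part of exec.c): a
 * device model pulls a guest-physical buffer into host memory with
 * address_space_read() and pushes back a status byte with
 * address_space_write(); the descriptor address and sizes are made up. */
static void example_copy_guest_buffer(AddressSpace *as, hwaddr desc_addr)
{
    uint8_t data[64];
    uint8_t status = 0;

    /* read 64 bytes of guest memory starting at desc_addr */
    address_space_read(as, desc_addr, data, sizeof(data));
    /* ... act on data ... */
    /* write a one-byte completion status right after the buffer */
    address_space_write(as, desc_addr + sizeof(data), &status, 1);
}
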
2120 enum write_rom_type {
2121 WRITE_DATA,
2122 FLUSH_CACHE,
2123 };
2124
2125 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2126 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2127 {
2128 hwaddr l;
2129 uint8_t *ptr;
2130 hwaddr addr1;
2131 MemoryRegion *mr;
2132
2133 while (len > 0) {
2134 l = len;
2135 mr = address_space_translate(as, addr, &addr1, &l, true);
2136
2137 if (!(memory_region_is_ram(mr) ||
2138 memory_region_is_romd(mr))) {
2139 /* do nothing */
2140 } else {
2141 addr1 += memory_region_get_ram_addr(mr);
2142 /* ROM/RAM case */
2143 ptr = qemu_get_ram_ptr(addr1);
2144 switch (type) {
2145 case WRITE_DATA:
2146 memcpy(ptr, buf, l);
2147 invalidate_and_set_dirty(addr1, l);
2148 break;
2149 case FLUSH_CACHE:
2150 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2151 break;
2152 }
2153 }
2154 len -= l;
2155 buf += l;
2156 addr += l;
2157 }
2158 }
2159
2160 /* used for ROM loading: can write in RAM and ROM */
2161 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2162 const uint8_t *buf, int len)
2163 {
2164 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2165 }
2166
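/* Illustrative use (hypothetical loader code, not part of exec.c): board
 * code copying a firmware blob goes through cpu_physical_memory_write_rom()
 * so the data also lands in ROM/ROMD regions that a plain
 * address_space_write() would not reach. */
static void example_load_firmware(AddressSpace *as, hwaddr load_addr,
                                  const uint8_t *blob, int blob_size)
{
    cpu_physical_memory_write_rom(as, load_addr, blob, blob_size);
}
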
2167 void cpu_flush_icache_range(hwaddr start, int len)
2168 {
2169 /*
2170 * This function should do the same thing as an icache flush that was
2171 * triggered from within the guest. For TCG we are always cache coherent,
2172 * so there is no need to flush anything. For KVM / Xen we need to flush
2173 * the host's instruction cache at least.
2174 */
2175 if (tcg_enabled()) {
2176 return;
2177 }
2178
2179 cpu_physical_memory_write_rom_internal(&address_space_memory,
2180 start, NULL, len, FLUSH_CACHE);
2181 }
2182
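/* Illustrative sequence (hypothetical patching helper, not part of exec.c):
 * after rewriting guest instructions in place, flush the range so KVM/Xen
 * hosts observe the new code; under TCG the flush is a no-op as noted
 * above. */
static void example_patch_guest_code(hwaddr pc, const uint8_t *insn,
                                     int insn_len)
{
    cpu_physical_memory_write_rom(&address_space_memory, pc, insn, insn_len);
    cpu_flush_icache_range(pc, insn_len);
}
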
2183 typedef struct {
2184 MemoryRegion *mr;
2185 void *buffer;
2186 hwaddr addr;
2187 hwaddr len;
2188 } BounceBuffer;
2189
2190 static BounceBuffer bounce;
2191
2192 typedef struct MapClient {
2193 void *opaque;
2194 void (*callback)(void *opaque);
2195 QLIST_ENTRY(MapClient) link;
2196 } MapClient;
2197
2198 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2199 = QLIST_HEAD_INITIALIZER(map_client_list);
2200
2201 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2202 {
2203 MapClient *client = g_malloc(sizeof(*client));
2204
2205 client->opaque = opaque;
2206 client->callback = callback;
2207 QLIST_INSERT_HEAD(&map_client_list, client, link);
2208 return client;
2209 }
2210
2211 static void cpu_unregister_map_client(void *_client)
2212 {
2213 MapClient *client = (MapClient *)_client;
2214
2215 QLIST_REMOVE(client, link);
2216 g_free(client);
2217 }
2218
2219 static void cpu_notify_map_clients(void)
2220 {
2221 MapClient *client;
2222
2223 while (!QLIST_EMPTY(&map_client_list)) {
2224 client = QLIST_FIRST(&map_client_list);
2225 client->callback(client->opaque);
2226 cpu_unregister_map_client(client);
2227 }
2228 }
2229
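/* Illustrative pattern (hypothetical device code, not part of exec.c): a
 * caller whose address_space_map() attempt returned NULL because the
 * bounce buffer was busy registers a callback; cpu_notify_map_clients()
 * invokes it once the buffer is released, at which point the mapping can
 * be retried. */
static void example_retry_mapping(void *opaque)
{
    /* opaque would point at the device state; re-issue the failed
       address_space_map() call from here */
}

static void example_schedule_map_retry(void *device_state)
{
    cpu_register_map_client(device_state, example_retry_mapping);
}
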
2230 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2231 {
2232 MemoryRegion *mr;
2233 hwaddr l, xlat;
2234
2235 while (len > 0) {
2236 l = len;
2237 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2238 if (!memory_access_is_direct(mr, is_write)) {
2239 l = memory_access_size(mr, l, addr);
2240 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2241 return false;
2242 }
2243 }
2244
2245 len -= l;
2246 addr += l;
2247 }
2248 return true;
2249 }
2250
2251 /* Map a physical memory region into a host virtual address.
2252 * May map a subset of the requested range, given by and returned in *plen.
2253 * May return NULL if resources needed to perform the mapping are exhausted.
2254 * Use only for reads OR writes - not for read-modify-write operations.
2255 * Use cpu_register_map_client() to know when retrying the map operation is
2256 * likely to succeed.
2257 */
2258 void *address_space_map(AddressSpace *as,
2259 hwaddr addr,
2260 hwaddr *plen,
2261 bool is_write)
2262 {
2263 hwaddr len = *plen;
2264 hwaddr done = 0;
2265 hwaddr l, xlat, base;
2266 MemoryRegion *mr, *this_mr;
2267 ram_addr_t raddr;
2268
2269 if (len == 0) {
2270 return NULL;
2271 }
2272
2273 l = len;
2274 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2275 if (!memory_access_is_direct(mr, is_write)) {
2276 if (bounce.buffer) {
2277 return NULL;
2278 }
2279 /* Avoid unbounded allocations */
2280 l = MIN(l, TARGET_PAGE_SIZE);
2281 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2282 bounce.addr = addr;
2283 bounce.len = l;
2284
2285 memory_region_ref(mr);
2286 bounce.mr = mr;
2287 if (!is_write) {
2288 address_space_read(as, addr, bounce.buffer, l);
2289 }
2290
2291 *plen = l;
2292 return bounce.buffer;
2293 }
2294
2295 base = xlat;
2296 raddr = memory_region_get_ram_addr(mr);
2297
2298 for (;;) {
2299 len -= l;
2300 addr += l;
2301 done += l;
2302 if (len == 0) {
2303 break;
2304 }
2305
2306 l = len;
2307 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2308 if (this_mr != mr || xlat != base + done) {
2309 break;
2310 }
2311 }
2312
2313 memory_region_ref(mr);
2314 *plen = done;
2315 return qemu_ram_ptr_length(raddr + base, plen);
2316 }
2317
2318 /* Unmaps a memory region previously mapped by address_space_map().
2319 * Will also mark the memory as dirty if is_write == 1. access_len gives
2320 * the amount of memory that was actually read or written by the caller.
2321 */
2322 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2323 int is_write, hwaddr access_len)
2324 {
2325 if (buffer != bounce.buffer) {
2326 MemoryRegion *mr;
2327 ram_addr_t addr1;
2328
2329 mr = qemu_ram_addr_from_host(buffer, &addr1);
2330 assert(mr != NULL);
2331 if (is_write) {
2332 while (access_len) {
2333 unsigned l;
2334 l = TARGET_PAGE_SIZE;
2335 if (l > access_len)
2336 l = access_len;
2337 invalidate_and_set_dirty(addr1, l);
2338 addr1 += l;
2339 access_len -= l;
2340 }
2341 }
2342 if (xen_enabled()) {
2343 xen_invalidate_map_cache_entry(buffer);
2344 }
2345 memory_region_unref(mr);
2346 return;
2347 }
2348 if (is_write) {
2349 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2350 }
2351 qemu_vfree(bounce.buffer);
2352 bounce.buffer = NULL;
2353 memory_region_unref(bounce.mr);
2354 cpu_notify_map_clients();
2355 }
2356
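/* Illustrative DMA cycle (hypothetical helper, not part of exec.c): map a
 * guest-physical window, let the "device" write into it directly (a
 * memset stands in for real work), then unmap with the byte count that
 * was actually written so the pages are dirtied and any bounce buffer is
 * flushed back to guest memory. */
static bool example_dma_fill(AddressSpace *as, hwaddr addr, hwaddr size)
{
    hwaddr plen = size;
    void *host = address_space_map(as, addr, &plen, true);

    if (!host) {
        return false;      /* resources exhausted; retry via a map client */
    }
    memset(host, 0, plen); /* device writes into the mapped window */
    address_space_unmap(as, host, plen, true, plen);
    return plen == size;   /* the mapping may cover only part of the range */
}
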
2357 void *cpu_physical_memory_map(hwaddr addr,
2358 hwaddr *plen,
2359 int is_write)
2360 {
2361 return address_space_map(&address_space_memory, addr, plen, is_write);
2362 }
2363
2364 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2365 int is_write, hwaddr access_len)
2366 {
2367 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2368 }
2369
2370 /* warning: addr must be aligned */
2371 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2372 enum device_endian endian)
2373 {
2374 uint8_t *ptr;
2375 uint64_t val;
2376 MemoryRegion *mr;
2377 hwaddr l = 4;
2378 hwaddr addr1;
2379
2380 mr = address_space_translate(as, addr, &addr1, &l, false);
2381 if (l < 4 || !memory_access_is_direct(mr, false)) {
2382 /* I/O case */
2383 io_mem_read(mr, addr1, &val, 4);
2384 #if defined(TARGET_WORDS_BIGENDIAN)
2385 if (endian == DEVICE_LITTLE_ENDIAN) {
2386 val = bswap32(val);
2387 }
2388 #else
2389 if (endian == DEVICE_BIG_ENDIAN) {
2390 val = bswap32(val);
2391 }
2392 #endif
2393 } else {
2394 /* RAM case */
2395 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2396 & TARGET_PAGE_MASK)
2397 + addr1);
2398 switch (endian) {
2399 case DEVICE_LITTLE_ENDIAN:
2400 val = ldl_le_p(ptr);
2401 break;
2402 case DEVICE_BIG_ENDIAN:
2403 val = ldl_be_p(ptr);
2404 break;
2405 default:
2406 val = ldl_p(ptr);
2407 break;
2408 }
2409 }
2410 return val;
2411 }
2412
2413 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2414 {
2415 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2416 }
2417
2418 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2419 {
2420 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2421 }
2422
2423 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2424 {
2425 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2426 }
2427
2428 /* warning: addr must be aligned */
2429 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2430 enum device_endian endian)
2431 {
2432 uint8_t *ptr;
2433 uint64_t val;
2434 MemoryRegion *mr;
2435 hwaddr l = 8;
2436 hwaddr addr1;
2437
2438 mr = address_space_translate(as, addr, &addr1, &l,
2439 false);
2440 if (l < 8 || !memory_access_is_direct(mr, false)) {
2441 /* I/O case */
2442 io_mem_read(mr, addr1, &val, 8);
2443 #if defined(TARGET_WORDS_BIGENDIAN)
2444 if (endian == DEVICE_LITTLE_ENDIAN) {
2445 val = bswap64(val);
2446 }
2447 #else
2448 if (endian == DEVICE_BIG_ENDIAN) {
2449 val = bswap64(val);
2450 }
2451 #endif
2452 } else {
2453 /* RAM case */
2454 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2455 & TARGET_PAGE_MASK)
2456 + addr1);
2457 switch (endian) {
2458 case DEVICE_LITTLE_ENDIAN:
2459 val = ldq_le_p(ptr);
2460 break;
2461 case DEVICE_BIG_ENDIAN:
2462 val = ldq_be_p(ptr);
2463 break;
2464 default:
2465 val = ldq_p(ptr);
2466 break;
2467 }
2468 }
2469 return val;
2470 }
2471
2472 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2473 {
2474 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2475 }
2476
2477 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2478 {
2479 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2480 }
2481
2482 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2483 {
2484 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2485 }
2486
2487 /* XXX: optimize */
2488 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2489 {
2490 uint8_t val;
2491 address_space_rw(as, addr, &val, 1, 0);
2492 return val;
2493 }
2494
2495 /* warning: addr must be aligned */
2496 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2497 enum device_endian endian)
2498 {
2499 uint8_t *ptr;
2500 uint64_t val;
2501 MemoryRegion *mr;
2502 hwaddr l = 2;
2503 hwaddr addr1;
2504
2505 mr = address_space_translate(as, addr, &addr1, &l,
2506 false);
2507 if (l < 2 || !memory_access_is_direct(mr, false)) {
2508 /* I/O case */
2509 io_mem_read(mr, addr1, &val, 2);
2510 #if defined(TARGET_WORDS_BIGENDIAN)
2511 if (endian == DEVICE_LITTLE_ENDIAN) {
2512 val = bswap16(val);
2513 }
2514 #else
2515 if (endian == DEVICE_BIG_ENDIAN) {
2516 val = bswap16(val);
2517 }
2518 #endif
2519 } else {
2520 /* RAM case */
2521 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2522 & TARGET_PAGE_MASK)
2523 + addr1);
2524 switch (endian) {
2525 case DEVICE_LITTLE_ENDIAN:
2526 val = lduw_le_p(ptr);
2527 break;
2528 case DEVICE_BIG_ENDIAN:
2529 val = lduw_be_p(ptr);
2530 break;
2531 default:
2532 val = lduw_p(ptr);
2533 break;
2534 }
2535 }
2536 return val;
2537 }
2538
2539 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2540 {
2541 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2542 }
2543
2544 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2545 {
2546 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2547 }
2548
2549 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2550 {
2551 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2552 }
2553
2554 /* warning: addr must be aligned. The ram page is not marked as dirty
2555 and the code inside is not invalidated. It is useful if the dirty
2556 bits are used to track modified PTEs */
2557 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2558 {
2559 uint8_t *ptr;
2560 MemoryRegion *mr;
2561 hwaddr l = 4;
2562 hwaddr addr1;
2563
2564 mr = address_space_translate(as, addr, &addr1, &l,
2565 true);
2566 if (l < 4 || !memory_access_is_direct(mr, true)) {
2567 io_mem_write(mr, addr1, val, 4);
2568 } else {
2569 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2570 ptr = qemu_get_ram_ptr(addr1);
2571 stl_p(ptr, val);
2572
2573 if (unlikely(in_migration)) {
2574 if (cpu_physical_memory_is_clean(addr1)) {
2575 /* invalidate code */
2576 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2577 /* set dirty bit */
2578 cpu_physical_memory_set_dirty_flag(addr1,
2579 DIRTY_MEMORY_MIGRATION);
2580 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2581 }
2582 }
2583 }
2584 }
2585
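/* Illustrative use (hypothetical PTE layout and bit value, not part of
 * exec.c): target MMU code that writes back an accessed bit during a
 * software page walk uses the _notdirty variant so that its own update is
 * not itself recorded as a guest modification of the page-table page. */
static void example_set_pte_accessed(AddressSpace *as, hwaddr pte_addr,
                                     uint32_t pte)
{
    stl_phys_notdirty(as, pte_addr, pte | 0x20 /* hypothetical ACCESSED bit */);
}
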
2586 /* warning: addr must be aligned */
2587 static inline void stl_phys_internal(AddressSpace *as,
2588 hwaddr addr, uint32_t val,
2589 enum device_endian endian)
2590 {
2591 uint8_t *ptr;
2592 MemoryRegion *mr;
2593 hwaddr l = 4;
2594 hwaddr addr1;
2595
2596 mr = address_space_translate(as, addr, &addr1, &l,
2597 true);
2598 if (l < 4 || !memory_access_is_direct(mr, true)) {
2599 #if defined(TARGET_WORDS_BIGENDIAN)
2600 if (endian == DEVICE_LITTLE_ENDIAN) {
2601 val = bswap32(val);
2602 }
2603 #else
2604 if (endian == DEVICE_BIG_ENDIAN) {
2605 val = bswap32(val);
2606 }
2607 #endif
2608 io_mem_write(mr, addr1, val, 4);
2609 } else {
2610 /* RAM case */
2611 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2612 ptr = qemu_get_ram_ptr(addr1);
2613 switch (endian) {
2614 case DEVICE_LITTLE_ENDIAN:
2615 stl_le_p(ptr, val);
2616 break;
2617 case DEVICE_BIG_ENDIAN:
2618 stl_be_p(ptr, val);
2619 break;
2620 default:
2621 stl_p(ptr, val);
2622 break;
2623 }
2624 invalidate_and_set_dirty(addr1, 4);
2625 }
2626 }
2627
2628 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2629 {
2630 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2631 }
2632
2633 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2634 {
2635 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2636 }
2637
2638 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2639 {
2640 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2641 }
2642
2643 /* XXX: optimize */
2644 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2645 {
2646 uint8_t v = val;
2647 address_space_rw(as, addr, &v, 1, 1);
2648 }
2649
2650 /* warning: addr must be aligned */
2651 static inline void stw_phys_internal(AddressSpace *as,
2652 hwaddr addr, uint32_t val,
2653 enum device_endian endian)
2654 {
2655 uint8_t *ptr;
2656 MemoryRegion *mr;
2657 hwaddr l = 2;
2658 hwaddr addr1;
2659
2660 mr = address_space_translate(as, addr, &addr1, &l, true);
2661 if (l < 2 || !memory_access_is_direct(mr, true)) {
2662 #if defined(TARGET_WORDS_BIGENDIAN)
2663 if (endian == DEVICE_LITTLE_ENDIAN) {
2664 val = bswap16(val);
2665 }
2666 #else
2667 if (endian == DEVICE_BIG_ENDIAN) {
2668 val = bswap16(val);
2669 }
2670 #endif
2671 io_mem_write(mr, addr1, val, 2);
2672 } else {
2673 /* RAM case */
2674 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2675 ptr = qemu_get_ram_ptr(addr1);
2676 switch (endian) {
2677 case DEVICE_LITTLE_ENDIAN:
2678 stw_le_p(ptr, val);
2679 break;
2680 case DEVICE_BIG_ENDIAN:
2681 stw_be_p(ptr, val);
2682 break;
2683 default:
2684 stw_p(ptr, val);
2685 break;
2686 }
2687 invalidate_and_set_dirty(addr1, 2);
2688 }
2689 }
2690
2691 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2692 {
2693 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2694 }
2695
2696 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2697 {
2698 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2699 }
2700
2701 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2702 {
2703 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2704 }
2705
2706 /* XXX: optimize */
2707 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2708 {
2709 val = tswap64(val);
2710 address_space_rw(as, addr, (void *) &val, 8, 1);
2711 }
2712
2713 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2714 {
2715 val = cpu_to_le64(val);
2716 address_space_rw(as, addr, (void *) &val, 8, 1);
2717 }
2718
2719 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2720 {
2721 val = cpu_to_be64(val);
2722 address_space_rw(as, addr, (void *) &val, 8, 1);
2723 }
2724
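/* Illustrative use (hypothetical counter address, not part of exec.c):
 * code handling a structure that is little-endian by specification uses
 * the explicit _le_ (or _be_) helpers so the result does not depend on
 * TARGET_WORDS_BIGENDIAN, unlike the native-endian ldl_phys()/stl_phys(). */
static uint32_t example_bump_le_counter(AddressSpace *as, hwaddr counter_addr)
{
    uint32_t v = ldl_le_phys(as, counter_addr);

    stl_le_phys(as, counter_addr, v + 1);
    return v;
}
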
2725 /* virtual memory access for debug (includes writing to ROM) */
2726 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2727 uint8_t *buf, int len, int is_write)
2728 {
2729 int l;
2730 hwaddr phys_addr;
2731 target_ulong page;
2732
2733 while (len > 0) {
2734 page = addr & TARGET_PAGE_MASK;
2735 phys_addr = cpu_get_phys_page_debug(cpu, page);
2736 /* if no physical page mapped, return an error */
2737 if (phys_addr == -1)
2738 return -1;
2739 l = (page + TARGET_PAGE_SIZE) - addr;
2740 if (l > len)
2741 l = len;
2742 phys_addr += (addr & ~TARGET_PAGE_MASK);
2743 if (is_write) {
2744 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2745 } else {
2746 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2747 }
2748 len -= l;
2749 buf += l;
2750 addr += l;
2751 }
2752 return 0;
2753 }
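
/* Illustrative caller (hypothetical debugger-stub helper, not part of
 * exec.c): the debug accessor translates guest-virtual addresses via
 * cpu_get_phys_page_debug(), so it works on pages the TLB has never
 * touched and can write through to ROM. */
static int example_debug_read(CPUState *cpu, target_ulong vaddr,
                              uint8_t *out, int len)
{
    return cpu_memory_rw_debug(cpu, vaddr, out, len, 0);
}
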
2754 #endif
2755
2756 #if !defined(CONFIG_USER_ONLY)
2757
2758 /*
2759 * A helper function for the _utterly broken_ virtio device model to find out if
2760 * it's running on a big endian machine. Don't do this at home kids!
2761 */
2762 bool virtio_is_big_endian(void);
2763 bool virtio_is_big_endian(void)
2764 {
2765 #if defined(TARGET_WORDS_BIGENDIAN)
2766 return true;
2767 #else
2768 return false;
2769 #endif
2770 }
2771
2772 #endif
2773
2774 #ifndef CONFIG_USER_ONLY
2775 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2776 {
2777 MemoryRegion *mr;
2778 hwaddr l = 1;
2779
2780 mr = address_space_translate(&address_space_memory,
2781 phys_addr, &phys_addr, &l, false);
2782
2783 return !(memory_region_is_ram(mr) ||
2784 memory_region_is_romd(mr));
2785 }
2786
2787 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2788 {
2789 RAMBlock *block;
2790
2791 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2792 func(block->host, block->offset, block->length, opaque);
2793 }
2794 }
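
/* Illustrative iterator (hypothetical callback, not part of exec.c;
 * assumes the RAMBlockIterFunc signature matches the call above): sum the
 * length of every RAM block, e.g.
 *     ram_addr_t total = 0;
 *     qemu_ram_foreach_block(example_count_ram_cb, &total); */
static void example_count_ram_cb(void *host_addr, ram_addr_t offset,
                                 ram_addr_t length, void *opaque)
{
    *(ram_addr_t *)opaque += length;
}
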
2795 #endif