1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53
54 #include "qemu/range.h"
55
56 //#define DEBUG_SUBPAGE
57
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
60
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
62
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
65
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
68
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
71
72 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
73 #define RAM_PREALLOC (1 << 0)
74
75 /* RAM is mmap-ed with MAP_SHARED */
76 #define RAM_SHARED (1 << 1)
77
78 #endif
79
80 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
81 /* current CPU in the current thread. It is only valid inside
82 cpu_exec() */
83 DEFINE_TLS(CPUState *, current_cpu);
84 /* 0 = Do not count executed instructions.
85 1 = Precise instruction counting.
86 2 = Adaptive rate instruction counting. */
87 int use_icount;
88
89 #if !defined(CONFIG_USER_ONLY)
90
91 typedef struct PhysPageEntry PhysPageEntry;
92
93 struct PhysPageEntry {
94 /* How many levels to skip to reach the next node (each level covers P_L2_BITS bits); 0 for a leaf. */
95 uint32_t skip : 6;
96 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
97 uint32_t ptr : 26;
98 };
99
100 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
101
102 /* Size of the L2 (and L3, etc) page tables. */
103 #define ADDR_SPACE_BITS 64
104
105 #define P_L2_BITS 9
106 #define P_L2_SIZE (1 << P_L2_BITS)
107
108 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
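/* Worked example (assuming 4 KiB target pages, i.e. TARGET_PAGE_BITS == 12):
 * 64 address bits - 12 page-offset bits = 52 bits to translate; at 9 bits
 * per level this gives (52 - 1) / 9 + 1 = 6 levels, covering 54 >= 52 bits.
 */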
109
110 typedef PhysPageEntry Node[P_L2_SIZE];
111
112 typedef struct PhysPageMap {
113 unsigned sections_nb;
114 unsigned sections_nb_alloc;
115 unsigned nodes_nb;
116 unsigned nodes_nb_alloc;
117 Node *nodes;
118 MemoryRegionSection *sections;
119 } PhysPageMap;
120
121 struct AddressSpaceDispatch {
122 /* This is a multi-level map on the physical address space.
123 * The bottom level has pointers to MemoryRegionSections.
124 */
125 PhysPageEntry phys_map;
126 PhysPageMap map;
127 AddressSpace *as;
128 };
129
130 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
131 typedef struct subpage_t {
132 MemoryRegion iomem;
133 AddressSpace *as;
134 hwaddr base;
135 uint16_t sub_section[TARGET_PAGE_SIZE];
136 } subpage_t;
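/* A subpage covers a single target page that is shared by more than one
 * MemoryRegionSection (for example two small MMIO regions mapped into the
 * same page).  sub_section[] maps every byte offset within the page to the
 * section that owns it; accesses are forwarded through subpage_ops below.
 */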
137
138 #define PHYS_SECTION_UNASSIGNED 0
139 #define PHYS_SECTION_NOTDIRTY 1
140 #define PHYS_SECTION_ROM 2
141 #define PHYS_SECTION_WATCH 3
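/* These indices are fixed: mem_begin() below registers the corresponding
 * dummy sections in exactly this order and asserts that the numbers match.
 */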
142
143 static void io_mem_init(void);
144 static void memory_map_init(void);
145 static void tcg_commit(MemoryListener *listener);
146
147 static MemoryRegion io_mem_watch;
148 #endif
149
150 #if !defined(CONFIG_USER_ONLY)
151
152 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
153 {
154 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
155 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
156 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
157 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
158 }
159 }
160
161 static uint32_t phys_map_node_alloc(PhysPageMap *map)
162 {
163 unsigned i;
164 uint32_t ret;
165
166 ret = map->nodes_nb++;
167 assert(ret != PHYS_MAP_NODE_NIL);
168 assert(ret != map->nodes_nb_alloc);
169 for (i = 0; i < P_L2_SIZE; ++i) {
170 map->nodes[ret][i].skip = 1;
171 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
172 }
173 return ret;
174 }
175
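/* Populate the radix tree: phys_page_set_level() walks down from the top
 * level, allocating intermediate nodes on demand, and stores the leaf
 * section index in every entry whose range is fully covered by
 * [index, index + nb); partially covered entries recurse one level down.
 * phys_page_set() below is the entry point used when registering sections.
 */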
176 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
177 hwaddr *index, hwaddr *nb, uint16_t leaf,
178 int level)
179 {
180 PhysPageEntry *p;
181 int i;
182 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
183
184 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
185 lp->ptr = phys_map_node_alloc(map);
186 p = map->nodes[lp->ptr];
187 if (level == 0) {
188 for (i = 0; i < P_L2_SIZE; i++) {
189 p[i].skip = 0;
190 p[i].ptr = PHYS_SECTION_UNASSIGNED;
191 }
192 }
193 } else {
194 p = map->nodes[lp->ptr];
195 }
196 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
197
198 while (*nb && lp < &p[P_L2_SIZE]) {
199 if ((*index & (step - 1)) == 0 && *nb >= step) {
200 lp->skip = 0;
201 lp->ptr = leaf;
202 *index += step;
203 *nb -= step;
204 } else {
205 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
206 }
207 ++lp;
208 }
209 }
210
211 static void phys_page_set(AddressSpaceDispatch *d,
212 hwaddr index, hwaddr nb,
213 uint16_t leaf)
214 {
215 /* Wildly overreserve - it doesn't matter much. */
216 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
217
218 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
219 }
220
221 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
222 * and update our entry so we can skip it and go directly to the destination.
223 */
224 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
225 {
226 unsigned valid_ptr = P_L2_SIZE;
227 int valid = 0;
228 PhysPageEntry *p;
229 int i;
230
231 if (lp->ptr == PHYS_MAP_NODE_NIL) {
232 return;
233 }
234
235 p = nodes[lp->ptr];
236 for (i = 0; i < P_L2_SIZE; i++) {
237 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
238 continue;
239 }
240
241 valid_ptr = i;
242 valid++;
243 if (p[i].skip) {
244 phys_page_compact(&p[i], nodes, compacted);
245 }
246 }
247
248 /* We can only compress if there's only one child. */
249 if (valid != 1) {
250 return;
251 }
252
253 assert(valid_ptr < P_L2_SIZE);
254
255 /* Don't compress if it won't fit in the # of bits we have. */
256 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
257 return;
258 }
259
260 lp->ptr = p[valid_ptr].ptr;
261 if (!p[valid_ptr].skip) {
262 /* If our only child is a leaf, make this a leaf. */
263 /* By design, we should have made this node a leaf to begin with so we
264 * should never reach here.
265 * But since it's so simple to handle this, let's do it just in case we
266 * change this rule.
267 */
268 lp->skip = 0;
269 } else {
270 lp->skip += p[valid_ptr].skip;
271 }
272 }
273
274 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
275 {
276 DECLARE_BITMAP(compacted, nodes_nb);
277
278 if (d->phys_map.skip) {
279 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
280 }
281 }
282
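/* Look up the section covering a physical address.  Each step consumes
 * lp.skip levels (P_L2_BITS address bits per level), so compacted chains
 * resolve in a single step; a NIL pointer anywhere on the path falls back
 * to the unassigned section.
 */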
283 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
284 Node *nodes, MemoryRegionSection *sections)
285 {
286 PhysPageEntry *p;
287 hwaddr index = addr >> TARGET_PAGE_BITS;
288 int i;
289
290 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
291 if (lp.ptr == PHYS_MAP_NODE_NIL) {
292 return &sections[PHYS_SECTION_UNASSIGNED];
293 }
294 p = nodes[lp.ptr];
295 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
296 }
297
298 if (sections[lp.ptr].size.hi ||
299 range_covers_byte(sections[lp.ptr].offset_within_address_space,
300 sections[lp.ptr].size.lo, addr)) {
301 return &sections[lp.ptr];
302 } else {
303 return &sections[PHYS_SECTION_UNASSIGNED];
304 }
305 }
306
307 bool memory_region_is_unassigned(MemoryRegion *mr)
308 {
309 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
310 && mr != &io_mem_watch;
311 }
312
313 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
314 hwaddr addr,
315 bool resolve_subpage)
316 {
317 MemoryRegionSection *section;
318 subpage_t *subpage;
319
320 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
321 if (resolve_subpage && section->mr->subpage) {
322 subpage = container_of(section->mr, subpage_t, iomem);
323 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
324 }
325 return section;
326 }
327
328 static MemoryRegionSection *
329 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
330 hwaddr *plen, bool resolve_subpage)
331 {
332 MemoryRegionSection *section;
333 Int128 diff;
334
335 section = address_space_lookup_region(d, addr, resolve_subpage);
336 /* Compute offset within MemoryRegionSection */
337 addr -= section->offset_within_address_space;
338
339 /* Compute offset within MemoryRegion */
340 *xlat = addr + section->offset_within_region;
341
342 diff = int128_sub(section->mr->size, int128_make64(addr));
343 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
344 return section;
345 }
346
347 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
348 {
349 if (memory_region_is_ram(mr)) {
350 return !(is_write && mr->readonly);
351 }
352 if (memory_region_is_romd(mr)) {
353 return !is_write;
354 }
355
356 return false;
357 }
358
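/* Translate an address-space address into a (MemoryRegion, offset) pair,
 * walking through any IOMMUs on the path.  Each IOMMU hop rewrites the
 * address via its translate() callback and may redirect the walk into a
 * different target AddressSpace; the length is clamped at every hop so the
 * result never spans a translation entry, and a permission failure
 * short-circuits to the unassigned region.
 */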
359 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
360 hwaddr *xlat, hwaddr *plen,
361 bool is_write)
362 {
363 IOMMUTLBEntry iotlb;
364 MemoryRegionSection *section;
365 MemoryRegion *mr;
366 hwaddr len = *plen;
367
368 for (;;) {
369 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
370 mr = section->mr;
371
372 if (!mr->iommu_ops) {
373 break;
374 }
375
376 iotlb = mr->iommu_ops->translate(mr, addr);
377 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
378 | (addr & iotlb.addr_mask));
379 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
380 if (!(iotlb.perm & (1 << is_write))) {
381 mr = &io_mem_unassigned;
382 break;
383 }
384
385 as = iotlb.target_as;
386 }
387
388 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
389 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
390 len = MIN(page, len);
391 }
392
393 *plen = len;
394 *xlat = addr;
395 return mr;
396 }
397
398 MemoryRegionSection *
399 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
400 hwaddr *plen)
401 {
402 MemoryRegionSection *section;
403 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
404
405 assert(!section->mr->iommu_ops);
406 return section;
407 }
408 #endif
409
410 void cpu_exec_init_all(void)
411 {
412 #if !defined(CONFIG_USER_ONLY)
413 qemu_mutex_init(&ram_list.mutex);
414 memory_map_init();
415 io_mem_init();
416 #endif
417 }
418
419 #if !defined(CONFIG_USER_ONLY)
420
421 static int cpu_common_post_load(void *opaque, int version_id)
422 {
423 CPUState *cpu = opaque;
424
425 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
426 version_id is increased. */
427 cpu->interrupt_request &= ~0x01;
428 tlb_flush(cpu, 1);
429
430 return 0;
431 }
432
433 const VMStateDescription vmstate_cpu_common = {
434 .name = "cpu_common",
435 .version_id = 1,
436 .minimum_version_id = 1,
437 .post_load = cpu_common_post_load,
438 .fields = (VMStateField[]) {
439 VMSTATE_UINT32(halted, CPUState),
440 VMSTATE_UINT32(interrupt_request, CPUState),
441 VMSTATE_END_OF_LIST()
442 }
443 };
444
445 #endif
446
447 CPUState *qemu_get_cpu(int index)
448 {
449 CPUState *cpu;
450
451 CPU_FOREACH(cpu) {
452 if (cpu->cpu_index == index) {
453 return cpu;
454 }
455 }
456
457 return NULL;
458 }
459
460 #if !defined(CONFIG_USER_ONLY)
461 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
462 {
463 /* We only support one address space per cpu at the moment. */
464 assert(cpu->as == as);
465
466 if (cpu->tcg_as_listener) {
467 memory_listener_unregister(cpu->tcg_as_listener);
468 } else {
469 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
470 }
471 cpu->tcg_as_listener->commit = tcg_commit;
472 memory_listener_register(cpu->tcg_as_listener, as);
473 }
474 #endif
475
476 void cpu_exec_init(CPUArchState *env)
477 {
478 CPUState *cpu = ENV_GET_CPU(env);
479 CPUClass *cc = CPU_GET_CLASS(cpu);
480 CPUState *some_cpu;
481 int cpu_index;
482
483 #if defined(CONFIG_USER_ONLY)
484 cpu_list_lock();
485 #endif
486 cpu_index = 0;
487 CPU_FOREACH(some_cpu) {
488 cpu_index++;
489 }
490 cpu->cpu_index = cpu_index;
491 cpu->numa_node = 0;
492 QTAILQ_INIT(&cpu->breakpoints);
493 QTAILQ_INIT(&cpu->watchpoints);
494 #ifndef CONFIG_USER_ONLY
495 cpu->as = &address_space_memory;
496 cpu->thread_id = qemu_get_thread_id();
497 #endif
498 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
499 #if defined(CONFIG_USER_ONLY)
500 cpu_list_unlock();
501 #endif
502 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
503 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
504 }
505 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
506 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
507 cpu_save, cpu_load, env);
508 assert(cc->vmsd == NULL);
509 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
510 #endif
511 if (cc->vmsd != NULL) {
512 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
513 }
514 }
515
516 #if defined(TARGET_HAS_ICE)
517 #if defined(CONFIG_USER_ONLY)
518 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
519 {
520 tb_invalidate_phys_page_range(pc, pc + 1, 0);
521 }
522 #else
523 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
524 {
525 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
526 if (phys != -1) {
527 tb_invalidate_phys_addr(cpu->as,
528 phys | (pc & ~TARGET_PAGE_MASK));
529 }
530 }
531 #endif
532 #endif /* TARGET_HAS_ICE */
533
534 #if defined(CONFIG_USER_ONLY)
535 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
536
537 {
538 }
539
540 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
541 int flags, CPUWatchpoint **watchpoint)
542 {
543 return -ENOSYS;
544 }
545 #else
546 /* Add a watchpoint. */
547 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
548 int flags, CPUWatchpoint **watchpoint)
549 {
550 vaddr len_mask = ~(len - 1);
551 CPUWatchpoint *wp;
552
553 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
554 if ((len & (len - 1)) || (addr & ~len_mask) ||
555 len == 0 || len > TARGET_PAGE_SIZE) {
556 error_report("tried to set invalid watchpoint at %"
557 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
558 return -EINVAL;
559 }
560 wp = g_malloc(sizeof(*wp));
561
562 wp->vaddr = addr;
563 wp->len_mask = len_mask;
564 wp->flags = flags;
565
566 /* keep all GDB-injected watchpoints in front */
567 if (flags & BP_GDB) {
568 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
569 } else {
570 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
571 }
572
573 tlb_flush_page(cpu, addr);
574
575 if (watchpoint)
576 *watchpoint = wp;
577 return 0;
578 }
579
580 /* Remove a specific watchpoint. */
581 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
582 int flags)
583 {
584 vaddr len_mask = ~(len - 1);
585 CPUWatchpoint *wp;
586
587 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
588 if (addr == wp->vaddr && len_mask == wp->len_mask
589 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
590 cpu_watchpoint_remove_by_ref(cpu, wp);
591 return 0;
592 }
593 }
594 return -ENOENT;
595 }
596
597 /* Remove a specific watchpoint by reference. */
598 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
599 {
600 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
601
602 tlb_flush_page(cpu, watchpoint->vaddr);
603
604 g_free(watchpoint);
605 }
606
607 /* Remove all matching watchpoints. */
608 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
609 {
610 CPUWatchpoint *wp, *next;
611
612 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
613 if (wp->flags & mask) {
614 cpu_watchpoint_remove_by_ref(cpu, wp);
615 }
616 }
617 }
618 #endif
619
620 /* Add a breakpoint. */
621 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
622 CPUBreakpoint **breakpoint)
623 {
624 #if defined(TARGET_HAS_ICE)
625 CPUBreakpoint *bp;
626
627 bp = g_malloc(sizeof(*bp));
628
629 bp->pc = pc;
630 bp->flags = flags;
631
632 /* keep all GDB-injected breakpoints in front */
633 if (flags & BP_GDB) {
634 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
635 } else {
636 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
637 }
638
639 breakpoint_invalidate(cpu, pc);
640
641 if (breakpoint) {
642 *breakpoint = bp;
643 }
644 return 0;
645 #else
646 return -ENOSYS;
647 #endif
648 }
649
650 /* Remove a specific breakpoint. */
651 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
652 {
653 #if defined(TARGET_HAS_ICE)
654 CPUBreakpoint *bp;
655
656 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
657 if (bp->pc == pc && bp->flags == flags) {
658 cpu_breakpoint_remove_by_ref(cpu, bp);
659 return 0;
660 }
661 }
662 return -ENOENT;
663 #else
664 return -ENOSYS;
665 #endif
666 }
667
668 /* Remove a specific breakpoint by reference. */
669 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
670 {
671 #if defined(TARGET_HAS_ICE)
672 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
673
674 breakpoint_invalidate(cpu, breakpoint->pc);
675
676 g_free(breakpoint);
677 #endif
678 }
679
680 /* Remove all matching breakpoints. */
681 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
682 {
683 #if defined(TARGET_HAS_ICE)
684 CPUBreakpoint *bp, *next;
685
686 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
687 if (bp->flags & mask) {
688 cpu_breakpoint_remove_by_ref(cpu, bp);
689 }
690 }
691 #endif
692 }
693
694 /* enable or disable single step mode. EXCP_DEBUG is returned by the
695 CPU loop after each instruction */
696 void cpu_single_step(CPUState *cpu, int enabled)
697 {
698 #if defined(TARGET_HAS_ICE)
699 if (cpu->singlestep_enabled != enabled) {
700 cpu->singlestep_enabled = enabled;
701 if (kvm_enabled()) {
702 kvm_update_guest_debug(cpu, 0);
703 } else {
704 /* must flush all the translated code to avoid inconsistencies */
705 /* XXX: only flush what is necessary */
706 CPUArchState *env = cpu->env_ptr;
707 tb_flush(env);
708 }
709 }
710 #endif
711 }
712
713 void cpu_abort(CPUState *cpu, const char *fmt, ...)
714 {
715 va_list ap;
716 va_list ap2;
717
718 va_start(ap, fmt);
719 va_copy(ap2, ap);
720 fprintf(stderr, "qemu: fatal: ");
721 vfprintf(stderr, fmt, ap);
722 fprintf(stderr, "\n");
723 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
724 if (qemu_log_enabled()) {
725 qemu_log("qemu: fatal: ");
726 qemu_log_vprintf(fmt, ap2);
727 qemu_log("\n");
728 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
729 qemu_log_flush();
730 qemu_log_close();
731 }
732 va_end(ap2);
733 va_end(ap);
734 #if defined(CONFIG_USER_ONLY)
735 {
736 struct sigaction act;
737 sigfillset(&act.sa_mask);
738 act.sa_handler = SIG_DFL;
739 sigaction(SIGABRT, &act, NULL);
740 }
741 #endif
742 abort();
743 }
744
745 #if !defined(CONFIG_USER_ONLY)
746 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
747 {
748 RAMBlock *block;
749
750 /* The list is protected by the iothread lock here. */
751 block = ram_list.mru_block;
752 if (block && addr - block->offset < block->length) {
753 goto found;
754 }
755 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
756 if (addr - block->offset < block->length) {
757 goto found;
758 }
759 }
760
761 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
762 abort();
763
764 found:
765 ram_list.mru_block = block;
766 return block;
767 }
768
769 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
770 {
771 ram_addr_t start1;
772 RAMBlock *block;
773 ram_addr_t end;
774
775 end = TARGET_PAGE_ALIGN(start + length);
776 start &= TARGET_PAGE_MASK;
777
778 block = qemu_get_ram_block(start);
779 assert(block == qemu_get_ram_block(end - 1));
780 start1 = (uintptr_t)block->host + (start - block->offset);
781 cpu_tlb_reset_dirty_all(start1, length);
782 }
783
784 /* Note: start and end must be within the same ram block. */
785 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
786 unsigned client)
787 {
788 if (length == 0)
789 return;
790 cpu_physical_memory_clear_dirty_range(start, length, client);
791
792 if (tcg_enabled()) {
793 tlb_reset_dirty_range_all(start, length);
794 }
795 }
796
797 static void cpu_physical_memory_set_dirty_tracking(bool enable)
798 {
799 in_migration = enable;
800 }
801
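/* Compute the value stored in the iotlb field of a TCG TLB entry.  For RAM
 * the page's ram_addr is combined with PHYS_SECTION_NOTDIRTY or
 * PHYS_SECTION_ROM in the low bits; for MMIO the section number within the
 * dispatch map plus the offset into the region is used instead.  See
 * phys_section_add() for why section numbers never overflow into the
 * page-aligned part.
 */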
802 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
803 MemoryRegionSection *section,
804 target_ulong vaddr,
805 hwaddr paddr, hwaddr xlat,
806 int prot,
807 target_ulong *address)
808 {
809 hwaddr iotlb;
810 CPUWatchpoint *wp;
811
812 if (memory_region_is_ram(section->mr)) {
813 /* Normal RAM. */
814 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
815 + xlat;
816 if (!section->readonly) {
817 iotlb |= PHYS_SECTION_NOTDIRTY;
818 } else {
819 iotlb |= PHYS_SECTION_ROM;
820 }
821 } else {
822 iotlb = section - section->address_space->dispatch->map.sections;
823 iotlb += xlat;
824 }
825
826 /* Make accesses to pages with watchpoints go via the
827 watchpoint trap routines. */
828 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
829 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
830 /* Avoid trapping reads of pages with a write breakpoint. */
831 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
832 iotlb = PHYS_SECTION_WATCH + paddr;
833 *address |= TLB_MMIO;
834 break;
835 }
836 }
837 }
838
839 return iotlb;
840 }
841 #endif /* !defined(CONFIG_USER_ONLY) */
842
843 #if !defined(CONFIG_USER_ONLY)
844
845 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
846 uint16_t section);
847 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
848
849 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
850
851 /*
852 * Set a custom physical guest memory allocator.
853 * Accelerators with unusual needs may need this. Hopefully, we can
854 * get rid of it eventually.
855 */
856 void phys_mem_set_alloc(void *(*alloc)(size_t))
857 {
858 phys_mem_alloc = alloc;
859 }
860
861 static uint16_t phys_section_add(PhysPageMap *map,
862 MemoryRegionSection *section)
863 {
864 /* The physical section number is ORed with a page-aligned
865 * pointer to produce the iotlb entries. Thus it should
866 * never overflow into the page-aligned value.
867 */
868 assert(map->sections_nb < TARGET_PAGE_SIZE);
869
870 if (map->sections_nb == map->sections_nb_alloc) {
871 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
872 map->sections = g_renew(MemoryRegionSection, map->sections,
873 map->sections_nb_alloc);
874 }
875 map->sections[map->sections_nb] = *section;
876 memory_region_ref(section->mr);
877 return map->sections_nb++;
878 }
879
880 static void phys_section_destroy(MemoryRegion *mr)
881 {
882 memory_region_unref(mr);
883
884 if (mr->subpage) {
885 subpage_t *subpage = container_of(mr, subpage_t, iomem);
886 object_unref(OBJECT(&subpage->iomem));
887 g_free(subpage);
888 }
889 }
890
891 static void phys_sections_free(PhysPageMap *map)
892 {
893 while (map->sections_nb > 0) {
894 MemoryRegionSection *section = &map->sections[--map->sections_nb];
895 phys_section_destroy(section->mr);
896 }
897 g_free(map->sections);
898 g_free(map->nodes);
899 }
900
901 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
902 {
903 subpage_t *subpage;
904 hwaddr base = section->offset_within_address_space
905 & TARGET_PAGE_MASK;
906 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
907 d->map.nodes, d->map.sections);
908 MemoryRegionSection subsection = {
909 .offset_within_address_space = base,
910 .size = int128_make64(TARGET_PAGE_SIZE),
911 };
912 hwaddr start, end;
913
914 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
915
916 if (!(existing->mr->subpage)) {
917 subpage = subpage_init(d->as, base);
918 subsection.address_space = d->as;
919 subsection.mr = &subpage->iomem;
920 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
921 phys_section_add(&d->map, &subsection));
922 } else {
923 subpage = container_of(existing->mr, subpage_t, iomem);
924 }
925 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
926 end = start + int128_get64(section->size) - 1;
927 subpage_register(subpage, start, end,
928 phys_section_add(&d->map, section));
929 }
930
931
932 static void register_multipage(AddressSpaceDispatch *d,
933 MemoryRegionSection *section)
934 {
935 hwaddr start_addr = section->offset_within_address_space;
936 uint16_t section_index = phys_section_add(&d->map, section);
937 uint64_t num_pages = int128_get64(int128_rshift(section->size,
938 TARGET_PAGE_BITS));
939
940 assert(num_pages);
941 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
942 }
943
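/* Add a MemoryRegionSection to the dispatch map being built: the section is
 * split into an (optional) unaligned head registered as a subpage, a run of
 * whole target pages registered via register_multipage(), and an (optional)
 * short or unaligned tail registered as a subpage again.
 */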
944 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
945 {
946 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
947 AddressSpaceDispatch *d = as->next_dispatch;
948 MemoryRegionSection now = *section, remain = *section;
949 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
950
951 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
952 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
953 - now.offset_within_address_space;
954
955 now.size = int128_min(int128_make64(left), now.size);
956 register_subpage(d, &now);
957 } else {
958 now.size = int128_zero();
959 }
960 while (int128_ne(remain.size, now.size)) {
961 remain.size = int128_sub(remain.size, now.size);
962 remain.offset_within_address_space += int128_get64(now.size);
963 remain.offset_within_region += int128_get64(now.size);
964 now = remain;
965 if (int128_lt(remain.size, page_size)) {
966 register_subpage(d, &now);
967 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
968 now.size = page_size;
969 register_subpage(d, &now);
970 } else {
971 now.size = int128_and(now.size, int128_neg(page_size));
972 register_multipage(d, &now);
973 }
974 }
975 }
976
977 void qemu_flush_coalesced_mmio_buffer(void)
978 {
979 if (kvm_enabled())
980 kvm_flush_coalesced_mmio_buffer();
981 }
982
983 void qemu_mutex_lock_ramlist(void)
984 {
985 qemu_mutex_lock(&ram_list.mutex);
986 }
987
988 void qemu_mutex_unlock_ramlist(void)
989 {
990 qemu_mutex_unlock(&ram_list.mutex);
991 }
992
993 #ifdef __linux__
994
995 #include <sys/vfs.h>
996
997 #define HUGETLBFS_MAGIC 0x958458f6
998
999 static long gethugepagesize(const char *path)
1000 {
1001 struct statfs fs;
1002 int ret;
1003
1004 do {
1005 ret = statfs(path, &fs);
1006 } while (ret != 0 && errno == EINTR);
1007
1008 if (ret != 0) {
1009 perror(path);
1010 return 0;
1011 }
1012
1013 if (fs.f_type != HUGETLBFS_MAGIC)
1014 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1015
1016 return fs.f_bsize;
1017 }
1018
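/* Back a RAMBlock with a file on a hugetlbfs mount (-mem-path).  A unique
 * file is created with mkstemp() and immediately unlink()ed so it disappears
 * when QEMU exits, grown to the hugepage-aligned size with ftruncate(), and
 * mmap()ed MAP_SHARED or MAP_PRIVATE depending on the block's RAM_SHARED
 * flag.
 */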
1019 static void *file_ram_alloc(RAMBlock *block,
1020 ram_addr_t memory,
1021 const char *path,
1022 Error **errp)
1023 {
1024 char *filename;
1025 char *sanitized_name;
1026 char *c;
1027 void *area;
1028 int fd;
1029 unsigned long hpagesize;
1030
1031 hpagesize = gethugepagesize(path);
1032 if (!hpagesize) {
1033 goto error;
1034 }
1035
1036 if (memory < hpagesize) {
1037 return NULL;
1038 }
1039
1040 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1041 error_setg(errp,
1042 "host lacks kvm mmu notifiers, -mem-path unsupported");
1043 goto error;
1044 }
1045
1046 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1047 sanitized_name = g_strdup(block->mr->name);
1048 for (c = sanitized_name; *c != '\0'; c++) {
1049 if (*c == '/')
1050 *c = '_';
1051 }
1052
1053 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1054 sanitized_name);
1055 g_free(sanitized_name);
1056
1057 fd = mkstemp(filename);
1058 if (fd < 0) {
1059 error_setg_errno(errp, errno,
1060 "unable to create backing store for hugepages");
1061 g_free(filename);
1062 goto error;
1063 }
1064 unlink(filename);
1065 g_free(filename);
1066
1067 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1068
1069 /*
1070 * ftruncate is not supported by hugetlbfs in older
1071 * hosts, so don't bother bailing out on errors.
1072 * If anything goes wrong with it under other filesystems,
1073 * mmap will fail.
1074 */
1075 if (ftruncate(fd, memory)) {
1076 perror("ftruncate");
1077 }
1078
1079 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1080 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1081 fd, 0);
1082 if (area == MAP_FAILED) {
1083 error_setg_errno(errp, errno,
1084 "unable to map backing store for hugepages");
1085 close(fd);
1086 goto error;
1087 }
1088
1089 if (mem_prealloc) {
1090 os_mem_prealloc(fd, area, memory);
1091 }
1092
1093 block->fd = fd;
1094 return area;
1095
1096 error:
1097 if (mem_prealloc) {
1098 exit(1);
1099 }
1100 return NULL;
1101 }
1102 #endif
1103
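/* Pick an offset in the ram_addr_t space for a new block: scan the gaps
 * between existing blocks and return the start of the smallest gap that
 * still fits the requested size (best fit), aborting if none does.
 */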
1104 static ram_addr_t find_ram_offset(ram_addr_t size)
1105 {
1106 RAMBlock *block, *next_block;
1107 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1108
1109 assert(size != 0); /* it would hand out the same offset multiple times */
1110
1111 if (QTAILQ_EMPTY(&ram_list.blocks))
1112 return 0;
1113
1114 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1115 ram_addr_t end, next = RAM_ADDR_MAX;
1116
1117 end = block->offset + block->length;
1118
1119 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1120 if (next_block->offset >= end) {
1121 next = MIN(next, next_block->offset);
1122 }
1123 }
1124 if (next - end >= size && next - end < mingap) {
1125 offset = end;
1126 mingap = next - end;
1127 }
1128 }
1129
1130 if (offset == RAM_ADDR_MAX) {
1131 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1132 (uint64_t)size);
1133 abort();
1134 }
1135
1136 return offset;
1137 }
1138
1139 ram_addr_t last_ram_offset(void)
1140 {
1141 RAMBlock *block;
1142 ram_addr_t last = 0;
1143
1144 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1145 last = MAX(last, block->offset + block->length);
1146
1147 return last;
1148 }
1149
1150 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1151 {
1152 int ret;
1153
1154 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1155 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1156 "dump-guest-core", true)) {
1157 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1158 if (ret) {
1159 perror("qemu_madvise");
1160 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1161 "but dump_guest_core=off specified\n");
1162 }
1163 }
1164 }
1165
1166 static RAMBlock *find_ram_block(ram_addr_t addr)
1167 {
1168 RAMBlock *block;
1169
1170 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1171 if (block->offset == addr) {
1172 return block;
1173 }
1174 }
1175
1176 return NULL;
1177 }
1178
1179 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1180 {
1181 RAMBlock *new_block = find_ram_block(addr);
1182 RAMBlock *block;
1183
1184 assert(new_block);
1185 assert(!new_block->idstr[0]);
1186
1187 if (dev) {
1188 char *id = qdev_get_dev_path(dev);
1189 if (id) {
1190 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1191 g_free(id);
1192 }
1193 }
1194 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1195
1196 /* This assumes the iothread lock is taken here too. */
1197 qemu_mutex_lock_ramlist();
1198 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1199 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1200 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1201 new_block->idstr);
1202 abort();
1203 }
1204 }
1205 qemu_mutex_unlock_ramlist();
1206 }
1207
1208 void qemu_ram_unset_idstr(ram_addr_t addr)
1209 {
1210 RAMBlock *block = find_ram_block(addr);
1211
1212 if (block) {
1213 memset(block->idstr, 0, sizeof(block->idstr));
1214 }
1215 }
1216
1217 static int memory_try_enable_merging(void *addr, size_t len)
1218 {
1219 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1220 /* disabled by the user */
1221 return 0;
1222 }
1223
1224 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1225 }
1226
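/* Insert a new RAMBlock into the global list: allocate host memory unless
 * the caller already provided it (or Xen owns the allocation), keep the
 * list sorted from largest to smallest block, extend the dirty-memory
 * bitmaps if the ram_addr_t space grew, and mark the whole new range dirty
 * so migration and the VGA code pick it up.
 */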
1227 static ram_addr_t ram_block_add(RAMBlock *new_block)
1228 {
1229 RAMBlock *block;
1230 ram_addr_t old_ram_size, new_ram_size;
1231
1232 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1233
1234 /* This assumes the iothread lock is taken here too. */
1235 qemu_mutex_lock_ramlist();
1236 new_block->offset = find_ram_offset(new_block->length);
1237
1238 if (!new_block->host) {
1239 if (xen_enabled()) {
1240 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1241 } else {
1242 new_block->host = phys_mem_alloc(new_block->length);
1243 if (!new_block->host) {
1244 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1245 new_block->mr->name, strerror(errno));
1246 exit(1);
1247 }
1248 memory_try_enable_merging(new_block->host, new_block->length);
1249 }
1250 }
1251
1252 /* Keep the list sorted from biggest to smallest block. */
1253 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1254 if (block->length < new_block->length) {
1255 break;
1256 }
1257 }
1258 if (block) {
1259 QTAILQ_INSERT_BEFORE(block, new_block, next);
1260 } else {
1261 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1262 }
1263 ram_list.mru_block = NULL;
1264
1265 ram_list.version++;
1266 qemu_mutex_unlock_ramlist();
1267
1268 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1269
1270 if (new_ram_size > old_ram_size) {
1271 int i;
1272 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1273 ram_list.dirty_memory[i] =
1274 bitmap_zero_extend(ram_list.dirty_memory[i],
1275 old_ram_size, new_ram_size);
1276 }
1277 }
1278 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1279
1280 qemu_ram_setup_dump(new_block->host, new_block->length);
1281 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1282 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1283
1284 if (kvm_enabled()) {
1285 kvm_setup_guest_memory(new_block->host, new_block->length);
1286 }
1287
1288 return new_block->offset;
1289 }
1290
1291 #ifdef __linux__
1292 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1293 bool share, const char *mem_path,
1294 Error **errp)
1295 {
1296 RAMBlock *new_block;
1297
1298 if (xen_enabled()) {
1299 error_setg(errp, "-mem-path not supported with Xen");
1300 return -1;
1301 }
1302
1303 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1304 /*
1305 * file_ram_alloc() needs to allocate just like
1306 * phys_mem_alloc, but we haven't bothered to provide
1307 * a hook there.
1308 */
1309 error_setg(errp,
1310 "-mem-path not supported with this accelerator");
1311 return -1;
1312 }
1313
1314 size = TARGET_PAGE_ALIGN(size);
1315 new_block = g_malloc0(sizeof(*new_block));
1316 new_block->mr = mr;
1317 new_block->length = size;
1318 new_block->flags = share ? RAM_SHARED : 0;
1319 new_block->host = file_ram_alloc(new_block, size,
1320 mem_path, errp);
1321 if (!new_block->host) {
1322 g_free(new_block);
1323 return -1;
1324 }
1325
1326 return ram_block_add(new_block);
1327 }
1328 #endif
1329
1330 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1331 MemoryRegion *mr)
1332 {
1333 RAMBlock *new_block;
1334
1335 size = TARGET_PAGE_ALIGN(size);
1336 new_block = g_malloc0(sizeof(*new_block));
1337 new_block->mr = mr;
1338 new_block->length = size;
1339 new_block->fd = -1;
1340 new_block->host = host;
1341 if (host) {
1342 new_block->flags |= RAM_PREALLOC;
1343 }
1344 return ram_block_add(new_block);
1345 }
1346
1347 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1348 {
1349 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1350 }
1351
1352 void qemu_ram_free_from_ptr(ram_addr_t addr)
1353 {
1354 RAMBlock *block;
1355
1356 /* This assumes the iothread lock is taken here too. */
1357 qemu_mutex_lock_ramlist();
1358 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1359 if (addr == block->offset) {
1360 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1361 ram_list.mru_block = NULL;
1362 ram_list.version++;
1363 g_free(block);
1364 break;
1365 }
1366 }
1367 qemu_mutex_unlock_ramlist();
1368 }
1369
1370 void qemu_ram_free(ram_addr_t addr)
1371 {
1372 RAMBlock *block;
1373
1374 /* This assumes the iothread lock is taken here too. */
1375 qemu_mutex_lock_ramlist();
1376 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1377 if (addr == block->offset) {
1378 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1379 ram_list.mru_block = NULL;
1380 ram_list.version++;
1381 if (block->flags & RAM_PREALLOC) {
1382 ;
1383 } else if (xen_enabled()) {
1384 xen_invalidate_map_cache_entry(block->host);
1385 #ifndef _WIN32
1386 } else if (block->fd >= 0) {
1387 munmap(block->host, block->length);
1388 close(block->fd);
1389 #endif
1390 } else {
1391 qemu_anon_ram_free(block->host, block->length);
1392 }
1393 g_free(block);
1394 break;
1395 }
1396 }
1397 qemu_mutex_unlock_ramlist();
1398
1399 }
1400
1401 #ifndef _WIN32
1402 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1403 {
1404 RAMBlock *block;
1405 ram_addr_t offset;
1406 int flags;
1407 void *area, *vaddr;
1408
1409 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1410 offset = addr - block->offset;
1411 if (offset < block->length) {
1412 vaddr = block->host + offset;
1413 if (block->flags & RAM_PREALLOC) {
1414 ;
1415 } else if (xen_enabled()) {
1416 abort();
1417 } else {
1418 flags = MAP_FIXED;
1419 munmap(vaddr, length);
1420 if (block->fd >= 0) {
1421 flags |= (block->flags & RAM_SHARED ?
1422 MAP_SHARED : MAP_PRIVATE);
1423 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1424 flags, block->fd, offset);
1425 } else {
1426 /*
1427 * Remap needs to match alloc. Accelerators that
1428 * set phys_mem_alloc never remap. If they did,
1429 * we'd need a remap hook here.
1430 */
1431 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1432
1433 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1434 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1435 flags, -1, 0);
1436 }
1437 if (area != vaddr) {
1438 fprintf(stderr, "Could not remap addr: "
1439 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1440 length, addr);
1441 exit(1);
1442 }
1443 memory_try_enable_merging(vaddr, length);
1444 qemu_ram_setup_dump(vaddr, length);
1445 }
1446 return;
1447 }
1448 }
1449 }
1450 #endif /* !_WIN32 */
1451
1452 int qemu_get_ram_fd(ram_addr_t addr)
1453 {
1454 RAMBlock *block = qemu_get_ram_block(addr);
1455
1456 return block->fd;
1457 }
1458
1459 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1460 {
1461 RAMBlock *block = qemu_get_ram_block(addr);
1462
1463 return block->host;
1464 }
1465
1466 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1467 With the exception of the softmmu code in this file, this should
1468 only be used for local memory (e.g. video ram) that the device owns,
1469 and knows it isn't going to access beyond the end of the block.
1470
1471 It should not be used for general purpose DMA.
1472 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1473 */
1474 void *qemu_get_ram_ptr(ram_addr_t addr)
1475 {
1476 RAMBlock *block = qemu_get_ram_block(addr);
1477
1478 if (xen_enabled()) {
1479 /* We need to check if the requested address is in the RAM
1480 * because we don't want to map the entire memory in QEMU.
1481 * In that case just map until the end of the page.
1482 */
1483 if (block->offset == 0) {
1484 return xen_map_cache(addr, 0, 0);
1485 } else if (block->host == NULL) {
1486 block->host =
1487 xen_map_cache(block->offset, block->length, 1);
1488 }
1489 }
1490 return block->host + (addr - block->offset);
1491 }
1492
1493 /* Return a host pointer to the guest's RAM.  Similar to qemu_get_ram_ptr
1494  * but takes a size argument. */
1495 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1496 {
1497 if (*size == 0) {
1498 return NULL;
1499 }
1500 if (xen_enabled()) {
1501 return xen_map_cache(addr, *size, 1);
1502 } else {
1503 RAMBlock *block;
1504
1505 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1506 if (addr - block->offset < block->length) {
1507 if (addr - block->offset + *size > block->length)
1508 *size = block->length - addr + block->offset;
1509 return block->host + (addr - block->offset);
1510 }
1511 }
1512
1513 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1514 abort();
1515 }
1516 }
1517
1518 /* Some of the softmmu routines need to translate from a host pointer
1519 (typically a TLB entry) back to a ram offset. */
1520 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1521 {
1522 RAMBlock *block;
1523 uint8_t *host = ptr;
1524
1525 if (xen_enabled()) {
1526 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1527 return qemu_get_ram_block(*ram_addr)->mr;
1528 }
1529
1530 block = ram_list.mru_block;
1531 if (block && block->host && host - block->host < block->length) {
1532 goto found;
1533 }
1534
1535 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1536 /* This case happens when the block is not mapped. */
1537 if (block->host == NULL) {
1538 continue;
1539 }
1540 if (host - block->host < block->length) {
1541 goto found;
1542 }
1543 }
1544
1545 return NULL;
1546
1547 found:
1548 *ram_addr = block->offset + (host - block->host);
1549 return block->mr;
1550 }
1551
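/* Writes land in io_mem_notdirty when the target page's dirty bits are
 * still clean, typically because it contains translated code.  The handler
 * invalidates any TBs for the page, performs the write on the underlying
 * RAM, sets the MIGRATION and VGA dirty bits and, once the page is dirty
 * for all clients (i.e. its code has been flushed), marks the TLB entry
 * dirty again so subsequent writes go straight to RAM.
 */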
1552 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1553 uint64_t val, unsigned size)
1554 {
1555 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1556 tb_invalidate_phys_page_fast(ram_addr, size);
1557 }
1558 switch (size) {
1559 case 1:
1560 stb_p(qemu_get_ram_ptr(ram_addr), val);
1561 break;
1562 case 2:
1563 stw_p(qemu_get_ram_ptr(ram_addr), val);
1564 break;
1565 case 4:
1566 stl_p(qemu_get_ram_ptr(ram_addr), val);
1567 break;
1568 default:
1569 abort();
1570 }
1571 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1572 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1573 /* we remove the notdirty callback only if the code has been
1574 flushed */
1575 if (!cpu_physical_memory_is_clean(ram_addr)) {
1576 CPUArchState *env = current_cpu->env_ptr;
1577 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1578 }
1579 }
1580
1581 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1582 unsigned size, bool is_write)
1583 {
1584 return is_write;
1585 }
1586
1587 static const MemoryRegionOps notdirty_mem_ops = {
1588 .write = notdirty_mem_write,
1589 .valid.accepts = notdirty_mem_accepts,
1590 .endianness = DEVICE_NATIVE_ENDIAN,
1591 };
1592
1593 /* Generate a debug exception if a watchpoint has been hit. */
1594 static void check_watchpoint(int offset, int len_mask, int flags)
1595 {
1596 CPUState *cpu = current_cpu;
1597 CPUArchState *env = cpu->env_ptr;
1598 target_ulong pc, cs_base;
1599 target_ulong vaddr;
1600 CPUWatchpoint *wp;
1601 int cpu_flags;
1602
1603 if (cpu->watchpoint_hit) {
1604 /* We re-entered the check after replacing the TB. Now raise
1605 * the debug interrupt so that it will trigger after the
1606 * current instruction. */
1607 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1608 return;
1609 }
1610 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1611 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1612 if ((vaddr == (wp->vaddr & len_mask) ||
1613 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1614 wp->flags |= BP_WATCHPOINT_HIT;
1615 if (!cpu->watchpoint_hit) {
1616 cpu->watchpoint_hit = wp;
1617 tb_check_watchpoint(cpu);
1618 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1619 cpu->exception_index = EXCP_DEBUG;
1620 cpu_loop_exit(cpu);
1621 } else {
1622 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1623 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1624 cpu_resume_from_signal(cpu, NULL);
1625 }
1626 }
1627 } else {
1628 wp->flags &= ~BP_WATCHPOINT_HIT;
1629 }
1630 }
1631 }
1632
1633 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1634 so these check for a hit then pass through to the normal out-of-line
1635 phys routines. */
1636 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1637 unsigned size)
1638 {
1639 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1640 switch (size) {
1641 case 1: return ldub_phys(&address_space_memory, addr);
1642 case 2: return lduw_phys(&address_space_memory, addr);
1643 case 4: return ldl_phys(&address_space_memory, addr);
1644 default: abort();
1645 }
1646 }
1647
1648 static void watch_mem_write(void *opaque, hwaddr addr,
1649 uint64_t val, unsigned size)
1650 {
1651 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1652 switch (size) {
1653 case 1:
1654 stb_phys(&address_space_memory, addr, val);
1655 break;
1656 case 2:
1657 stw_phys(&address_space_memory, addr, val);
1658 break;
1659 case 4:
1660 stl_phys(&address_space_memory, addr, val);
1661 break;
1662 default: abort();
1663 }
1664 }
1665
1666 static const MemoryRegionOps watch_mem_ops = {
1667 .read = watch_mem_read,
1668 .write = watch_mem_write,
1669 .endianness = DEVICE_NATIVE_ENDIAN,
1670 };
1671
1672 static uint64_t subpage_read(void *opaque, hwaddr addr,
1673 unsigned len)
1674 {
1675 subpage_t *subpage = opaque;
1676 uint8_t buf[4];
1677
1678 #if defined(DEBUG_SUBPAGE)
1679 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1680 subpage, len, addr);
1681 #endif
1682 address_space_read(subpage->as, addr + subpage->base, buf, len);
1683 switch (len) {
1684 case 1:
1685 return ldub_p(buf);
1686 case 2:
1687 return lduw_p(buf);
1688 case 4:
1689 return ldl_p(buf);
1690 default:
1691 abort();
1692 }
1693 }
1694
1695 static void subpage_write(void *opaque, hwaddr addr,
1696 uint64_t value, unsigned len)
1697 {
1698 subpage_t *subpage = opaque;
1699 uint8_t buf[4];
1700
1701 #if defined(DEBUG_SUBPAGE)
1702 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1703 " value %"PRIx64"\n",
1704 __func__, subpage, len, addr, value);
1705 #endif
1706 switch (len) {
1707 case 1:
1708 stb_p(buf, value);
1709 break;
1710 case 2:
1711 stw_p(buf, value);
1712 break;
1713 case 4:
1714 stl_p(buf, value);
1715 break;
1716 default:
1717 abort();
1718 }
1719 address_space_write(subpage->as, addr + subpage->base, buf, len);
1720 }
1721
1722 static bool subpage_accepts(void *opaque, hwaddr addr,
1723 unsigned len, bool is_write)
1724 {
1725 subpage_t *subpage = opaque;
1726 #if defined(DEBUG_SUBPAGE)
1727 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1728 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1729 #endif
1730
1731 return address_space_access_valid(subpage->as, addr + subpage->base,
1732 len, is_write);
1733 }
1734
1735 static const MemoryRegionOps subpage_ops = {
1736 .read = subpage_read,
1737 .write = subpage_write,
1738 .valid.accepts = subpage_accepts,
1739 .endianness = DEVICE_NATIVE_ENDIAN,
1740 };
1741
1742 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1743 uint16_t section)
1744 {
1745 int idx, eidx;
1746
1747 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1748 return -1;
1749 idx = SUBPAGE_IDX(start);
1750 eidx = SUBPAGE_IDX(end);
1751 #if defined(DEBUG_SUBPAGE)
1752 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1753 __func__, mmio, start, end, idx, eidx, section);
1754 #endif
1755 for (; idx <= eidx; idx++) {
1756 mmio->sub_section[idx] = section;
1757 }
1758
1759 return 0;
1760 }
1761
1762 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1763 {
1764 subpage_t *mmio;
1765
1766 mmio = g_malloc0(sizeof(subpage_t));
1767
1768 mmio->as = as;
1769 mmio->base = base;
1770 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1771 NULL, TARGET_PAGE_SIZE);
1772 mmio->iomem.subpage = true;
1773 #if defined(DEBUG_SUBPAGE)
1774 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1775 mmio, base, TARGET_PAGE_SIZE);
1776 #endif
1777 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1778
1779 return mmio;
1780 }
1781
1782 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1783 MemoryRegion *mr)
1784 {
1785 assert(as);
1786 MemoryRegionSection section = {
1787 .address_space = as,
1788 .mr = mr,
1789 .offset_within_address_space = 0,
1790 .offset_within_region = 0,
1791 .size = int128_2_64(),
1792 };
1793
1794 return phys_section_add(map, &section);
1795 }
1796
1797 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1798 {
1799 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1800 }
1801
1802 static void io_mem_init(void)
1803 {
1804 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1805 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1806 "unassigned", UINT64_MAX);
1807 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1808 "notdirty", UINT64_MAX);
1809 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1810 "watch", UINT64_MAX);
1811 }
1812
1813 static void mem_begin(MemoryListener *listener)
1814 {
1815 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1816 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1817 uint16_t n;
1818
1819 n = dummy_section(&d->map, as, &io_mem_unassigned);
1820 assert(n == PHYS_SECTION_UNASSIGNED);
1821 n = dummy_section(&d->map, as, &io_mem_notdirty);
1822 assert(n == PHYS_SECTION_NOTDIRTY);
1823 n = dummy_section(&d->map, as, &io_mem_rom);
1824 assert(n == PHYS_SECTION_ROM);
1825 n = dummy_section(&d->map, as, &io_mem_watch);
1826 assert(n == PHYS_SECTION_WATCH);
1827
1828 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1829 d->as = as;
1830 as->next_dispatch = d;
1831 }
1832
1833 static void mem_commit(MemoryListener *listener)
1834 {
1835 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1836 AddressSpaceDispatch *cur = as->dispatch;
1837 AddressSpaceDispatch *next = as->next_dispatch;
1838
1839 phys_page_compact_all(next, next->map.nodes_nb);
1840
1841 as->dispatch = next;
1842
1843 if (cur) {
1844 phys_sections_free(&cur->map);
1845 g_free(cur);
1846 }
1847 }
1848
1849 static void tcg_commit(MemoryListener *listener)
1850 {
1851 CPUState *cpu;
1852
1853 /* since each CPU stores ram addresses in its TLB cache, we must
1854 reset the modified entries */
1855 /* XXX: slow ! */
1856 CPU_FOREACH(cpu) {
1857 /* FIXME: Disentangle the cpu.h circular file dependencies so we can
1858 directly get the right CPU from the listener. */
1859 if (cpu->tcg_as_listener != listener) {
1860 continue;
1861 }
1862 tlb_flush(cpu, 1);
1863 }
1864 }
1865
1866 static void core_log_global_start(MemoryListener *listener)
1867 {
1868 cpu_physical_memory_set_dirty_tracking(true);
1869 }
1870
1871 static void core_log_global_stop(MemoryListener *listener)
1872 {
1873 cpu_physical_memory_set_dirty_tracking(false);
1874 }
1875
1876 static MemoryListener core_memory_listener = {
1877 .log_global_start = core_log_global_start,
1878 .log_global_stop = core_log_global_stop,
1879 .priority = 1,
1880 };
1881
1882 void address_space_init_dispatch(AddressSpace *as)
1883 {
1884 as->dispatch = NULL;
1885 as->dispatch_listener = (MemoryListener) {
1886 .begin = mem_begin,
1887 .commit = mem_commit,
1888 .region_add = mem_add,
1889 .region_nop = mem_add,
1890 .priority = 0,
1891 };
1892 memory_listener_register(&as->dispatch_listener, as);
1893 }
1894
1895 void address_space_destroy_dispatch(AddressSpace *as)
1896 {
1897 AddressSpaceDispatch *d = as->dispatch;
1898
1899 memory_listener_unregister(&as->dispatch_listener);
1900 g_free(d);
1901 as->dispatch = NULL;
1902 }
1903
1904 static void memory_map_init(void)
1905 {
1906 system_memory = g_malloc(sizeof(*system_memory));
1907
1908 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1909 address_space_init(&address_space_memory, system_memory, "memory");
1910
1911 system_io = g_malloc(sizeof(*system_io));
1912 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1913 65536);
1914 address_space_init(&address_space_io, system_io, "I/O");
1915
1916 memory_listener_register(&core_memory_listener, &address_space_memory);
1917 }
1918
1919 MemoryRegion *get_system_memory(void)
1920 {
1921 return system_memory;
1922 }
1923
1924 MemoryRegion *get_system_io(void)
1925 {
1926 return system_io;
1927 }
1928
1929 #endif /* !defined(CONFIG_USER_ONLY) */
1930
1931 /* physical memory access (slow version, mainly for debug) */
1932 #if defined(CONFIG_USER_ONLY)
1933 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1934 uint8_t *buf, int len, int is_write)
1935 {
1936 int l, flags;
1937 target_ulong page;
1938 void * p;
1939
1940 while (len > 0) {
1941 page = addr & TARGET_PAGE_MASK;
1942 l = (page + TARGET_PAGE_SIZE) - addr;
1943 if (l > len)
1944 l = len;
1945 flags = page_get_flags(page);
1946 if (!(flags & PAGE_VALID))
1947 return -1;
1948 if (is_write) {
1949 if (!(flags & PAGE_WRITE))
1950 return -1;
1951 /* XXX: this code should not depend on lock_user */
1952 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1953 return -1;
1954 memcpy(p, buf, l);
1955 unlock_user(p, addr, l);
1956 } else {
1957 if (!(flags & PAGE_READ))
1958 return -1;
1959 /* XXX: this code should not depend on lock_user */
1960 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1961 return -1;
1962 memcpy(buf, p, l);
1963 unlock_user(p, addr, 0);
1964 }
1965 len -= l;
1966 buf += l;
1967 addr += l;
1968 }
1969 return 0;
1970 }
1971
1972 #else
1973
1974 static void invalidate_and_set_dirty(hwaddr addr,
1975 hwaddr length)
1976 {
1977 if (cpu_physical_memory_is_clean(addr)) {
1978 /* invalidate code */
1979 tb_invalidate_phys_page_range(addr, addr + length, 0);
1980 /* set dirty bit */
1981 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1982 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1983 }
1984 xen_modified_memory(addr, length);
1985 }
1986
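/* Clamp an MMIO access to a size the region can handle.  For example, an
 * 8-byte access at an address aligned only to 2 bytes, on a region that
 * does not allow unaligned accesses, is cut down to a 2-byte access
 * (addr & -addr isolates the lowest set bit, i.e. the effective alignment).
 */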
1987 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1988 {
1989 unsigned access_size_max = mr->ops->valid.max_access_size;
1990
1991 /* Regions are assumed to support 1-4 byte accesses unless
1992 otherwise specified. */
1993 if (access_size_max == 0) {
1994 access_size_max = 4;
1995 }
1996
1997 /* Bound the maximum access by the alignment of the address. */
1998 if (!mr->ops->impl.unaligned) {
1999 unsigned align_size_max = addr & -addr;
2000 if (align_size_max != 0 && align_size_max < access_size_max) {
2001 access_size_max = align_size_max;
2002 }
2003 }
2004
2005 /* Don't attempt accesses larger than the maximum. */
2006 if (l > access_size_max) {
2007 l = access_size_max;
2008 }
2009 if (l & (l - 1)) {
2010 l = 1 << (qemu_fls(l) - 1);
2011 }
2012
2013 return l;
2014 }
2015
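/* Copy bytes between a caller buffer and an address space.  Each iteration
 * translates as much of the remaining range as possible; direct RAM/ROMD
 * targets are memcpy'd (with dirty tracking on writes), everything else is
 * funnelled through io_mem_read()/io_mem_write() in 1/2/4/8-byte chunks as
 * limited by memory_access_size().  Returns true if any access failed.
 */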
2016 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2017 int len, bool is_write)
2018 {
2019 hwaddr l;
2020 uint8_t *ptr;
2021 uint64_t val;
2022 hwaddr addr1;
2023 MemoryRegion *mr;
2024 bool error = false;
2025
2026 while (len > 0) {
2027 l = len;
2028 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2029
2030 if (is_write) {
2031 if (!memory_access_is_direct(mr, is_write)) {
2032 l = memory_access_size(mr, l, addr1);
2033 /* XXX: could force current_cpu to NULL to avoid
2034 potential bugs */
2035 switch (l) {
2036 case 8:
2037 /* 64 bit write access */
2038 val = ldq_p(buf);
2039 error |= io_mem_write(mr, addr1, val, 8);
2040 break;
2041 case 4:
2042 /* 32 bit write access */
2043 val = ldl_p(buf);
2044 error |= io_mem_write(mr, addr1, val, 4);
2045 break;
2046 case 2:
2047 /* 16 bit write access */
2048 val = lduw_p(buf);
2049 error |= io_mem_write(mr, addr1, val, 2);
2050 break;
2051 case 1:
2052 /* 8 bit write access */
2053 val = ldub_p(buf);
2054 error |= io_mem_write(mr, addr1, val, 1);
2055 break;
2056 default:
2057 abort();
2058 }
2059 } else {
2060 addr1 += memory_region_get_ram_addr(mr);
2061 /* RAM case */
2062 ptr = qemu_get_ram_ptr(addr1);
2063 memcpy(ptr, buf, l);
2064 invalidate_and_set_dirty(addr1, l);
2065 }
2066 } else {
2067 if (!memory_access_is_direct(mr, is_write)) {
2068 /* I/O case */
2069 l = memory_access_size(mr, l, addr1);
2070 switch (l) {
2071 case 8:
2072 /* 64 bit read access */
2073 error |= io_mem_read(mr, addr1, &val, 8);
2074 stq_p(buf, val);
2075 break;
2076 case 4:
2077 /* 32 bit read access */
2078 error |= io_mem_read(mr, addr1, &val, 4);
2079 stl_p(buf, val);
2080 break;
2081 case 2:
2082 /* 16 bit read access */
2083 error |= io_mem_read(mr, addr1, &val, 2);
2084 stw_p(buf, val);
2085 break;
2086 case 1:
2087 /* 8 bit read access */
2088 error |= io_mem_read(mr, addr1, &val, 1);
2089 stb_p(buf, val);
2090 break;
2091 default:
2092 abort();
2093 }
2094 } else {
2095 /* RAM case */
2096 ptr = qemu_get_ram_ptr(memory_region_get_ram_addr(mr) + addr1);
2097 memcpy(buf, ptr, l);
2098 }
2099 }
2100 len -= l;
2101 buf += l;
2102 addr += l;
2103 }
2104
2105 return error;
2106 }
2107
2108 bool address_space_write(AddressSpace *as, hwaddr addr,
2109 const uint8_t *buf, int len)
2110 {
2111 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2112 }
2113
2114 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2115 {
2116 return address_space_rw(as, addr, buf, len, false);
2117 }
2118
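/*
 * Usage sketch (added for illustration, not part of upstream exec.c): how a
 * device model might push a small DMA buffer into guest memory with the
 * helpers above.  "gpa" is a hypothetical guest physical address supplied by
 * the caller.
 */
static void __attribute__((unused)) example_dma_write(AddressSpace *as,
                                                      hwaddr gpa)
{
    uint8_t data[16] = { 0 };

    if (address_space_access_valid(as, gpa, sizeof(data), true)) {
        /* address_space_write() returns true if any part hit an I/O error. */
        address_space_write(as, gpa, data, sizeof(data));
    }
}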
2119
2120 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2121 int len, int is_write)
2122 {
2123 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2124 }
2125
2126 enum write_rom_type {
2127 WRITE_DATA,
2128 FLUSH_CACHE,
2129 };
2130
2131 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2132 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2133 {
2134 hwaddr l;
2135 uint8_t *ptr;
2136 hwaddr addr1;
2137 MemoryRegion *mr;
2138
2139 while (len > 0) {
2140 l = len;
2141 mr = address_space_translate(as, addr, &addr1, &l, true);
2142
2143 if (!(memory_region_is_ram(mr) ||
2144 memory_region_is_romd(mr))) {
2145 /* do nothing */
2146 } else {
2147 addr1 += memory_region_get_ram_addr(mr);
2148 /* ROM/RAM case */
2149 ptr = qemu_get_ram_ptr(addr1);
2150 switch (type) {
2151 case WRITE_DATA:
2152 memcpy(ptr, buf, l);
2153 invalidate_and_set_dirty(addr1, l);
2154 break;
2155 case FLUSH_CACHE:
2156 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2157 break;
2158 }
2159 }
2160 len -= l;
2161 buf += l;
2162 addr += l;
2163 }
2164 }
2165
2166 /* used for ROM loading: can write to RAM and ROM */
2167 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2168 const uint8_t *buf, int len)
2169 {
2170 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2171 }
2172
2173 void cpu_flush_icache_range(hwaddr start, int len)
2174 {
2175 /*
2176 * This function should do the same thing as an icache flush that was
2177 * triggered from within the guest. For TCG we are always cache coherent,
2178 * so there is no need to flush anything. For KVM / Xen we need to flush
2179 * the host's instruction cache at least.
2180 */
2181 if (tcg_enabled()) {
2182 return;
2183 }
2184
2185 cpu_physical_memory_write_rom_internal(&address_space_memory,
2186 start, NULL, len, FLUSH_CACHE);
2187 }
2188
2189 typedef struct {
2190 MemoryRegion *mr;
2191 void *buffer;
2192 hwaddr addr;
2193 hwaddr len;
2194 } BounceBuffer;
2195
2196 static BounceBuffer bounce;
2197
2198 typedef struct MapClient {
2199 void *opaque;
2200 void (*callback)(void *opaque);
2201 QLIST_ENTRY(MapClient) link;
2202 } MapClient;
2203
2204 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2205 = QLIST_HEAD_INITIALIZER(map_client_list);
2206
2207 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2208 {
2209 MapClient *client = g_malloc(sizeof(*client));
2210
2211 client->opaque = opaque;
2212 client->callback = callback;
2213 QLIST_INSERT_HEAD(&map_client_list, client, link);
2214 return client;
2215 }
2216
2217 static void cpu_unregister_map_client(void *_client)
2218 {
2219 MapClient *client = (MapClient *)_client;
2220
2221 QLIST_REMOVE(client, link);
2222 g_free(client);
2223 }
2224
2225 static void cpu_notify_map_clients(void)
2226 {
2227 MapClient *client;
2228
2229 while (!QLIST_EMPTY(&map_client_list)) {
2230 client = QLIST_FIRST(&map_client_list);
2231 client->callback(client->opaque);
2232 cpu_unregister_map_client(client);
2233 }
2234 }
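/*
 * Usage sketch (added for illustration, not part of upstream exec.c): a
 * caller that loses the race for the single bounce buffer can register a
 * map client and retry when cpu_notify_map_clients() runs.  "MyDMAState"
 * and "retry_dma" are hypothetical names, hence the sketch is not compiled.
 */
#if 0
static void retry_dma(void *opaque)
{
    MyDMAState *s = opaque;
    hwaddr len = s->len;
    void *p = address_space_map(s->as, s->addr, &len, s->is_write);

    if (!p) {
        /* Still contended: wait for the next notification. */
        cpu_register_map_client(s, retry_dma);
        return;
    }
    /* ... perform the access, then call address_space_unmap() ... */
}
#endif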
2235
2236 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2237 {
2238 MemoryRegion *mr;
2239 hwaddr l, xlat;
2240
2241 while (len > 0) {
2242 l = len;
2243 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2244 if (!memory_access_is_direct(mr, is_write)) {
2245 l = memory_access_size(mr, l, addr);
2246 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2247 return false;
2248 }
2249 }
2250
2251 len -= l;
2252 addr += l;
2253 }
2254 return true;
2255 }
2256
2257 /* Map a physical memory region into a host virtual address.
2258 * May map a subset of the requested range, given by and returned in *plen.
2259 * May return NULL if resources needed to perform the mapping are exhausted.
2260 * Use only for reads OR writes - not for read-modify-write operations.
2261 * Use cpu_register_map_client() to know when retrying the map operation is
2262 * likely to succeed.
2263 */
2264 void *address_space_map(AddressSpace *as,
2265 hwaddr addr,
2266 hwaddr *plen,
2267 bool is_write)
2268 {
2269 hwaddr len = *plen;
2270 hwaddr done = 0;
2271 hwaddr l, xlat, base;
2272 MemoryRegion *mr, *this_mr;
2273 ram_addr_t raddr;
2274
2275 if (len == 0) {
2276 return NULL;
2277 }
2278
2279 l = len;
2280 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2281 if (!memory_access_is_direct(mr, is_write)) {
2282 if (bounce.buffer) {
2283 return NULL;
2284 }
2285 /* Avoid unbounded allocations */
2286 l = MIN(l, TARGET_PAGE_SIZE);
2287 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2288 bounce.addr = addr;
2289 bounce.len = l;
2290
2291 memory_region_ref(mr);
2292 bounce.mr = mr;
2293 if (!is_write) {
2294 address_space_read(as, addr, bounce.buffer, l);
2295 }
2296
2297 *plen = l;
2298 return bounce.buffer;
2299 }
2300
2301 base = xlat;
2302 raddr = memory_region_get_ram_addr(mr);
2303
2304 for (;;) {
2305 len -= l;
2306 addr += l;
2307 done += l;
2308 if (len == 0) {
2309 break;
2310 }
2311
2312 l = len;
2313 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2314 if (this_mr != mr || xlat != base + done) {
2315 break;
2316 }
2317 }
2318
2319 memory_region_ref(mr);
2320 *plen = done;
2321 return qemu_ram_ptr_length(raddr + base, plen);
2322 }
2323
2324 /* Unmaps a memory region previously mapped by address_space_map().
2325 * Will also mark the memory as dirty if is_write == 1. access_len gives
2326 * the amount of memory that was actually read or written by the caller.
2327 */
2328 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2329 int is_write, hwaddr access_len)
2330 {
2331 if (buffer != bounce.buffer) {
2332 MemoryRegion *mr;
2333 ram_addr_t addr1;
2334
2335 mr = qemu_ram_addr_from_host(buffer, &addr1);
2336 assert(mr != NULL);
2337 if (is_write) {
2338 while (access_len) {
2339 unsigned l;
2340 l = TARGET_PAGE_SIZE;
2341 if (l > access_len)
2342 l = access_len;
2343 invalidate_and_set_dirty(addr1, l);
2344 addr1 += l;
2345 access_len -= l;
2346 }
2347 }
2348 if (xen_enabled()) {
2349 xen_invalidate_map_cache_entry(buffer);
2350 }
2351 memory_region_unref(mr);
2352 return;
2353 }
2354 if (is_write) {
2355 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2356 }
2357 qemu_vfree(bounce.buffer);
2358 bounce.buffer = NULL;
2359 memory_region_unref(bounce.mr);
2360 cpu_notify_map_clients();
2361 }
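/*
 * Usage sketch (added for illustration, not part of upstream exec.c): the
 * usual pairing of the two functions above for a read-only transfer.  Note
 * that *plen can come back smaller than requested, and the access_len given
 * to address_space_unmap() should reflect how much was actually touched.
 * "gpa" and "want" are hypothetical caller-supplied values.
 */
static void __attribute__((unused)) example_map_read(AddressSpace *as,
                                                     hwaddr gpa, hwaddr want)
{
    hwaddr len = want;
    void *p = address_space_map(as, gpa, &len, false);

    if (p) {
        /* Only the first "len" bytes are guaranteed to be valid here. */
        address_space_unmap(as, p, len, false, len);
    }
}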
2362
2363 void *cpu_physical_memory_map(hwaddr addr,
2364 hwaddr *plen,
2365 int is_write)
2366 {
2367 return address_space_map(&address_space_memory, addr, plen, is_write);
2368 }
2369
2370 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2371 int is_write, hwaddr access_len)
2372 {
2373 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2374 }
2375
2376 /* warning: addr must be aligned */
2377 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2378 enum device_endian endian)
2379 {
2380 uint8_t *ptr;
2381 uint64_t val;
2382 MemoryRegion *mr;
2383 hwaddr l = 4;
2384 hwaddr addr1;
2385
2386 mr = address_space_translate(as, addr, &addr1, &l, false);
2387 if (l < 4 || !memory_access_is_direct(mr, false)) {
2388 /* I/O case */
2389 io_mem_read(mr, addr1, &val, 4);
2390 #if defined(TARGET_WORDS_BIGENDIAN)
2391 if (endian == DEVICE_LITTLE_ENDIAN) {
2392 val = bswap32(val);
2393 }
2394 #else
2395 if (endian == DEVICE_BIG_ENDIAN) {
2396 val = bswap32(val);
2397 }
2398 #endif
2399 } else {
2400 /* RAM case */
2401 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2402 & TARGET_PAGE_MASK)
2403 + addr1);
2404 switch (endian) {
2405 case DEVICE_LITTLE_ENDIAN:
2406 val = ldl_le_p(ptr);
2407 break;
2408 case DEVICE_BIG_ENDIAN:
2409 val = ldl_be_p(ptr);
2410 break;
2411 default:
2412 val = ldl_p(ptr);
2413 break;
2414 }
2415 }
2416 return val;
2417 }
2418
2419 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2420 {
2421 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2422 }
2423
2424 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2425 {
2426 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2427 }
2428
2429 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2430 {
2431 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2432 }
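/*
 * Usage note (added for illustration): device models with a fixed
 * guest-visible byte order call the _le_/_be_ variants explicitly, e.g.
 *
 *     uint32_t v = ldl_le_phys(&address_space_memory, desc_gpa);
 *
 * whereas ldl_phys() follows the target's native endianness.  "desc_gpa" is
 * a hypothetical guest physical address.
 */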
2433
2434 /* warning: addr must be aligned */
2435 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2436 enum device_endian endian)
2437 {
2438 uint8_t *ptr;
2439 uint64_t val;
2440 MemoryRegion *mr;
2441 hwaddr l = 8;
2442 hwaddr addr1;
2443
2444 mr = address_space_translate(as, addr, &addr1, &l,
2445 false);
2446 if (l < 8 || !memory_access_is_direct(mr, false)) {
2447 /* I/O case */
2448 io_mem_read(mr, addr1, &val, 8);
2449 #if defined(TARGET_WORDS_BIGENDIAN)
2450 if (endian == DEVICE_LITTLE_ENDIAN) {
2451 val = bswap64(val);
2452 }
2453 #else
2454 if (endian == DEVICE_BIG_ENDIAN) {
2455 val = bswap64(val);
2456 }
2457 #endif
2458 } else {
2459 /* RAM case */
2460 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2461 & TARGET_PAGE_MASK)
2462 + addr1);
2463 switch (endian) {
2464 case DEVICE_LITTLE_ENDIAN:
2465 val = ldq_le_p(ptr);
2466 break;
2467 case DEVICE_BIG_ENDIAN:
2468 val = ldq_be_p(ptr);
2469 break;
2470 default:
2471 val = ldq_p(ptr);
2472 break;
2473 }
2474 }
2475 return val;
2476 }
2477
2478 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2479 {
2480 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2481 }
2482
2483 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2484 {
2485 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2486 }
2487
2488 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2489 {
2490 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2491 }
2492
2493 /* XXX: optimize */
2494 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2495 {
2496 uint8_t val;
2497 address_space_rw(as, addr, &val, 1, 0);
2498 return val;
2499 }
2500
2501 /* warning: addr must be aligned */
2502 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2503 enum device_endian endian)
2504 {
2505 uint8_t *ptr;
2506 uint64_t val;
2507 MemoryRegion *mr;
2508 hwaddr l = 2;
2509 hwaddr addr1;
2510
2511 mr = address_space_translate(as, addr, &addr1, &l,
2512 false);
2513 if (l < 2 || !memory_access_is_direct(mr, false)) {
2514 /* I/O case */
2515 io_mem_read(mr, addr1, &val, 2);
2516 #if defined(TARGET_WORDS_BIGENDIAN)
2517 if (endian == DEVICE_LITTLE_ENDIAN) {
2518 val = bswap16(val);
2519 }
2520 #else
2521 if (endian == DEVICE_BIG_ENDIAN) {
2522 val = bswap16(val);
2523 }
2524 #endif
2525 } else {
2526 /* RAM case */
2527 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2528 & TARGET_PAGE_MASK)
2529 + addr1);
2530 switch (endian) {
2531 case DEVICE_LITTLE_ENDIAN:
2532 val = lduw_le_p(ptr);
2533 break;
2534 case DEVICE_BIG_ENDIAN:
2535 val = lduw_be_p(ptr);
2536 break;
2537 default:
2538 val = lduw_p(ptr);
2539 break;
2540 }
2541 }
2542 return val;
2543 }
2544
2545 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2546 {
2547 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2548 }
2549
2550 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2551 {
2552 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2553 }
2554
2555 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2556 {
2557 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2558 }
2559
2560 /* warning: addr must be aligned. The RAM page is not marked as dirty
2561 and the code inside is not invalidated. It is useful if the dirty
2562 bits are used to track modified PTEs */
2563 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2564 {
2565 uint8_t *ptr;
2566 MemoryRegion *mr;
2567 hwaddr l = 4;
2568 hwaddr addr1;
2569
2570 mr = address_space_translate(as, addr, &addr1, &l,
2571 true);
2572 if (l < 4 || !memory_access_is_direct(mr, true)) {
2573 io_mem_write(mr, addr1, val, 4);
2574 } else {
2575 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2576 ptr = qemu_get_ram_ptr(addr1);
2577 stl_p(ptr, val);
2578
2579 if (unlikely(in_migration)) {
2580 if (cpu_physical_memory_is_clean(addr1)) {
2581 /* invalidate code */
2582 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2583 /* set dirty bit */
2584 cpu_physical_memory_set_dirty_flag(addr1,
2585 DIRTY_MEMORY_MIGRATION);
2586 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2587 }
2588 }
2589 }
2590 }
2591
2592 /* warning: addr must be aligned */
2593 static inline void stl_phys_internal(AddressSpace *as,
2594 hwaddr addr, uint32_t val,
2595 enum device_endian endian)
2596 {
2597 uint8_t *ptr;
2598 MemoryRegion *mr;
2599 hwaddr l = 4;
2600 hwaddr addr1;
2601
2602 mr = address_space_translate(as, addr, &addr1, &l,
2603 true);
2604 if (l < 4 || !memory_access_is_direct(mr, true)) {
2605 #if defined(TARGET_WORDS_BIGENDIAN)
2606 if (endian == DEVICE_LITTLE_ENDIAN) {
2607 val = bswap32(val);
2608 }
2609 #else
2610 if (endian == DEVICE_BIG_ENDIAN) {
2611 val = bswap32(val);
2612 }
2613 #endif
2614 io_mem_write(mr, addr1, val, 4);
2615 } else {
2616 /* RAM case */
2617 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2618 ptr = qemu_get_ram_ptr(addr1);
2619 switch (endian) {
2620 case DEVICE_LITTLE_ENDIAN:
2621 stl_le_p(ptr, val);
2622 break;
2623 case DEVICE_BIG_ENDIAN:
2624 stl_be_p(ptr, val);
2625 break;
2626 default:
2627 stl_p(ptr, val);
2628 break;
2629 }
2630 invalidate_and_set_dirty(addr1, 4);
2631 }
2632 }
2633
2634 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2635 {
2636 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2637 }
2638
2639 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2640 {
2641 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2642 }
2643
2644 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2645 {
2646 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2647 }
2648
2649 /* XXX: optimize */
2650 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2651 {
2652 uint8_t v = val;
2653 address_space_rw(as, addr, &v, 1, 1);
2654 }
2655
2656 /* warning: addr must be aligned */
2657 static inline void stw_phys_internal(AddressSpace *as,
2658 hwaddr addr, uint32_t val,
2659 enum device_endian endian)
2660 {
2661 uint8_t *ptr;
2662 MemoryRegion *mr;
2663 hwaddr l = 2;
2664 hwaddr addr1;
2665
2666 mr = address_space_translate(as, addr, &addr1, &l, true);
2667 if (l < 2 || !memory_access_is_direct(mr, true)) {
2668 #if defined(TARGET_WORDS_BIGENDIAN)
2669 if (endian == DEVICE_LITTLE_ENDIAN) {
2670 val = bswap16(val);
2671 }
2672 #else
2673 if (endian == DEVICE_BIG_ENDIAN) {
2674 val = bswap16(val);
2675 }
2676 #endif
2677 io_mem_write(mr, addr1, val, 2);
2678 } else {
2679 /* RAM case */
2680 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2681 ptr = qemu_get_ram_ptr(addr1);
2682 switch (endian) {
2683 case DEVICE_LITTLE_ENDIAN:
2684 stw_le_p(ptr, val);
2685 break;
2686 case DEVICE_BIG_ENDIAN:
2687 stw_be_p(ptr, val);
2688 break;
2689 default:
2690 stw_p(ptr, val);
2691 break;
2692 }
2693 invalidate_and_set_dirty(addr1, 2);
2694 }
2695 }
2696
2697 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2698 {
2699 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2700 }
2701
2702 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2703 {
2704 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2705 }
2706
2707 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2708 {
2709 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2710 }
2711
2712 /* XXX: optimize */
2713 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2714 {
2715 val = tswap64(val);
2716 address_space_rw(as, addr, (void *) &val, 8, 1);
2717 }
2718
2719 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2720 {
2721 val = cpu_to_le64(val);
2722 address_space_rw(as, addr, (void *) &val, 8, 1);
2723 }
2724
2725 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2726 {
2727 val = cpu_to_be64(val);
2728 address_space_rw(as, addr, (void *) &val, 8, 1);
2729 }
2730
2731 /* virtual memory access for debug (includes writing to ROM) */
2732 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2733 uint8_t *buf, int len, int is_write)
2734 {
2735 int l;
2736 hwaddr phys_addr;
2737 target_ulong page;
2738
2739 while (len > 0) {
2740 page = addr & TARGET_PAGE_MASK;
2741 phys_addr = cpu_get_phys_page_debug(cpu, page);
2742 /* if no physical page mapped, return an error */
2743 if (phys_addr == -1)
2744 return -1;
2745 l = (page + TARGET_PAGE_SIZE) - addr;
2746 if (l > len)
2747 l = len;
2748 phys_addr += (addr & ~TARGET_PAGE_MASK);
2749 if (is_write) {
2750 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2751 } else {
2752 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2753 }
2754 len -= l;
2755 buf += l;
2756 addr += l;
2757 }
2758 return 0;
2759 }
2760 #endif
2761
2762 /*
2763 * A helper function for the _utterly broken_ virtio device model to find out if
2764 * it's running on a big endian machine. Don't do this at home kids!
2765 */
2766 bool target_words_bigendian(void);
2767 bool target_words_bigendian(void)
2768 {
2769 #if defined(TARGET_WORDS_BIGENDIAN)
2770 return true;
2771 #else
2772 return false;
2773 #endif
2774 }
2775
2776 #ifndef CONFIG_USER_ONLY
2777 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2778 {
2779 MemoryRegion *mr;
2780 hwaddr l = 1;
2781
2782 mr = address_space_translate(&address_space_memory,
2783 phys_addr, &phys_addr, &l, false);
2784
2785 return !(memory_region_is_ram(mr) ||
2786 memory_region_is_romd(mr));
2787 }
2788
2789 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2790 {
2791 RAMBlock *block;
2792
2793 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2794 func(block->host, block->offset, block->length, opaque);
2795 }
2796 }
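/*
 * Usage sketch (added for illustration, not part of upstream exec.c): a
 * RAMBlockIterFunc that sums the length of every RAM block.  "count_ram"
 * is a hypothetical name; it would be invoked as
 * qemu_ram_foreach_block(count_ram, &total).
 */
static void __attribute__((unused)) count_ram(void *host, ram_addr_t offset,
                                              ram_addr_t length, void *opaque)
{
    ram_addr_t *total = opaque;

    *total += length;
}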
2797 #endif