exec: Change memory_region_section_get_iotlb() argument to CPUState
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
54
55 #include "qemu/range.h"
56
57 //#define DEBUG_SUBPAGE
58
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
61
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
66
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
69
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
72
73 #endif
74
75 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
76 /* current CPU in the current thread. It is only valid inside
77 cpu_exec() */
78 DEFINE_TLS(CPUState *, current_cpu);
79 /* 0 = Do not count executed instructions.
80 1 = Precise instruction counting.
81 2 = Adaptive rate instruction counting. */
82 int use_icount;
83
84 #if !defined(CONFIG_USER_ONLY)
85
86 typedef struct PhysPageEntry PhysPageEntry;
87
88 struct PhysPageEntry {
89 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
90 uint32_t skip : 6;
91 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
92 uint32_t ptr : 26;
93 };
94
95 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
96
97 /* Size of the L2 (and L3, etc) page tables. */
98 #define ADDR_SPACE_BITS 64
99
100 #define P_L2_BITS 9
101 #define P_L2_SIZE (1 << P_L2_BITS)
102
103 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
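/*
 * Worked example (illustrative only, assuming 4 KiB target pages,
 * i.e. TARGET_PAGE_BITS == 12): the page-frame index occupies
 * 64 - 12 = 52 bits, and with P_L2_BITS == 9 bits resolved per level
 * this gives P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6 levels of
 * PhysPageEntry nodes between the root and the section leaves.
 */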
104
105 typedef PhysPageEntry Node[P_L2_SIZE];
106
107 typedef struct PhysPageMap {
108 unsigned sections_nb;
109 unsigned sections_nb_alloc;
110 unsigned nodes_nb;
111 unsigned nodes_nb_alloc;
112 Node *nodes;
113 MemoryRegionSection *sections;
114 } PhysPageMap;
115
116 struct AddressSpaceDispatch {
117 /* This is a multi-level map on the physical address space.
118 * The bottom level has pointers to MemoryRegionSections.
119 */
120 PhysPageEntry phys_map;
121 PhysPageMap map;
122 AddressSpace *as;
123 };
124
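/*
 * A subpage_t models a single target page whose contents are split
 * between several MemoryRegionSections (e.g. when a small MMIO region
 * does not cover a whole page).  sub_section[] maps each byte offset
 * within the page to the index of the section that owns it.
 */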
125 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
126 typedef struct subpage_t {
127 MemoryRegion iomem;
128 AddressSpace *as;
129 hwaddr base;
130 uint16_t sub_section[TARGET_PAGE_SIZE];
131 } subpage_t;
132
133 #define PHYS_SECTION_UNASSIGNED 0
134 #define PHYS_SECTION_NOTDIRTY 1
135 #define PHYS_SECTION_ROM 2
136 #define PHYS_SECTION_WATCH 3
137
138 static void io_mem_init(void);
139 static void memory_map_init(void);
140 static void tcg_commit(MemoryListener *listener);
141
142 static MemoryRegion io_mem_watch;
143 #endif
144
145 #if !defined(CONFIG_USER_ONLY)
146
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
148 {
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
153 }
154 }
155
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
157 {
158 unsigned i;
159 uint32_t ret;
160
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
167 }
168 return ret;
169 }
170
171 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172 hwaddr *index, hwaddr *nb, uint16_t leaf,
173 int level)
174 {
175 PhysPageEntry *p;
176 int i;
177 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
178
179 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180 lp->ptr = phys_map_node_alloc(map);
181 p = map->nodes[lp->ptr];
182 if (level == 0) {
183 for (i = 0; i < P_L2_SIZE; i++) {
184 p[i].skip = 0;
185 p[i].ptr = PHYS_SECTION_UNASSIGNED;
186 }
187 }
188 } else {
189 p = map->nodes[lp->ptr];
190 }
191 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
192
193 while (*nb && lp < &p[P_L2_SIZE]) {
194 if ((*index & (step - 1)) == 0 && *nb >= step) {
195 lp->skip = 0;
196 lp->ptr = leaf;
197 *index += step;
198 *nb -= step;
199 } else {
200 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
201 }
202 ++lp;
203 }
204 }
205
206 static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
208 uint16_t leaf)
209 {
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
212
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
214 }
215
216 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
217 * and update our entry so we can skip it and go directly to the destination.
218 */
219 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
220 {
221 unsigned valid_ptr = P_L2_SIZE;
222 int valid = 0;
223 PhysPageEntry *p;
224 int i;
225
226 if (lp->ptr == PHYS_MAP_NODE_NIL) {
227 return;
228 }
229
230 p = nodes[lp->ptr];
231 for (i = 0; i < P_L2_SIZE; i++) {
232 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
233 continue;
234 }
235
236 valid_ptr = i;
237 valid++;
238 if (p[i].skip) {
239 phys_page_compact(&p[i], nodes, compacted);
240 }
241 }
242
243 /* We can only compress if there's only one child. */
244 if (valid != 1) {
245 return;
246 }
247
248 assert(valid_ptr < P_L2_SIZE);
249
250 /* Don't compress if it won't fit in the # of bits we have. */
251 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
252 return;
253 }
254
255 lp->ptr = p[valid_ptr].ptr;
256 if (!p[valid_ptr].skip) {
257 /* If our only child is a leaf, make this a leaf. */
258 /* By design, we should have made this node a leaf to begin with so we
259 * should never reach here.
260 * But since it's so simple to handle this, let's do it just in case we
261 * change this rule.
262 */
263 lp->skip = 0;
264 } else {
265 lp->skip += p[valid_ptr].skip;
266 }
267 }
268
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
270 {
271 DECLARE_BITMAP(compacted, nodes_nb);
272
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
275 }
276 }
277
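/*
 * Walk the radix tree for @addr: while lp.skip is non-zero the entry
 * points at another node (a compacted chain may skip several levels);
 * once skip reaches 0, lp.ptr indexes into @sections.  Addresses that
 * fall outside the found section map to PHYS_SECTION_UNASSIGNED.
 */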
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
280 {
281 PhysPageEntry *p;
282 hwaddr index = addr >> TARGET_PAGE_BITS;
283 int i;
284
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return &sections[PHYS_SECTION_UNASSIGNED];
288 }
289 p = nodes[lp.ptr];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
291 }
292
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return &sections[lp.ptr];
297 } else {
298 return &sections[PHYS_SECTION_UNASSIGNED];
299 }
300 }
301
302 bool memory_region_is_unassigned(MemoryRegion *mr)
303 {
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
306 }
307
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
309 hwaddr addr,
310 bool resolve_subpage)
311 {
312 MemoryRegionSection *section;
313 subpage_t *subpage;
314
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
319 }
320 return section;
321 }
322
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
326 {
327 MemoryRegionSection *section;
328 Int128 diff;
329
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
333
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
336
337 diff = int128_sub(section->mr->size, int128_make64(addr));
338 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
339 return section;
340 }
341
342 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
343 {
344 if (memory_region_is_ram(mr)) {
345 return !(is_write && mr->readonly);
346 }
347 if (memory_region_is_romd(mr)) {
348 return !is_write;
349 }
350
351 return false;
352 }
353
354 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
355 hwaddr *xlat, hwaddr *plen,
356 bool is_write)
357 {
358 IOMMUTLBEntry iotlb;
359 MemoryRegionSection *section;
360 MemoryRegion *mr;
361 hwaddr len = *plen;
362
363 for (;;) {
364 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
365 mr = section->mr;
366
367 if (!mr->iommu_ops) {
368 break;
369 }
370
371 iotlb = mr->iommu_ops->translate(mr, addr);
372 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
373 | (addr & iotlb.addr_mask));
374 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
375 if (!(iotlb.perm & (1 << is_write))) {
376 mr = &io_mem_unassigned;
377 break;
378 }
379
380 as = iotlb.target_as;
381 }
382
383 if (memory_access_is_direct(mr, is_write)) {
384 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
385 len = MIN(page, len);
386 }
387
388 *plen = len;
389 *xlat = addr;
390 return mr;
391 }
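/*
 * Typical use (illustrative sketch, not copied from a caller): pass in
 * the length you would like to access and get back the MemoryRegion
 * plus the offset and length that one access may actually cover:
 *
 *     hwaddr xlat, l = len;
 *     MemoryRegion *mr = address_space_translate(as, addr, &xlat, &l,
 *                                                 is_write);
 *     // touch at most 'l' bytes of 'mr' starting at offset 'xlat'
 */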
392
393 MemoryRegionSection *
394 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
395 hwaddr *plen)
396 {
397 MemoryRegionSection *section;
398 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
399
400 assert(!section->mr->iommu_ops);
401 return section;
402 }
403 #endif
404
405 void cpu_exec_init_all(void)
406 {
407 #if !defined(CONFIG_USER_ONLY)
408 qemu_mutex_init(&ram_list.mutex);
409 memory_map_init();
410 io_mem_init();
411 #endif
412 }
413
414 #if !defined(CONFIG_USER_ONLY)
415
416 static int cpu_common_post_load(void *opaque, int version_id)
417 {
418 CPUState *cpu = opaque;
419
420 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
421 version_id is increased. */
422 cpu->interrupt_request &= ~0x01;
423 tlb_flush(cpu->env_ptr, 1);
424
425 return 0;
426 }
427
428 const VMStateDescription vmstate_cpu_common = {
429 .name = "cpu_common",
430 .version_id = 1,
431 .minimum_version_id = 1,
432 .minimum_version_id_old = 1,
433 .post_load = cpu_common_post_load,
434 .fields = (VMStateField []) {
435 VMSTATE_UINT32(halted, CPUState),
436 VMSTATE_UINT32(interrupt_request, CPUState),
437 VMSTATE_END_OF_LIST()
438 }
439 };
440
441 #endif
442
443 CPUState *qemu_get_cpu(int index)
444 {
445 CPUState *cpu;
446
447 CPU_FOREACH(cpu) {
448 if (cpu->cpu_index == index) {
449 return cpu;
450 }
451 }
452
453 return NULL;
454 }
455
456 #if !defined(CONFIG_USER_ONLY)
457 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
458 {
459 /* We only support one address space per cpu at the moment. */
460 assert(cpu->as == as);
461
462 if (cpu->tcg_as_listener) {
463 memory_listener_unregister(cpu->tcg_as_listener);
464 } else {
465 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
466 }
467 cpu->tcg_as_listener->commit = tcg_commit;
468 memory_listener_register(cpu->tcg_as_listener, as);
469 }
470 #endif
471
472 void cpu_exec_init(CPUArchState *env)
473 {
474 CPUState *cpu = ENV_GET_CPU(env);
475 CPUClass *cc = CPU_GET_CLASS(cpu);
476 CPUState *some_cpu;
477 int cpu_index;
478
479 #if defined(CONFIG_USER_ONLY)
480 cpu_list_lock();
481 #endif
482 cpu_index = 0;
483 CPU_FOREACH(some_cpu) {
484 cpu_index++;
485 }
486 cpu->cpu_index = cpu_index;
487 cpu->numa_node = 0;
488 QTAILQ_INIT(&cpu->breakpoints);
489 QTAILQ_INIT(&cpu->watchpoints);
490 #ifndef CONFIG_USER_ONLY
491 cpu->as = &address_space_memory;
492 cpu->thread_id = qemu_get_thread_id();
493 #endif
494 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
495 #if defined(CONFIG_USER_ONLY)
496 cpu_list_unlock();
497 #endif
498 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
499 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
500 }
501 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
502 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
503 cpu_save, cpu_load, env);
504 assert(cc->vmsd == NULL);
505 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
506 #endif
507 if (cc->vmsd != NULL) {
508 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
509 }
510 }
511
512 #if defined(TARGET_HAS_ICE)
513 #if defined(CONFIG_USER_ONLY)
514 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
515 {
516 tb_invalidate_phys_page_range(pc, pc + 1, 0);
517 }
518 #else
519 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
520 {
521 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
522 if (phys != -1) {
523 tb_invalidate_phys_addr(cpu->as,
524 phys | (pc & ~TARGET_PAGE_MASK));
525 }
526 }
527 #endif
528 #endif /* TARGET_HAS_ICE */
529
530 #if defined(CONFIG_USER_ONLY)
531 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
532
533 {
534 }
535
536 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
537 int flags, CPUWatchpoint **watchpoint)
538 {
539 return -ENOSYS;
540 }
541 #else
542 /* Add a watchpoint. */
543 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
544 int flags, CPUWatchpoint **watchpoint)
545 {
546 CPUArchState *env = cpu->env_ptr;
547 vaddr len_mask = ~(len - 1);
548 CPUWatchpoint *wp;
549
550 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
551 if ((len & (len - 1)) || (addr & ~len_mask) ||
552 len == 0 || len > TARGET_PAGE_SIZE) {
553 error_report("tried to set invalid watchpoint at %"
554 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
555 return -EINVAL;
556 }
557 wp = g_malloc(sizeof(*wp));
558
559 wp->vaddr = addr;
560 wp->len_mask = len_mask;
561 wp->flags = flags;
562
563 /* keep all GDB-injected watchpoints in front */
564 if (flags & BP_GDB) {
565 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
566 } else {
567 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
568 }
569
570 tlb_flush_page(env, addr);
571
572 if (watchpoint)
573 *watchpoint = wp;
574 return 0;
575 }
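/*
 * Illustrative use (sketch only): watch an aligned 4-byte location for
 * guest writes on behalf of GDB:
 *
 *     CPUWatchpoint *wp;
 *     if (cpu_watchpoint_insert(cpu, addr, 4, BP_MEM_WRITE | BP_GDB,
 *                               &wp) < 0) {
 *         // rejected: length not a power of two, unaligned, or too big
 *     }
 */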
576
577 /* Remove a specific watchpoint. */
578 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
579 int flags)
580 {
581 vaddr len_mask = ~(len - 1);
582 CPUWatchpoint *wp;
583
584 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
585 if (addr == wp->vaddr && len_mask == wp->len_mask
586 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
587 cpu_watchpoint_remove_by_ref(cpu, wp);
588 return 0;
589 }
590 }
591 return -ENOENT;
592 }
593
594 /* Remove a specific watchpoint by reference. */
595 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
596 {
597 CPUArchState *env = cpu->env_ptr;
598
599 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
600
601 tlb_flush_page(env, watchpoint->vaddr);
602
603 g_free(watchpoint);
604 }
605
606 /* Remove all matching watchpoints. */
607 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
608 {
609 CPUWatchpoint *wp, *next;
610
611 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
612 if (wp->flags & mask) {
613 cpu_watchpoint_remove_by_ref(cpu, wp);
614 }
615 }
616 }
617 #endif
618
619 /* Add a breakpoint. */
620 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
621 CPUBreakpoint **breakpoint)
622 {
623 #if defined(TARGET_HAS_ICE)
624 CPUBreakpoint *bp;
625
626 bp = g_malloc(sizeof(*bp));
627
628 bp->pc = pc;
629 bp->flags = flags;
630
631 /* keep all GDB-injected breakpoints in front */
632 if (flags & BP_GDB) {
633 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
634 } else {
635 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
636 }
637
638 breakpoint_invalidate(cpu, pc);
639
640 if (breakpoint) {
641 *breakpoint = bp;
642 }
643 return 0;
644 #else
645 return -ENOSYS;
646 #endif
647 }
648
649 /* Remove a specific breakpoint. */
650 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
651 {
652 #if defined(TARGET_HAS_ICE)
653 CPUBreakpoint *bp;
654
655 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
656 if (bp->pc == pc && bp->flags == flags) {
657 cpu_breakpoint_remove_by_ref(cpu, bp);
658 return 0;
659 }
660 }
661 return -ENOENT;
662 #else
663 return -ENOSYS;
664 #endif
665 }
666
667 /* Remove a specific breakpoint by reference. */
668 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
669 {
670 #if defined(TARGET_HAS_ICE)
671 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
672
673 breakpoint_invalidate(cpu, breakpoint->pc);
674
675 g_free(breakpoint);
676 #endif
677 }
678
679 /* Remove all matching breakpoints. */
680 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
681 {
682 #if defined(TARGET_HAS_ICE)
683 CPUBreakpoint *bp, *next;
684
685 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
686 if (bp->flags & mask) {
687 cpu_breakpoint_remove_by_ref(cpu, bp);
688 }
689 }
690 #endif
691 }
692
693 /* enable or disable single step mode. EXCP_DEBUG is returned by the
694 CPU loop after each instruction */
695 void cpu_single_step(CPUState *cpu, int enabled)
696 {
697 #if defined(TARGET_HAS_ICE)
698 if (cpu->singlestep_enabled != enabled) {
699 cpu->singlestep_enabled = enabled;
700 if (kvm_enabled()) {
701 kvm_update_guest_debug(cpu, 0);
702 } else {
703 /* must flush all the translated code to avoid inconsistencies */
704 /* XXX: only flush what is necessary */
705 CPUArchState *env = cpu->env_ptr;
706 tb_flush(env);
707 }
708 }
709 #endif
710 }
711
712 void cpu_abort(CPUArchState *env, const char *fmt, ...)
713 {
714 CPUState *cpu = ENV_GET_CPU(env);
715 va_list ap;
716 va_list ap2;
717
718 va_start(ap, fmt);
719 va_copy(ap2, ap);
720 fprintf(stderr, "qemu: fatal: ");
721 vfprintf(stderr, fmt, ap);
722 fprintf(stderr, "\n");
723 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
724 if (qemu_log_enabled()) {
725 qemu_log("qemu: fatal: ");
726 qemu_log_vprintf(fmt, ap2);
727 qemu_log("\n");
728 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
729 qemu_log_flush();
730 qemu_log_close();
731 }
732 va_end(ap2);
733 va_end(ap);
734 #if defined(CONFIG_USER_ONLY)
735 {
736 struct sigaction act;
737 sigfillset(&act.sa_mask);
738 act.sa_handler = SIG_DFL;
739 sigaction(SIGABRT, &act, NULL);
740 }
741 #endif
742 abort();
743 }
744
745 #if !defined(CONFIG_USER_ONLY)
746 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
747 {
748 RAMBlock *block;
749
750 /* The list is protected by the iothread lock here. */
751 block = ram_list.mru_block;
752 if (block && addr - block->offset < block->length) {
753 goto found;
754 }
755 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
756 if (addr - block->offset < block->length) {
757 goto found;
758 }
759 }
760
761 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
762 abort();
763
764 found:
765 ram_list.mru_block = block;
766 return block;
767 }
768
769 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
770 {
771 ram_addr_t start1;
772 RAMBlock *block;
773 ram_addr_t end;
774
775 end = TARGET_PAGE_ALIGN(start + length);
776 start &= TARGET_PAGE_MASK;
777
778 block = qemu_get_ram_block(start);
779 assert(block == qemu_get_ram_block(end - 1));
780 start1 = (uintptr_t)block->host + (start - block->offset);
781 cpu_tlb_reset_dirty_all(start1, length);
782 }
783
784 /* Note: start and end must be within the same ram block. */
785 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
786 unsigned client)
787 {
788 if (length == 0)
789 return;
790 cpu_physical_memory_clear_dirty_range(start, length, client);
791
792 if (tcg_enabled()) {
793 tlb_reset_dirty_range_all(start, length);
794 }
795 }
796
797 static void cpu_physical_memory_set_dirty_tracking(bool enable)
798 {
799 in_migration = enable;
800 }
801
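/* Build the iotlb value for a softmmu TLB entry covering vaddr.  For
 * RAM the result is the page's ram_addr combined with a special
 * physical section index (NOTDIRTY or ROM); for MMIO it is the index
 * of the MemoryRegionSection plus the offset within it.  Pages that
 * contain watchpoints are redirected to PHYS_SECTION_WATCH so that
 * accesses trap into the watchpoint handlers.
 */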
802 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
803 MemoryRegionSection *section,
804 target_ulong vaddr,
805 hwaddr paddr, hwaddr xlat,
806 int prot,
807 target_ulong *address)
808 {
809 hwaddr iotlb;
810 CPUWatchpoint *wp;
811
812 if (memory_region_is_ram(section->mr)) {
813 /* Normal RAM. */
814 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
815 + xlat;
816 if (!section->readonly) {
817 iotlb |= PHYS_SECTION_NOTDIRTY;
818 } else {
819 iotlb |= PHYS_SECTION_ROM;
820 }
821 } else {
822 iotlb = section - section->address_space->dispatch->map.sections;
823 iotlb += xlat;
824 }
825
826 /* Make accesses to pages with watchpoints go via the
827 watchpoint trap routines. */
828 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
829 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
830 /* Avoid trapping reads of pages with a write breakpoint. */
831 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
832 iotlb = PHYS_SECTION_WATCH + paddr;
833 *address |= TLB_MMIO;
834 break;
835 }
836 }
837 }
838
839 return iotlb;
840 }
841 #endif /* defined(CONFIG_USER_ONLY) */
842
843 #if !defined(CONFIG_USER_ONLY)
844
845 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
846 uint16_t section);
847 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
848
849 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
850
851 /*
852 * Set a custom physical guest memory allocator.
853 * Accelerators with unusual needs may need this. Hopefully, we can
854 * get rid of it eventually.
855 */
856 void phys_mem_set_alloc(void *(*alloc)(size_t))
857 {
858 phys_mem_alloc = alloc;
859 }
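/*
 * Example (hypothetical accelerator code, not part of this file):
 *
 *     static void *my_accel_ram_alloc(size_t size)
 *     {
 *         return my_accel_map_guest_ram(size);   // hypothetical helper
 *     }
 *
 *     phys_mem_set_alloc(my_accel_ram_alloc);
 *
 * From then on qemu_ram_alloc() and friends obtain guest RAM through
 * the accelerator's hook instead of qemu_anon_ram_alloc().
 */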
860
861 static uint16_t phys_section_add(PhysPageMap *map,
862 MemoryRegionSection *section)
863 {
864 /* The physical section number is ORed with a page-aligned
865 * pointer to produce the iotlb entries. Thus it should
866 * never overflow into the page-aligned value.
867 */
868 assert(map->sections_nb < TARGET_PAGE_SIZE);
869
870 if (map->sections_nb == map->sections_nb_alloc) {
871 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
872 map->sections = g_renew(MemoryRegionSection, map->sections,
873 map->sections_nb_alloc);
874 }
875 map->sections[map->sections_nb] = *section;
876 memory_region_ref(section->mr);
877 return map->sections_nb++;
878 }
879
880 static void phys_section_destroy(MemoryRegion *mr)
881 {
882 memory_region_unref(mr);
883
884 if (mr->subpage) {
885 subpage_t *subpage = container_of(mr, subpage_t, iomem);
886 memory_region_destroy(&subpage->iomem);
887 g_free(subpage);
888 }
889 }
890
891 static void phys_sections_free(PhysPageMap *map)
892 {
893 while (map->sections_nb > 0) {
894 MemoryRegionSection *section = &map->sections[--map->sections_nb];
895 phys_section_destroy(section->mr);
896 }
897 g_free(map->sections);
898 g_free(map->nodes);
899 }
900
901 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
902 {
903 subpage_t *subpage;
904 hwaddr base = section->offset_within_address_space
905 & TARGET_PAGE_MASK;
906 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
907 d->map.nodes, d->map.sections);
908 MemoryRegionSection subsection = {
909 .offset_within_address_space = base,
910 .size = int128_make64(TARGET_PAGE_SIZE),
911 };
912 hwaddr start, end;
913
914 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
915
916 if (!(existing->mr->subpage)) {
917 subpage = subpage_init(d->as, base);
918 subsection.address_space = d->as;
919 subsection.mr = &subpage->iomem;
920 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
921 phys_section_add(&d->map, &subsection));
922 } else {
923 subpage = container_of(existing->mr, subpage_t, iomem);
924 }
925 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
926 end = start + int128_get64(section->size) - 1;
927 subpage_register(subpage, start, end,
928 phys_section_add(&d->map, section));
929 }
930
931
932 static void register_multipage(AddressSpaceDispatch *d,
933 MemoryRegionSection *section)
934 {
935 hwaddr start_addr = section->offset_within_address_space;
936 uint16_t section_index = phys_section_add(&d->map, section);
937 uint64_t num_pages = int128_get64(int128_rshift(section->size,
938 TARGET_PAGE_BITS));
939
940 assert(num_pages);
941 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
942 }
943
944 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
945 {
946 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
947 AddressSpaceDispatch *d = as->next_dispatch;
948 MemoryRegionSection now = *section, remain = *section;
949 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
950
951 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
952 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
953 - now.offset_within_address_space;
954
955 now.size = int128_min(int128_make64(left), now.size);
956 register_subpage(d, &now);
957 } else {
958 now.size = int128_zero();
959 }
960 while (int128_ne(remain.size, now.size)) {
961 remain.size = int128_sub(remain.size, now.size);
962 remain.offset_within_address_space += int128_get64(now.size);
963 remain.offset_within_region += int128_get64(now.size);
964 now = remain;
965 if (int128_lt(remain.size, page_size)) {
966 register_subpage(d, &now);
967 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
968 now.size = page_size;
969 register_subpage(d, &now);
970 } else {
971 now.size = int128_and(now.size, int128_neg(page_size));
972 register_multipage(d, &now);
973 }
974 }
975 }
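/*
 * Example of the splitting above (illustrative, assuming 4 KiB target
 * pages): a section covering [0x0800, 0x2c00) is registered as a
 * subpage for the unaligned head [0x0800, 0x1000), a multipage entry
 * for the aligned middle [0x1000, 0x2000), and another subpage for the
 * unaligned tail [0x2000, 0x2c00).
 */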
976
977 void qemu_flush_coalesced_mmio_buffer(void)
978 {
979 if (kvm_enabled())
980 kvm_flush_coalesced_mmio_buffer();
981 }
982
983 void qemu_mutex_lock_ramlist(void)
984 {
985 qemu_mutex_lock(&ram_list.mutex);
986 }
987
988 void qemu_mutex_unlock_ramlist(void)
989 {
990 qemu_mutex_unlock(&ram_list.mutex);
991 }
992
993 #ifdef __linux__
994
995 #include <sys/vfs.h>
996
997 #define HUGETLBFS_MAGIC 0x958458f6
998
999 static long gethugepagesize(const char *path)
1000 {
1001 struct statfs fs;
1002 int ret;
1003
1004 do {
1005 ret = statfs(path, &fs);
1006 } while (ret != 0 && errno == EINTR);
1007
1008 if (ret != 0) {
1009 perror(path);
1010 return 0;
1011 }
1012
1013 if (fs.f_type != HUGETLBFS_MAGIC)
1014 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1015
1016 return fs.f_bsize;
1017 }
1018
1019 static sigjmp_buf sigjump;
1020
1021 static void sigbus_handler(int signal)
1022 {
1023 siglongjmp(sigjump, 1);
1024 }
1025
1026 static void *file_ram_alloc(RAMBlock *block,
1027 ram_addr_t memory,
1028 const char *path)
1029 {
1030 char *filename;
1031 char *sanitized_name;
1032 char *c;
1033 void *area;
1034 int fd;
1035 unsigned long hpagesize;
1036
1037 hpagesize = gethugepagesize(path);
1038 if (!hpagesize) {
1039 goto error;
1040 }
1041
1042 if (memory < hpagesize) {
1043 return NULL;
1044 }
1045
1046 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1047 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1048 goto error;
1049 }
1050
1051 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1052 sanitized_name = g_strdup(block->mr->name);
1053 for (c = sanitized_name; *c != '\0'; c++) {
1054 if (*c == '/')
1055 *c = '_';
1056 }
1057
1058 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1059 sanitized_name);
1060 g_free(sanitized_name);
1061
1062 fd = mkstemp(filename);
1063 if (fd < 0) {
1064 perror("unable to create backing store for hugepages");
1065 g_free(filename);
1066 goto error;
1067 }
1068 unlink(filename);
1069 g_free(filename);
1070
1071 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1072
1073 /*
1074 * ftruncate is not supported by hugetlbfs in older
1075 * hosts, so don't bother bailing out on errors.
1076 * If anything goes wrong with it under other filesystems,
1077 * mmap will fail.
1078 */
1079 if (ftruncate(fd, memory))
1080 perror("ftruncate");
1081
1082 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1083 if (area == MAP_FAILED) {
1084 perror("file_ram_alloc: can't mmap RAM pages");
1085 close(fd);
1086 goto error;
1087 }
1088
1089 if (mem_prealloc) {
1090 int ret, i;
1091 struct sigaction act, oldact;
1092 sigset_t set, oldset;
1093
1094 memset(&act, 0, sizeof(act));
1095 act.sa_handler = &sigbus_handler;
1096 act.sa_flags = 0;
1097
1098 ret = sigaction(SIGBUS, &act, &oldact);
1099 if (ret) {
1100 perror("file_ram_alloc: failed to install signal handler");
1101 exit(1);
1102 }
1103
1104 /* unblock SIGBUS */
1105 sigemptyset(&set);
1106 sigaddset(&set, SIGBUS);
1107 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1108
1109 if (sigsetjmp(sigjump, 1)) {
1110 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1111 exit(1);
1112 }
1113
1114 /* MAP_POPULATE silently ignores failures */
1115 for (i = 0; i < (memory/hpagesize); i++) {
1116 memset(area + (hpagesize*i), 0, 1);
1117 }
1118
1119 ret = sigaction(SIGBUS, &oldact, NULL);
1120 if (ret) {
1121 perror("file_ram_alloc: failed to reinstall signal handler");
1122 exit(1);
1123 }
1124
1125 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1126 }
1127
1128 block->fd = fd;
1129 return area;
1130
1131 error:
1132 if (mem_prealloc) {
1133 exit(1);
1134 }
1135 return NULL;
1136 }
1137 #else
1138 static void *file_ram_alloc(RAMBlock *block,
1139 ram_addr_t memory,
1140 const char *path)
1141 {
1142 fprintf(stderr, "-mem-path not supported on this host\n");
1143 exit(1);
1144 }
1145 #endif
1146
1147 static ram_addr_t find_ram_offset(ram_addr_t size)
1148 {
1149 RAMBlock *block, *next_block;
1150 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1151
1152 assert(size != 0); /* it would hand out same offset multiple times */
1153
1154 if (QTAILQ_EMPTY(&ram_list.blocks))
1155 return 0;
1156
1157 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1158 ram_addr_t end, next = RAM_ADDR_MAX;
1159
1160 end = block->offset + block->length;
1161
1162 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1163 if (next_block->offset >= end) {
1164 next = MIN(next, next_block->offset);
1165 }
1166 }
1167 if (next - end >= size && next - end < mingap) {
1168 offset = end;
1169 mingap = next - end;
1170 }
1171 }
1172
1173 if (offset == RAM_ADDR_MAX) {
1174 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1175 (uint64_t)size);
1176 abort();
1177 }
1178
1179 return offset;
1180 }
1181
1182 ram_addr_t last_ram_offset(void)
1183 {
1184 RAMBlock *block;
1185 ram_addr_t last = 0;
1186
1187 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1188 last = MAX(last, block->offset + block->length);
1189
1190 return last;
1191 }
1192
1193 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1194 {
1195 int ret;
1196
1197 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1198 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1199 "dump-guest-core", true)) {
1200 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1201 if (ret) {
1202 perror("qemu_madvise");
1203 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1204 "but dump_guest_core=off specified\n");
1205 }
1206 }
1207 }
1208
1209 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1210 {
1211 RAMBlock *new_block, *block;
1212
1213 new_block = NULL;
1214 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1215 if (block->offset == addr) {
1216 new_block = block;
1217 break;
1218 }
1219 }
1220 assert(new_block);
1221 assert(!new_block->idstr[0]);
1222
1223 if (dev) {
1224 char *id = qdev_get_dev_path(dev);
1225 if (id) {
1226 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1227 g_free(id);
1228 }
1229 }
1230 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1231
1232 /* This assumes the iothread lock is taken here too. */
1233 qemu_mutex_lock_ramlist();
1234 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1235 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1236 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1237 new_block->idstr);
1238 abort();
1239 }
1240 }
1241 qemu_mutex_unlock_ramlist();
1242 }
1243
1244 static int memory_try_enable_merging(void *addr, size_t len)
1245 {
1246 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1247 /* disabled by the user */
1248 return 0;
1249 }
1250
1251 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1252 }
1253
1254 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1255 MemoryRegion *mr)
1256 {
1257 RAMBlock *block, *new_block;
1258 ram_addr_t old_ram_size, new_ram_size;
1259
1260 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1261
1262 size = TARGET_PAGE_ALIGN(size);
1263 new_block = g_malloc0(sizeof(*new_block));
1264 new_block->fd = -1;
1265
1266 /* This assumes the iothread lock is taken here too. */
1267 qemu_mutex_lock_ramlist();
1268 new_block->mr = mr;
1269 new_block->offset = find_ram_offset(size);
1270 if (host) {
1271 new_block->host = host;
1272 new_block->flags |= RAM_PREALLOC_MASK;
1273 } else if (xen_enabled()) {
1274 if (mem_path) {
1275 fprintf(stderr, "-mem-path not supported with Xen\n");
1276 exit(1);
1277 }
1278 xen_ram_alloc(new_block->offset, size, mr);
1279 } else {
1280 if (mem_path) {
1281 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1282 /*
1283 * file_ram_alloc() needs to allocate just like
1284 * phys_mem_alloc, but we haven't bothered to provide
1285 * a hook there.
1286 */
1287 fprintf(stderr,
1288 "-mem-path not supported with this accelerator\n");
1289 exit(1);
1290 }
1291 new_block->host = file_ram_alloc(new_block, size, mem_path);
1292 }
1293 if (!new_block->host) {
1294 new_block->host = phys_mem_alloc(size);
1295 if (!new_block->host) {
1296 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1297 new_block->mr->name, strerror(errno));
1298 exit(1);
1299 }
1300 memory_try_enable_merging(new_block->host, size);
1301 }
1302 }
1303 new_block->length = size;
1304
1305 /* Keep the list sorted from biggest to smallest block. */
1306 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1307 if (block->length < new_block->length) {
1308 break;
1309 }
1310 }
1311 if (block) {
1312 QTAILQ_INSERT_BEFORE(block, new_block, next);
1313 } else {
1314 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1315 }
1316 ram_list.mru_block = NULL;
1317
1318 ram_list.version++;
1319 qemu_mutex_unlock_ramlist();
1320
1321 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1322
1323 if (new_ram_size > old_ram_size) {
1324 int i;
1325 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1326 ram_list.dirty_memory[i] =
1327 bitmap_zero_extend(ram_list.dirty_memory[i],
1328 old_ram_size, new_ram_size);
1329 }
1330 }
1331 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1332
1333 qemu_ram_setup_dump(new_block->host, size);
1334 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1335 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1336
1337 if (kvm_enabled())
1338 kvm_setup_guest_memory(new_block->host, size);
1339
1340 return new_block->offset;
1341 }
1342
1343 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1344 {
1345 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1346 }
1347
1348 void qemu_ram_free_from_ptr(ram_addr_t addr)
1349 {
1350 RAMBlock *block;
1351
1352 /* This assumes the iothread lock is taken here too. */
1353 qemu_mutex_lock_ramlist();
1354 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1355 if (addr == block->offset) {
1356 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1357 ram_list.mru_block = NULL;
1358 ram_list.version++;
1359 g_free(block);
1360 break;
1361 }
1362 }
1363 qemu_mutex_unlock_ramlist();
1364 }
1365
1366 void qemu_ram_free(ram_addr_t addr)
1367 {
1368 RAMBlock *block;
1369
1370 /* This assumes the iothread lock is taken here too. */
1371 qemu_mutex_lock_ramlist();
1372 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1373 if (addr == block->offset) {
1374 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1375 ram_list.mru_block = NULL;
1376 ram_list.version++;
1377 if (block->flags & RAM_PREALLOC_MASK) {
1378 ;
1379 } else if (xen_enabled()) {
1380 xen_invalidate_map_cache_entry(block->host);
1381 #ifndef _WIN32
1382 } else if (block->fd >= 0) {
1383 munmap(block->host, block->length);
1384 close(block->fd);
1385 #endif
1386 } else {
1387 qemu_anon_ram_free(block->host, block->length);
1388 }
1389 g_free(block);
1390 break;
1391 }
1392 }
1393 qemu_mutex_unlock_ramlist();
1394
1395 }
1396
1397 #ifndef _WIN32
1398 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1399 {
1400 RAMBlock *block;
1401 ram_addr_t offset;
1402 int flags;
1403 void *area, *vaddr;
1404
1405 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1406 offset = addr - block->offset;
1407 if (offset < block->length) {
1408 vaddr = block->host + offset;
1409 if (block->flags & RAM_PREALLOC_MASK) {
1410 ;
1411 } else if (xen_enabled()) {
1412 abort();
1413 } else {
1414 flags = MAP_FIXED;
1415 munmap(vaddr, length);
1416 if (block->fd >= 0) {
1417 #ifdef MAP_POPULATE
1418 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1419 MAP_PRIVATE;
1420 #else
1421 flags |= MAP_PRIVATE;
1422 #endif
1423 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1424 flags, block->fd, offset);
1425 } else {
1426 /*
1427 * Remap needs to match alloc. Accelerators that
1428 * set phys_mem_alloc never remap. If they did,
1429 * we'd need a remap hook here.
1430 */
1431 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1432
1433 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1434 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1435 flags, -1, 0);
1436 }
1437 if (area != vaddr) {
1438 fprintf(stderr, "Could not remap addr: "
1439 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1440 length, addr);
1441 exit(1);
1442 }
1443 memory_try_enable_merging(vaddr, length);
1444 qemu_ram_setup_dump(vaddr, length);
1445 }
1446 return;
1447 }
1448 }
1449 }
1450 #endif /* !_WIN32 */
1451
1452 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1453 With the exception of the softmmu code in this file, this should
1454 only be used for local memory (e.g. video ram) that the device owns,
1455 and knows it isn't going to access beyond the end of the block.
1456
1457 It should not be used for general purpose DMA.
1458 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1459 */
1460 void *qemu_get_ram_ptr(ram_addr_t addr)
1461 {
1462 RAMBlock *block = qemu_get_ram_block(addr);
1463
1464 if (xen_enabled()) {
1465 /* We need to check if the requested address is in the RAM
1466 * because we don't want to map the entire memory in QEMU.
1467 * In that case just map until the end of the page.
1468 */
1469 if (block->offset == 0) {
1470 return xen_map_cache(addr, 0, 0);
1471 } else if (block->host == NULL) {
1472 block->host =
1473 xen_map_cache(block->offset, block->length, 1);
1474 }
1475 }
1476 return block->host + (addr - block->offset);
1477 }
1478
1479 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1480 * but takes a size argument */
1481 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1482 {
1483 if (*size == 0) {
1484 return NULL;
1485 }
1486 if (xen_enabled()) {
1487 return xen_map_cache(addr, *size, 1);
1488 } else {
1489 RAMBlock *block;
1490
1491 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1492 if (addr - block->offset < block->length) {
1493 if (addr - block->offset + *size > block->length)
1494 *size = block->length - addr + block->offset;
1495 return block->host + (addr - block->offset);
1496 }
1497 }
1498
1499 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1500 abort();
1501 }
1502 }
1503
1504 /* Some of the softmmu routines need to translate from a host pointer
1505 (typically a TLB entry) back to a ram offset. */
1506 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1507 {
1508 RAMBlock *block;
1509 uint8_t *host = ptr;
1510
1511 if (xen_enabled()) {
1512 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1513 return qemu_get_ram_block(*ram_addr)->mr;
1514 }
1515
1516 block = ram_list.mru_block;
1517 if (block && block->host && host - block->host < block->length) {
1518 goto found;
1519 }
1520
1521 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1522 /* This case appears when the block is not mapped. */
1523 if (block->host == NULL) {
1524 continue;
1525 }
1526 if (host - block->host < block->length) {
1527 goto found;
1528 }
1529 }
1530
1531 return NULL;
1532
1533 found:
1534 *ram_addr = block->offset + (host - block->host);
1535 return block->mr;
1536 }
1537
1538 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1539 uint64_t val, unsigned size)
1540 {
1541 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1542 tb_invalidate_phys_page_fast(ram_addr, size);
1543 }
1544 switch (size) {
1545 case 1:
1546 stb_p(qemu_get_ram_ptr(ram_addr), val);
1547 break;
1548 case 2:
1549 stw_p(qemu_get_ram_ptr(ram_addr), val);
1550 break;
1551 case 4:
1552 stl_p(qemu_get_ram_ptr(ram_addr), val);
1553 break;
1554 default:
1555 abort();
1556 }
1557 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1558 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1559 /* we remove the notdirty callback only if the code has been
1560 flushed */
1561 if (!cpu_physical_memory_is_clean(ram_addr)) {
1562 CPUArchState *env = current_cpu->env_ptr;
1563 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1564 }
1565 }
1566
1567 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1568 unsigned size, bool is_write)
1569 {
1570 return is_write;
1571 }
1572
1573 static const MemoryRegionOps notdirty_mem_ops = {
1574 .write = notdirty_mem_write,
1575 .valid.accepts = notdirty_mem_accepts,
1576 .endianness = DEVICE_NATIVE_ENDIAN,
1577 };
1578
1579 /* Generate a debug exception if a watchpoint has been hit. */
1580 static void check_watchpoint(int offset, int len_mask, int flags)
1581 {
1582 CPUState *cpu = current_cpu;
1583 CPUArchState *env = cpu->env_ptr;
1584 target_ulong pc, cs_base;
1585 target_ulong vaddr;
1586 CPUWatchpoint *wp;
1587 int cpu_flags;
1588
1589 if (cpu->watchpoint_hit) {
1590 /* We re-entered the check after replacing the TB. Now raise
1591 * the debug interrupt so that it will trigger after the
1592 * current instruction. */
1593 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1594 return;
1595 }
1596 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1597 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1598 if ((vaddr == (wp->vaddr & len_mask) ||
1599 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1600 wp->flags |= BP_WATCHPOINT_HIT;
1601 if (!cpu->watchpoint_hit) {
1602 cpu->watchpoint_hit = wp;
1603 tb_check_watchpoint(cpu);
1604 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1605 cpu->exception_index = EXCP_DEBUG;
1606 cpu_loop_exit(cpu);
1607 } else {
1608 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1609 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1610 cpu_resume_from_signal(cpu, NULL);
1611 }
1612 }
1613 } else {
1614 wp->flags &= ~BP_WATCHPOINT_HIT;
1615 }
1616 }
1617 }
1618
1619 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1620 so these check for a hit then pass through to the normal out-of-line
1621 phys routines. */
1622 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1623 unsigned size)
1624 {
1625 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1626 switch (size) {
1627 case 1: return ldub_phys(&address_space_memory, addr);
1628 case 2: return lduw_phys(&address_space_memory, addr);
1629 case 4: return ldl_phys(&address_space_memory, addr);
1630 default: abort();
1631 }
1632 }
1633
1634 static void watch_mem_write(void *opaque, hwaddr addr,
1635 uint64_t val, unsigned size)
1636 {
1637 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1638 switch (size) {
1639 case 1:
1640 stb_phys(&address_space_memory, addr, val);
1641 break;
1642 case 2:
1643 stw_phys(&address_space_memory, addr, val);
1644 break;
1645 case 4:
1646 stl_phys(&address_space_memory, addr, val);
1647 break;
1648 default: abort();
1649 }
1650 }
1651
1652 static const MemoryRegionOps watch_mem_ops = {
1653 .read = watch_mem_read,
1654 .write = watch_mem_write,
1655 .endianness = DEVICE_NATIVE_ENDIAN,
1656 };
1657
1658 static uint64_t subpage_read(void *opaque, hwaddr addr,
1659 unsigned len)
1660 {
1661 subpage_t *subpage = opaque;
1662 uint8_t buf[4];
1663
1664 #if defined(DEBUG_SUBPAGE)
1665 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1666 subpage, len, addr);
1667 #endif
1668 address_space_read(subpage->as, addr + subpage->base, buf, len);
1669 switch (len) {
1670 case 1:
1671 return ldub_p(buf);
1672 case 2:
1673 return lduw_p(buf);
1674 case 4:
1675 return ldl_p(buf);
1676 default:
1677 abort();
1678 }
1679 }
1680
1681 static void subpage_write(void *opaque, hwaddr addr,
1682 uint64_t value, unsigned len)
1683 {
1684 subpage_t *subpage = opaque;
1685 uint8_t buf[4];
1686
1687 #if defined(DEBUG_SUBPAGE)
1688 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1689 " value %"PRIx64"\n",
1690 __func__, subpage, len, addr, value);
1691 #endif
1692 switch (len) {
1693 case 1:
1694 stb_p(buf, value);
1695 break;
1696 case 2:
1697 stw_p(buf, value);
1698 break;
1699 case 4:
1700 stl_p(buf, value);
1701 break;
1702 default:
1703 abort();
1704 }
1705 address_space_write(subpage->as, addr + subpage->base, buf, len);
1706 }
1707
1708 static bool subpage_accepts(void *opaque, hwaddr addr,
1709 unsigned len, bool is_write)
1710 {
1711 subpage_t *subpage = opaque;
1712 #if defined(DEBUG_SUBPAGE)
1713 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1714 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1715 #endif
1716
1717 return address_space_access_valid(subpage->as, addr + subpage->base,
1718 len, is_write);
1719 }
1720
1721 static const MemoryRegionOps subpage_ops = {
1722 .read = subpage_read,
1723 .write = subpage_write,
1724 .valid.accepts = subpage_accepts,
1725 .endianness = DEVICE_NATIVE_ENDIAN,
1726 };
1727
1728 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1729 uint16_t section)
1730 {
1731 int idx, eidx;
1732
1733 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1734 return -1;
1735 idx = SUBPAGE_IDX(start);
1736 eidx = SUBPAGE_IDX(end);
1737 #if defined(DEBUG_SUBPAGE)
1738 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1739 __func__, mmio, start, end, idx, eidx, section);
1740 #endif
1741 for (; idx <= eidx; idx++) {
1742 mmio->sub_section[idx] = section;
1743 }
1744
1745 return 0;
1746 }
1747
1748 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1749 {
1750 subpage_t *mmio;
1751
1752 mmio = g_malloc0(sizeof(subpage_t));
1753
1754 mmio->as = as;
1755 mmio->base = base;
1756 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1757 "subpage", TARGET_PAGE_SIZE);
1758 mmio->iomem.subpage = true;
1759 #if defined(DEBUG_SUBPAGE)
1760 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1761 mmio, base, TARGET_PAGE_SIZE);
1762 #endif
1763 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1764
1765 return mmio;
1766 }
1767
1768 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1769 {
1770 MemoryRegionSection section = {
1771 .address_space = &address_space_memory,
1772 .mr = mr,
1773 .offset_within_address_space = 0,
1774 .offset_within_region = 0,
1775 .size = int128_2_64(),
1776 };
1777
1778 return phys_section_add(map, &section);
1779 }
1780
1781 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1782 {
1783 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1784 }
1785
1786 static void io_mem_init(void)
1787 {
1788 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1789 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1790 "unassigned", UINT64_MAX);
1791 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1792 "notdirty", UINT64_MAX);
1793 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1794 "watch", UINT64_MAX);
1795 }
1796
1797 static void mem_begin(MemoryListener *listener)
1798 {
1799 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1800 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1801 uint16_t n;
1802
1803 n = dummy_section(&d->map, &io_mem_unassigned);
1804 assert(n == PHYS_SECTION_UNASSIGNED);
1805 n = dummy_section(&d->map, &io_mem_notdirty);
1806 assert(n == PHYS_SECTION_NOTDIRTY);
1807 n = dummy_section(&d->map, &io_mem_rom);
1808 assert(n == PHYS_SECTION_ROM);
1809 n = dummy_section(&d->map, &io_mem_watch);
1810 assert(n == PHYS_SECTION_WATCH);
1811
1812 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1813 d->as = as;
1814 as->next_dispatch = d;
1815 }
1816
1817 static void mem_commit(MemoryListener *listener)
1818 {
1819 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1820 AddressSpaceDispatch *cur = as->dispatch;
1821 AddressSpaceDispatch *next = as->next_dispatch;
1822
1823 phys_page_compact_all(next, next->map.nodes_nb);
1824
1825 as->dispatch = next;
1826
1827 if (cur) {
1828 phys_sections_free(&cur->map);
1829 g_free(cur);
1830 }
1831 }
1832
1833 static void tcg_commit(MemoryListener *listener)
1834 {
1835 CPUState *cpu;
1836
1837 /* since each CPU stores ram addresses in its TLB cache, we must
1838 reset the modified entries */
1839 /* XXX: slow ! */
1840 CPU_FOREACH(cpu) {
1841 CPUArchState *env = cpu->env_ptr;
1842
1843 /* FIXME: Disentangle the cpu.h circular file dependencies so we
1844 can get the right CPU directly from the listener. */
1845 if (cpu->tcg_as_listener != listener) {
1846 continue;
1847 }
1848 tlb_flush(env, 1);
1849 }
1850 }
1851
1852 static void core_log_global_start(MemoryListener *listener)
1853 {
1854 cpu_physical_memory_set_dirty_tracking(true);
1855 }
1856
1857 static void core_log_global_stop(MemoryListener *listener)
1858 {
1859 cpu_physical_memory_set_dirty_tracking(false);
1860 }
1861
1862 static MemoryListener core_memory_listener = {
1863 .log_global_start = core_log_global_start,
1864 .log_global_stop = core_log_global_stop,
1865 .priority = 1,
1866 };
1867
1868 void address_space_init_dispatch(AddressSpace *as)
1869 {
1870 as->dispatch = NULL;
1871 as->dispatch_listener = (MemoryListener) {
1872 .begin = mem_begin,
1873 .commit = mem_commit,
1874 .region_add = mem_add,
1875 .region_nop = mem_add,
1876 .priority = 0,
1877 };
1878 memory_listener_register(&as->dispatch_listener, as);
1879 }
1880
1881 void address_space_destroy_dispatch(AddressSpace *as)
1882 {
1883 AddressSpaceDispatch *d = as->dispatch;
1884
1885 memory_listener_unregister(&as->dispatch_listener);
1886 g_free(d);
1887 as->dispatch = NULL;
1888 }
1889
1890 static void memory_map_init(void)
1891 {
1892 system_memory = g_malloc(sizeof(*system_memory));
1893
1894 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1895 address_space_init(&address_space_memory, system_memory, "memory");
1896
1897 system_io = g_malloc(sizeof(*system_io));
1898 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1899 65536);
1900 address_space_init(&address_space_io, system_io, "I/O");
1901
1902 memory_listener_register(&core_memory_listener, &address_space_memory);
1903 }
1904
1905 MemoryRegion *get_system_memory(void)
1906 {
1907 return system_memory;
1908 }
1909
1910 MemoryRegion *get_system_io(void)
1911 {
1912 return system_io;
1913 }
1914
1915 #endif /* !defined(CONFIG_USER_ONLY) */
1916
1917 /* physical memory access (slow version, mainly for debug) */
1918 #if defined(CONFIG_USER_ONLY)
1919 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1920 uint8_t *buf, int len, int is_write)
1921 {
1922 int l, flags;
1923 target_ulong page;
1924 void * p;
1925
1926 while (len > 0) {
1927 page = addr & TARGET_PAGE_MASK;
1928 l = (page + TARGET_PAGE_SIZE) - addr;
1929 if (l > len)
1930 l = len;
1931 flags = page_get_flags(page);
1932 if (!(flags & PAGE_VALID))
1933 return -1;
1934 if (is_write) {
1935 if (!(flags & PAGE_WRITE))
1936 return -1;
1937 /* XXX: this code should not depend on lock_user */
1938 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1939 return -1;
1940 memcpy(p, buf, l);
1941 unlock_user(p, addr, l);
1942 } else {
1943 if (!(flags & PAGE_READ))
1944 return -1;
1945 /* XXX: this code should not depend on lock_user */
1946 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1947 return -1;
1948 memcpy(buf, p, l);
1949 unlock_user(p, addr, 0);
1950 }
1951 len -= l;
1952 buf += l;
1953 addr += l;
1954 }
1955 return 0;
1956 }
1957
1958 #else
1959
1960 static void invalidate_and_set_dirty(hwaddr addr,
1961 hwaddr length)
1962 {
1963 if (cpu_physical_memory_is_clean(addr)) {
1964 /* invalidate code */
1965 tb_invalidate_phys_page_range(addr, addr + length, 0);
1966 /* set dirty bit */
1967 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1968 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1969 }
1970 xen_modified_memory(addr, length);
1971 }
1972
1973 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1974 {
1975 unsigned access_size_max = mr->ops->valid.max_access_size;
1976
1977 /* Regions are assumed to support 1-4 byte accesses unless
1978 otherwise specified. */
1979 if (access_size_max == 0) {
1980 access_size_max = 4;
1981 }
1982
1983 /* Bound the maximum access by the alignment of the address. */
1984 if (!mr->ops->impl.unaligned) {
1985 unsigned align_size_max = addr & -addr;
1986 if (align_size_max != 0 && align_size_max < access_size_max) {
1987 access_size_max = align_size_max;
1988 }
1989 }
1990
1991 /* Don't attempt accesses larger than the maximum. */
1992 if (l > access_size_max) {
1993 l = access_size_max;
1994 }
1995 if (l & (l - 1)) {
1996 l = 1 << (qemu_fls(l) - 1);
1997 }
1998
1999 return l;
2000 }
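/*
 * Worked example (illustrative): with valid.max_access_size == 4 and a
 * 6-byte request at address 0x1002, the address alignment limits the
 * access to 0x1002 & -0x1002 == 2 bytes, so address_space_rw() below
 * issues the transfer as a series of 2-byte accesses.
 */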
2001
2002 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2003 int len, bool is_write)
2004 {
2005 hwaddr l;
2006 uint8_t *ptr;
2007 uint64_t val;
2008 hwaddr addr1;
2009 MemoryRegion *mr;
2010 bool error = false;
2011
2012 while (len > 0) {
2013 l = len;
2014 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2015
2016 if (is_write) {
2017 if (!memory_access_is_direct(mr, is_write)) {
2018 l = memory_access_size(mr, l, addr1);
2019 /* XXX: could force current_cpu to NULL to avoid
2020 potential bugs */
2021 switch (l) {
2022 case 8:
2023 /* 64 bit write access */
2024 val = ldq_p(buf);
2025 error |= io_mem_write(mr, addr1, val, 8);
2026 break;
2027 case 4:
2028 /* 32 bit write access */
2029 val = ldl_p(buf);
2030 error |= io_mem_write(mr, addr1, val, 4);
2031 break;
2032 case 2:
2033 /* 16 bit write access */
2034 val = lduw_p(buf);
2035 error |= io_mem_write(mr, addr1, val, 2);
2036 break;
2037 case 1:
2038 /* 8 bit write access */
2039 val = ldub_p(buf);
2040 error |= io_mem_write(mr, addr1, val, 1);
2041 break;
2042 default:
2043 abort();
2044 }
2045 } else {
2046 addr1 += memory_region_get_ram_addr(mr);
2047 /* RAM case */
2048 ptr = qemu_get_ram_ptr(addr1);
2049 memcpy(ptr, buf, l);
2050 invalidate_and_set_dirty(addr1, l);
2051 }
2052 } else {
2053 if (!memory_access_is_direct(mr, is_write)) {
2054 /* I/O case */
2055 l = memory_access_size(mr, l, addr1);
2056 switch (l) {
2057 case 8:
2058 /* 64 bit read access */
2059 error |= io_mem_read(mr, addr1, &val, 8);
2060 stq_p(buf, val);
2061 break;
2062 case 4:
2063 /* 32 bit read access */
2064 error |= io_mem_read(mr, addr1, &val, 4);
2065 stl_p(buf, val);
2066 break;
2067 case 2:
2068 /* 16 bit read access */
2069 error |= io_mem_read(mr, addr1, &val, 2);
2070 stw_p(buf, val);
2071 break;
2072 case 1:
2073 /* 8 bit read access */
2074 error |= io_mem_read(mr, addr1, &val, 1);
2075 stb_p(buf, val);
2076 break;
2077 default:
2078 abort();
2079 }
2080 } else {
2081 /* RAM case */
2082 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2083 memcpy(buf, ptr, l);
2084 }
2085 }
2086 len -= l;
2087 buf += l;
2088 addr += l;
2089 }
2090
2091 return error;
2092 }
2093
2094 bool address_space_write(AddressSpace *as, hwaddr addr,
2095 const uint8_t *buf, int len)
2096 {
2097 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2098 }
2099
2100 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2101 {
2102 return address_space_rw(as, addr, buf, len, false);
2103 }
2104
2105
2106 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2107 int len, int is_write)
2108 {
2109 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2110 }
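
/*
 * Usage sketch (illustrative): copying four bytes starting at a
 * hypothetical guest physical address into a local buffer, using the
 * convenience wrapper above (is_write == 0 selects a read).
 *
 *     uint8_t buf[4];
 *
 *     cpu_physical_memory_rw(0x1000, buf, sizeof(buf), 0);
 */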
2111
2112 enum write_rom_type {
2113 WRITE_DATA,
2114 FLUSH_CACHE,
2115 };
2116
2117 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2118 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2119 {
2120 hwaddr l;
2121 uint8_t *ptr;
2122 hwaddr addr1;
2123 MemoryRegion *mr;
2124
2125 while (len > 0) {
2126 l = len;
2127 mr = address_space_translate(as, addr, &addr1, &l, true);
2128
2129 if (!(memory_region_is_ram(mr) ||
2130 memory_region_is_romd(mr))) {
2131 /* do nothing */
2132 } else {
2133 addr1 += memory_region_get_ram_addr(mr);
2134 /* ROM/RAM case */
2135 ptr = qemu_get_ram_ptr(addr1);
2136 switch (type) {
2137 case WRITE_DATA:
2138 memcpy(ptr, buf, l);
2139 invalidate_and_set_dirty(addr1, l);
2140 break;
2141 case FLUSH_CACHE:
2142 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2143 break;
2144 }
2145 }
2146 len -= l;
2147 buf += l;
2148 addr += l;
2149 }
2150 }
2151
2152 /* used for ROM loading: can write in RAM and ROM */
2153 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2154 const uint8_t *buf, int len)
2155 {
2156 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2157 }
2158
2159 void cpu_flush_icache_range(hwaddr start, int len)
2160 {
2161 /*
2162 * This function should do the same thing as an icache flush that was
2163 * triggered from within the guest. For TCG we are always cache coherent,
2164 * so there is no need to flush anything. For KVM / Xen we need to at
2165 * least flush the host's instruction cache.
2166 */
2167 if (tcg_enabled()) {
2168 return;
2169 }
2170
2171 cpu_physical_memory_write_rom_internal(&address_space_memory,
2172 start, NULL, len, FLUSH_CACHE);
2173 }
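
/*
 * Usage sketch (illustrative): the pattern a firmware loader might use to
 * install a code blob into guest memory and then make it visible to the
 * host instruction cache when running under KVM or Xen. "blob", its size
 * and the load address are hypothetical.
 *
 *     cpu_physical_memory_write_rom(&address_space_memory, 0xfff00000,
 *                                   blob, blob_size);
 *     cpu_flush_icache_range(0xfff00000, blob_size);
 */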
2174
2175 typedef struct {
2176 MemoryRegion *mr;
2177 void *buffer;
2178 hwaddr addr;
2179 hwaddr len;
2180 } BounceBuffer;
2181
2182 static BounceBuffer bounce;
2183
2184 typedef struct MapClient {
2185 void *opaque;
2186 void (*callback)(void *opaque);
2187 QLIST_ENTRY(MapClient) link;
2188 } MapClient;
2189
2190 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2191 = QLIST_HEAD_INITIALIZER(map_client_list);
2192
2193 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2194 {
2195 MapClient *client = g_malloc(sizeof(*client));
2196
2197 client->opaque = opaque;
2198 client->callback = callback;
2199 QLIST_INSERT_HEAD(&map_client_list, client, link);
2200 return client;
2201 }
2202
2203 static void cpu_unregister_map_client(void *_client)
2204 {
2205 MapClient *client = (MapClient *)_client;
2206
2207 QLIST_REMOVE(client, link);
2208 g_free(client);
2209 }
2210
2211 static void cpu_notify_map_clients(void)
2212 {
2213 MapClient *client;
2214
2215 while (!QLIST_EMPTY(&map_client_list)) {
2216 client = QLIST_FIRST(&map_client_list);
2217 client->callback(client->opaque);
2218 cpu_unregister_map_client(client);
2219 }
2220 }
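
/*
 * Usage sketch (illustrative): a device model whose address_space_map()
 * call returned NULL because the bounce buffer was busy can register a
 * callback to retry once the buffer is released; the callback is
 * unregistered automatically after it has been invoked. The state type
 * and retry function are hypothetical.
 *
 *     static void example_map_retry(void *opaque)
 *     {
 *         ExampleDeviceState *s = opaque;
 *
 *         example_continue_dma(s);
 *     }
 *
 *     ...
 *     cpu_register_map_client(s, example_map_retry);
 */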
2221
2222 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2223 {
2224 MemoryRegion *mr;
2225 hwaddr l, xlat;
2226
2227 while (len > 0) {
2228 l = len;
2229 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2230 if (!memory_access_is_direct(mr, is_write)) {
2231 l = memory_access_size(mr, l, addr);
2232 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2233 return false;
2234 }
2235 }
2236
2237 len -= l;
2238 addr += l;
2239 }
2240 return true;
2241 }
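
/*
 * Usage sketch (illustrative): checking that a guest-programmed DMA window
 * is fully accessible before acting on it, and bailing out with a
 * device-specific error otherwise. dma_addr and dma_len are hypothetical
 * values taken from device registers.
 *
 *     if (!address_space_access_valid(&address_space_memory,
 *                                     dma_addr, dma_len, true)) {
 *         return;
 *     }
 */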
2242
2243 /* Map a physical memory region into a host virtual address.
2244 * May map a subset of the requested range, given by and returned in *plen.
2245 * May return NULL if resources needed to perform the mapping are exhausted.
2246 * Use only for reads OR writes - not for read-modify-write operations.
2247 * Use cpu_register_map_client() to know when retrying the map operation is
2248 * likely to succeed.
2249 */
2250 void *address_space_map(AddressSpace *as,
2251 hwaddr addr,
2252 hwaddr *plen,
2253 bool is_write)
2254 {
2255 hwaddr len = *plen;
2256 hwaddr done = 0;
2257 hwaddr l, xlat, base;
2258 MemoryRegion *mr, *this_mr;
2259 ram_addr_t raddr;
2260
2261 if (len == 0) {
2262 return NULL;
2263 }
2264
2265 l = len;
2266 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2267 if (!memory_access_is_direct(mr, is_write)) {
2268 if (bounce.buffer) {
2269 return NULL;
2270 }
2271 /* Avoid unbounded allocations */
2272 l = MIN(l, TARGET_PAGE_SIZE);
2273 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2274 bounce.addr = addr;
2275 bounce.len = l;
2276
2277 memory_region_ref(mr);
2278 bounce.mr = mr;
2279 if (!is_write) {
2280 address_space_read(as, addr, bounce.buffer, l);
2281 }
2282
2283 *plen = l;
2284 return bounce.buffer;
2285 }
2286
2287 base = xlat;
2288 raddr = memory_region_get_ram_addr(mr);
2289
2290 for (;;) {
2291 len -= l;
2292 addr += l;
2293 done += l;
2294 if (len == 0) {
2295 break;
2296 }
2297
2298 l = len;
2299 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2300 if (this_mr != mr || xlat != base + done) {
2301 break;
2302 }
2303 }
2304
2305 memory_region_ref(mr);
2306 *plen = done;
2307 return qemu_ram_ptr_length(raddr + base, plen);
2308 }
2309
2310 /* Unmaps a memory region previously mapped by address_space_map().
2311 * Will also mark the memory as dirty if is_write == 1. access_len gives
2312 * the amount of memory that was actually read or written by the caller.
2313 */
2314 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2315 int is_write, hwaddr access_len)
2316 {
2317 if (buffer != bounce.buffer) {
2318 MemoryRegion *mr;
2319 ram_addr_t addr1;
2320
2321 mr = qemu_ram_addr_from_host(buffer, &addr1);
2322 assert(mr != NULL);
2323 if (is_write) {
2324 while (access_len) {
2325 unsigned l;
2326 l = TARGET_PAGE_SIZE;
2327 if (l > access_len)
2328 l = access_len;
2329 invalidate_and_set_dirty(addr1, l);
2330 addr1 += l;
2331 access_len -= l;
2332 }
2333 }
2334 if (xen_enabled()) {
2335 xen_invalidate_map_cache_entry(buffer);
2336 }
2337 memory_region_unref(mr);
2338 return;
2339 }
2340 if (is_write) {
2341 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2342 }
2343 qemu_vfree(bounce.buffer);
2344 bounce.buffer = NULL;
2345 memory_region_unref(bounce.mr);
2346 cpu_notify_map_clients();
2347 }
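
/*
 * Usage sketch (illustrative): a typical map/use/unmap cycle for a DMA
 * read from guest memory. The address and length are hypothetical,
 * consume_buffer() stands in for whatever the caller does with the data,
 * and a real caller must cope with *plen coming back smaller than
 * requested and with a NULL return (see cpu_register_map_client() above).
 *
 *     hwaddr len = 4096;
 *     void *p = address_space_map(&address_space_memory, 0x40000000,
 *                                 &len, false);
 *
 *     if (p) {
 *         consume_buffer(p, len);
 *         address_space_unmap(&address_space_memory, p, len, false, len);
 *     }
 */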
2348
2349 void *cpu_physical_memory_map(hwaddr addr,
2350 hwaddr *plen,
2351 int is_write)
2352 {
2353 return address_space_map(&address_space_memory, addr, plen, is_write);
2354 }
2355
2356 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2357 int is_write, hwaddr access_len)
2358 {
2359 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2360 }
2361
2362 /* warning: addr must be aligned */
2363 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2364 enum device_endian endian)
2365 {
2366 uint8_t *ptr;
2367 uint64_t val;
2368 MemoryRegion *mr;
2369 hwaddr l = 4;
2370 hwaddr addr1;
2371
2372 mr = address_space_translate(as, addr, &addr1, &l, false);
2373 if (l < 4 || !memory_access_is_direct(mr, false)) {
2374 /* I/O case */
2375 io_mem_read(mr, addr1, &val, 4);
2376 #if defined(TARGET_WORDS_BIGENDIAN)
2377 if (endian == DEVICE_LITTLE_ENDIAN) {
2378 val = bswap32(val);
2379 }
2380 #else
2381 if (endian == DEVICE_BIG_ENDIAN) {
2382 val = bswap32(val);
2383 }
2384 #endif
2385 } else {
2386 /* RAM case */
2387 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2388 & TARGET_PAGE_MASK)
2389 + addr1);
2390 switch (endian) {
2391 case DEVICE_LITTLE_ENDIAN:
2392 val = ldl_le_p(ptr);
2393 break;
2394 case DEVICE_BIG_ENDIAN:
2395 val = ldl_be_p(ptr);
2396 break;
2397 default:
2398 val = ldl_p(ptr);
2399 break;
2400 }
2401 }
2402 return val;
2403 }
2404
2405 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2406 {
2407 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2408 }
2409
2410 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2411 {
2412 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2413 }
2414
2415 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2416 {
2417 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2418 }
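
/*
 * Usage sketch (illustrative): reading a 32-bit field that the guest lays
 * out in little-endian order, e.g. a DMA descriptor word, independently of
 * host and target endianness. The guest physical address is hypothetical.
 *
 *     uint32_t ctrl = ldl_le_phys(&address_space_memory, 0x80001000);
 */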
2419
2420 /* warning: addr must be aligned */
2421 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2422 enum device_endian endian)
2423 {
2424 uint8_t *ptr;
2425 uint64_t val;
2426 MemoryRegion *mr;
2427 hwaddr l = 8;
2428 hwaddr addr1;
2429
2430 mr = address_space_translate(as, addr, &addr1, &l,
2431 false);
2432 if (l < 8 || !memory_access_is_direct(mr, false)) {
2433 /* I/O case */
2434 io_mem_read(mr, addr1, &val, 8);
2435 #if defined(TARGET_WORDS_BIGENDIAN)
2436 if (endian == DEVICE_LITTLE_ENDIAN) {
2437 val = bswap64(val);
2438 }
2439 #else
2440 if (endian == DEVICE_BIG_ENDIAN) {
2441 val = bswap64(val);
2442 }
2443 #endif
2444 } else {
2445 /* RAM case */
2446 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2447 & TARGET_PAGE_MASK)
2448 + addr1);
2449 switch (endian) {
2450 case DEVICE_LITTLE_ENDIAN:
2451 val = ldq_le_p(ptr);
2452 break;
2453 case DEVICE_BIG_ENDIAN:
2454 val = ldq_be_p(ptr);
2455 break;
2456 default:
2457 val = ldq_p(ptr);
2458 break;
2459 }
2460 }
2461 return val;
2462 }
2463
2464 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2465 {
2466 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2467 }
2468
2469 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2470 {
2471 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2472 }
2473
2474 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2475 {
2476 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2477 }
2478
2479 /* XXX: optimize */
2480 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2481 {
2482 uint8_t val;
2483 address_space_rw(as, addr, &val, 1, 0);
2484 return val;
2485 }
2486
2487 /* warning: addr must be aligned */
2488 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2489 enum device_endian endian)
2490 {
2491 uint8_t *ptr;
2492 uint64_t val;
2493 MemoryRegion *mr;
2494 hwaddr l = 2;
2495 hwaddr addr1;
2496
2497 mr = address_space_translate(as, addr, &addr1, &l,
2498 false);
2499 if (l < 2 || !memory_access_is_direct(mr, false)) {
2500 /* I/O case */
2501 io_mem_read(mr, addr1, &val, 2);
2502 #if defined(TARGET_WORDS_BIGENDIAN)
2503 if (endian == DEVICE_LITTLE_ENDIAN) {
2504 val = bswap16(val);
2505 }
2506 #else
2507 if (endian == DEVICE_BIG_ENDIAN) {
2508 val = bswap16(val);
2509 }
2510 #endif
2511 } else {
2512 /* RAM case */
2513 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2514 & TARGET_PAGE_MASK)
2515 + addr1);
2516 switch (endian) {
2517 case DEVICE_LITTLE_ENDIAN:
2518 val = lduw_le_p(ptr);
2519 break;
2520 case DEVICE_BIG_ENDIAN:
2521 val = lduw_be_p(ptr);
2522 break;
2523 default:
2524 val = lduw_p(ptr);
2525 break;
2526 }
2527 }
2528 return val;
2529 }
2530
2531 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2532 {
2533 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2534 }
2535
2536 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2537 {
2538 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2539 }
2540
2541 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2542 {
2543 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2544 }
2545
2546 /* warning: addr must be aligned. The RAM page is not marked as dirty
2547 and the code inside is not invalidated. This is useful if the dirty
2548 bits are used to track modified PTEs */
2549 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2550 {
2551 uint8_t *ptr;
2552 MemoryRegion *mr;
2553 hwaddr l = 4;
2554 hwaddr addr1;
2555
2556 mr = address_space_translate(as, addr, &addr1, &l,
2557 true);
2558 if (l < 4 || !memory_access_is_direct(mr, true)) {
2559 io_mem_write(mr, addr1, val, 4);
2560 } else {
2561 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2562 ptr = qemu_get_ram_ptr(addr1);
2563 stl_p(ptr, val);
2564
2565 if (unlikely(in_migration)) {
2566 if (cpu_physical_memory_is_clean(addr1)) {
2567 /* invalidate code */
2568 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2569 /* set dirty bit */
2570 cpu_physical_memory_set_dirty_flag(addr1,
2571 DIRTY_MEMORY_MIGRATION);
2572 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2573 }
2574 }
2575 }
2576 }
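
/*
 * Usage sketch (illustrative): a target page-table walker setting the
 * accessed bit in a guest PTE. Using stl_phys_notdirty() avoids flagging
 * the page for TB invalidation on every walk. pte_addr, PTE_ACCESSED and
 * cpu (the CPUState doing the walk) are hypothetical names.
 *
 *     uint32_t pte = ldl_phys(cpu->as, pte_addr);
 *
 *     if (!(pte & PTE_ACCESSED)) {
 *         stl_phys_notdirty(cpu->as, pte_addr, pte | PTE_ACCESSED);
 *     }
 */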
2577
2578 /* warning: addr must be aligned */
2579 static inline void stl_phys_internal(AddressSpace *as,
2580 hwaddr addr, uint32_t val,
2581 enum device_endian endian)
2582 {
2583 uint8_t *ptr;
2584 MemoryRegion *mr;
2585 hwaddr l = 4;
2586 hwaddr addr1;
2587
2588 mr = address_space_translate(as, addr, &addr1, &l,
2589 true);
2590 if (l < 4 || !memory_access_is_direct(mr, true)) {
2591 #if defined(TARGET_WORDS_BIGENDIAN)
2592 if (endian == DEVICE_LITTLE_ENDIAN) {
2593 val = bswap32(val);
2594 }
2595 #else
2596 if (endian == DEVICE_BIG_ENDIAN) {
2597 val = bswap32(val);
2598 }
2599 #endif
2600 io_mem_write(mr, addr1, val, 4);
2601 } else {
2602 /* RAM case */
2603 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2604 ptr = qemu_get_ram_ptr(addr1);
2605 switch (endian) {
2606 case DEVICE_LITTLE_ENDIAN:
2607 stl_le_p(ptr, val);
2608 break;
2609 case DEVICE_BIG_ENDIAN:
2610 stl_be_p(ptr, val);
2611 break;
2612 default:
2613 stl_p(ptr, val);
2614 break;
2615 }
2616 invalidate_and_set_dirty(addr1, 4);
2617 }
2618 }
2619
2620 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2621 {
2622 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2623 }
2624
2625 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2626 {
2627 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2628 }
2629
2630 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2631 {
2632 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2633 }
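
/*
 * Usage sketch (illustrative): a device model posting a 32-bit completion
 * status, in the guest's little-endian layout, into guest RAM. The address
 * and value are hypothetical.
 *
 *     stl_le_phys(&address_space_memory, 0x80002000, 0x1);
 */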
2634
2635 /* XXX: optimize */
2636 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2637 {
2638 uint8_t v = val;
2639 address_space_rw(as, addr, &v, 1, 1);
2640 }
2641
2642 /* warning: addr must be aligned */
2643 static inline void stw_phys_internal(AddressSpace *as,
2644 hwaddr addr, uint32_t val,
2645 enum device_endian endian)
2646 {
2647 uint8_t *ptr;
2648 MemoryRegion *mr;
2649 hwaddr l = 2;
2650 hwaddr addr1;
2651
2652 mr = address_space_translate(as, addr, &addr1, &l, true);
2653 if (l < 2 || !memory_access_is_direct(mr, true)) {
2654 #if defined(TARGET_WORDS_BIGENDIAN)
2655 if (endian == DEVICE_LITTLE_ENDIAN) {
2656 val = bswap16(val);
2657 }
2658 #else
2659 if (endian == DEVICE_BIG_ENDIAN) {
2660 val = bswap16(val);
2661 }
2662 #endif
2663 io_mem_write(mr, addr1, val, 2);
2664 } else {
2665 /* RAM case */
2666 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2667 ptr = qemu_get_ram_ptr(addr1);
2668 switch (endian) {
2669 case DEVICE_LITTLE_ENDIAN:
2670 stw_le_p(ptr, val);
2671 break;
2672 case DEVICE_BIG_ENDIAN:
2673 stw_be_p(ptr, val);
2674 break;
2675 default:
2676 stw_p(ptr, val);
2677 break;
2678 }
2679 invalidate_and_set_dirty(addr1, 2);
2680 }
2681 }
2682
2683 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2684 {
2685 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2686 }
2687
2688 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2689 {
2690 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2691 }
2692
2693 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2694 {
2695 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2696 }
2697
2698 /* XXX: optimize */
2699 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2700 {
2701 val = tswap64(val);
2702 address_space_rw(as, addr, (void *) &val, 8, 1);
2703 }
2704
2705 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2706 {
2707 val = cpu_to_le64(val);
2708 address_space_rw(as, addr, (void *) &val, 8, 1);
2709 }
2710
2711 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2712 {
2713 val = cpu_to_be64(val);
2714 address_space_rw(as, addr, (void *) &val, 8, 1);
2715 }
2716
2717 /* virtual memory access for debug (includes writing to ROM) */
2718 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2719 uint8_t *buf, int len, int is_write)
2720 {
2721 int l;
2722 hwaddr phys_addr;
2723 target_ulong page;
2724
2725 while (len > 0) {
2726 page = addr & TARGET_PAGE_MASK;
2727 phys_addr = cpu_get_phys_page_debug(cpu, page);
2728 /* if no physical page mapped, return an error */
2729 if (phys_addr == -1)
2730 return -1;
2731 l = (page + TARGET_PAGE_SIZE) - addr;
2732 if (l > len)
2733 l = len;
2734 phys_addr += (addr & ~TARGET_PAGE_MASK);
2735 if (is_write) {
2736 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2737 } else {
2738 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2739 }
2740 len -= l;
2741 buf += l;
2742 addr += l;
2743 }
2744 return 0;
2745 }
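
/*
 * Usage sketch (illustrative): how a debugger stub might read an
 * instruction word through the CPU's current virtual-to-physical mapping.
 * The virtual address and cpu (the CPUState being debugged) are
 * hypothetical; a negative return value means the page is not mapped.
 *
 *     uint8_t insn[4];
 *
 *     if (cpu_memory_rw_debug(cpu, 0x00400000, insn, sizeof(insn), 0) < 0) {
 *         return;
 *     }
 */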
2746 #endif
2747
2748 #if !defined(CONFIG_USER_ONLY)
2749
2750 /*
2751 * A helper function for the _utterly broken_ virtio device model to find out if
2752 * it's running on a big-endian machine. Don't do this at home, kids!
2753 */
2754 bool virtio_is_big_endian(void);
2755 bool virtio_is_big_endian(void)
2756 {
2757 #if defined(TARGET_WORDS_BIGENDIAN)
2758 return true;
2759 #else
2760 return false;
2761 #endif
2762 }
2763
2764 #endif
2765
2766 #ifndef CONFIG_USER_ONLY
2767 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2768 {
2769 MemoryRegion *mr;
2770 hwaddr l = 1;
2771
2772 mr = address_space_translate(&address_space_memory,
2773 phys_addr, &phys_addr, &l, false);
2774
2775 return !(memory_region_is_ram(mr) ||
2776 memory_region_is_romd(mr));
2777 }
2778
2779 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2780 {
2781 RAMBlock *block;
2782
2783 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2784 func(block->host, block->offset, block->length, opaque);
2785 }
2786 }
2787 #endif