1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
54
55 #include "qemu/range.h"
56
57 //#define DEBUG_SUBPAGE
58
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
61
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
66
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
69
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
72
73 #endif
74
75 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
76 /* current CPU in the current thread. It is only valid inside
77 cpu_exec() */
78 DEFINE_TLS(CPUState *, current_cpu);
79 /* 0 = Do not count executed instructions.
80 1 = Precise instruction counting.
81 2 = Adaptive rate instruction counting. */
82 int use_icount;
83
84 #if !defined(CONFIG_USER_ONLY)
85
86 typedef struct PhysPageEntry PhysPageEntry;
87
88 struct PhysPageEntry {
89 /* How many bits to skip to the next level (in units of P_L2_SIZE). 0 for a leaf. */
90 uint32_t skip : 6;
91 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
92 uint32_t ptr : 26;
93 };
94
95 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
96
97 /* Size of the L2 (and L3, etc) page tables. */
98 #define ADDR_SPACE_BITS 64
99
100 #define P_L2_BITS 9
101 #define P_L2_SIZE (1 << P_L2_BITS)
102
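/* Number of radix-tree levels needed to cover the remaining
 * ADDR_SPACE_BITS - TARGET_PAGE_BITS bits, P_L2_BITS bits per level.
 * For example, with 4 KiB target pages (TARGET_PAGE_BITS == 12) this
 * evaluates to (64 - 12 - 1) / 9 + 1 == 6 levels.
 */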
103 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
104
105 typedef PhysPageEntry Node[P_L2_SIZE];
106
107 typedef struct PhysPageMap {
108 unsigned sections_nb;
109 unsigned sections_nb_alloc;
110 unsigned nodes_nb;
111 unsigned nodes_nb_alloc;
112 Node *nodes;
113 MemoryRegionSection *sections;
114 } PhysPageMap;
115
116 struct AddressSpaceDispatch {
117 /* This is a multi-level map on the physical address space.
118 * The bottom level has pointers to MemoryRegionSections.
119 */
120 PhysPageEntry phys_map;
121 PhysPageMap map;
122 AddressSpace *as;
123 };
124
125 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
126 typedef struct subpage_t {
127 MemoryRegion iomem;
128 AddressSpace *as;
129 hwaddr base;
130 uint16_t sub_section[TARGET_PAGE_SIZE];
131 } subpage_t;
132
133 #define PHYS_SECTION_UNASSIGNED 0
134 #define PHYS_SECTION_NOTDIRTY 1
135 #define PHYS_SECTION_ROM 2
136 #define PHYS_SECTION_WATCH 3
137
138 static void io_mem_init(void);
139 static void memory_map_init(void);
140 static void tcg_commit(MemoryListener *listener);
141
142 static MemoryRegion io_mem_watch;
143 #endif
144
145 #if !defined(CONFIG_USER_ONLY)
146
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
148 {
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
153 }
154 }
155
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
157 {
158 unsigned i;
159 uint32_t ret;
160
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
167 }
168 return ret;
169 }
170
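/* Recursively fill in the radix tree: map *nb pages starting at *index
 * to the section number 'leaf', allocating intermediate nodes on demand.
 * A block that is aligned to and at least as large as 'step' (the page
 * count covered by one entry at this level) is written as a single leaf
 * entry; anything smaller recurses one level down.
 */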
171 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172 hwaddr *index, hwaddr *nb, uint16_t leaf,
173 int level)
174 {
175 PhysPageEntry *p;
176 int i;
177 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
178
179 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180 lp->ptr = phys_map_node_alloc(map);
181 p = map->nodes[lp->ptr];
182 if (level == 0) {
183 for (i = 0; i < P_L2_SIZE; i++) {
184 p[i].skip = 0;
185 p[i].ptr = PHYS_SECTION_UNASSIGNED;
186 }
187 }
188 } else {
189 p = map->nodes[lp->ptr];
190 }
191 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
192
193 while (*nb && lp < &p[P_L2_SIZE]) {
194 if ((*index & (step - 1)) == 0 && *nb >= step) {
195 lp->skip = 0;
196 lp->ptr = leaf;
197 *index += step;
198 *nb -= step;
199 } else {
200 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
201 }
202 ++lp;
203 }
204 }
205
206 static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
208 uint16_t leaf)
209 {
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
212
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
214 }
215
216 /* Compact a non-leaf page entry: if the entry has a single child,
217 * update our entry so we can skip it and go directly to the destination.
218 */
219 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
220 {
221 unsigned valid_ptr = P_L2_SIZE;
222 int valid = 0;
223 PhysPageEntry *p;
224 int i;
225
226 if (lp->ptr == PHYS_MAP_NODE_NIL) {
227 return;
228 }
229
230 p = nodes[lp->ptr];
231 for (i = 0; i < P_L2_SIZE; i++) {
232 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
233 continue;
234 }
235
236 valid_ptr = i;
237 valid++;
238 if (p[i].skip) {
239 phys_page_compact(&p[i], nodes, compacted);
240 }
241 }
242
243 /* We can only compress if there's only one child. */
244 if (valid != 1) {
245 return;
246 }
247
248 assert(valid_ptr < P_L2_SIZE);
249
250 /* Don't compress if it won't fit in the # of bits we have. */
251 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
252 return;
253 }
254
255 lp->ptr = p[valid_ptr].ptr;
256 if (!p[valid_ptr].skip) {
257 /* If our only child is a leaf, make this a leaf. */
258 /* By design, we should have made this node a leaf to begin with so we
259 * should never reach here.
260 * But since it's so simple to handle this, let's do it just in case we
261 * change this rule.
262 */
263 lp->skip = 0;
264 } else {
265 lp->skip += p[valid_ptr].skip;
266 }
267 }
268
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
270 {
271 DECLARE_BITMAP(compacted, nodes_nb);
272
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
275 }
276 }
277
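/* Walk the radix tree for 'addr': each entry's 'skip' field says how many
 * levels it jumps (compacted paths skip more than one).  A NIL pointer, or
 * a leaf section that does not actually cover 'addr', resolves to
 * PHYS_SECTION_UNASSIGNED.
 */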
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
280 {
281 PhysPageEntry *p;
282 hwaddr index = addr >> TARGET_PAGE_BITS;
283 int i;
284
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return &sections[PHYS_SECTION_UNASSIGNED];
288 }
289 p = nodes[lp.ptr];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
291 }
292
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return &sections[lp.ptr];
297 } else {
298 return &sections[PHYS_SECTION_UNASSIGNED];
299 }
300 }
301
302 bool memory_region_is_unassigned(MemoryRegion *mr)
303 {
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
306 }
307
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
309 hwaddr addr,
310 bool resolve_subpage)
311 {
312 MemoryRegionSection *section;
313 subpage_t *subpage;
314
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
319 }
320 return section;
321 }
322
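/* Look up the MemoryRegionSection for 'addr' (optionally resolving subpage
 * mappings), return the offset of 'addr' within the section's MemoryRegion
 * via *xlat, and clip *plen so the access does not run past the end of the
 * region.
 */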
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
326 {
327 MemoryRegionSection *section;
328 Int128 diff;
329
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
333
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
336
337 diff = int128_sub(section->mr->size, int128_make64(addr));
338 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
339 return section;
340 }
341
342 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
343 {
344 if (memory_region_is_ram(mr)) {
345 return !(is_write && mr->readonly);
346 }
347 if (memory_region_is_romd(mr)) {
348 return !is_write;
349 }
350
351 return false;
352 }
353
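/* Translate 'addr' for an access of *plen bytes, walking through any IOMMUs
 * on the way and shrinking the length to what a single region (or, for
 * direct RAM/ROM access, a single target page) can satisfy.  An IOMMU
 * permission failure yields io_mem_unassigned.
 */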
354 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
355 hwaddr *xlat, hwaddr *plen,
356 bool is_write)
357 {
358 IOMMUTLBEntry iotlb;
359 MemoryRegionSection *section;
360 MemoryRegion *mr;
361 hwaddr len = *plen;
362
363 for (;;) {
364 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
365 mr = section->mr;
366
367 if (!mr->iommu_ops) {
368 break;
369 }
370
371 iotlb = mr->iommu_ops->translate(mr, addr);
372 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
373 | (addr & iotlb.addr_mask));
374 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
375 if (!(iotlb.perm & (1 << is_write))) {
376 mr = &io_mem_unassigned;
377 break;
378 }
379
380 as = iotlb.target_as;
381 }
382
383 if (memory_access_is_direct(mr, is_write)) {
384 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
385 len = MIN(page, len);
386 }
387
388 *plen = len;
389 *xlat = addr;
390 return mr;
391 }
392
393 MemoryRegionSection *
394 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
395 hwaddr *plen)
396 {
397 MemoryRegionSection *section;
398 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
399
400 assert(!section->mr->iommu_ops);
401 return section;
402 }
403 #endif
404
405 void cpu_exec_init_all(void)
406 {
407 #if !defined(CONFIG_USER_ONLY)
408 qemu_mutex_init(&ram_list.mutex);
409 memory_map_init();
410 io_mem_init();
411 #endif
412 }
413
414 #if !defined(CONFIG_USER_ONLY)
415
416 static int cpu_common_post_load(void *opaque, int version_id)
417 {
418 CPUState *cpu = opaque;
419
420 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
421 version_id is increased. */
422 cpu->interrupt_request &= ~0x01;
423 tlb_flush(cpu->env_ptr, 1);
424
425 return 0;
426 }
427
428 const VMStateDescription vmstate_cpu_common = {
429 .name = "cpu_common",
430 .version_id = 1,
431 .minimum_version_id = 1,
432 .minimum_version_id_old = 1,
433 .post_load = cpu_common_post_load,
434 .fields = (VMStateField []) {
435 VMSTATE_UINT32(halted, CPUState),
436 VMSTATE_UINT32(interrupt_request, CPUState),
437 VMSTATE_END_OF_LIST()
438 }
439 };
440
441 #endif
442
443 CPUState *qemu_get_cpu(int index)
444 {
445 CPUState *cpu;
446
447 CPU_FOREACH(cpu) {
448 if (cpu->cpu_index == index) {
449 return cpu;
450 }
451 }
452
453 return NULL;
454 }
455
456 #if !defined(CONFIG_USER_ONLY)
457 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
458 {
459 /* We only support one address space per cpu at the moment. */
460 assert(cpu->as == as);
461
462 if (cpu->tcg_as_listener) {
463 memory_listener_unregister(cpu->tcg_as_listener);
464 } else {
465 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
466 }
467 cpu->tcg_as_listener->commit = tcg_commit;
468 memory_listener_register(cpu->tcg_as_listener, as);
469 }
470 #endif
471
472 void cpu_exec_init(CPUArchState *env)
473 {
474 CPUState *cpu = ENV_GET_CPU(env);
475 CPUClass *cc = CPU_GET_CLASS(cpu);
476 CPUState *some_cpu;
477 int cpu_index;
478
479 #if defined(CONFIG_USER_ONLY)
480 cpu_list_lock();
481 #endif
482 cpu_index = 0;
483 CPU_FOREACH(some_cpu) {
484 cpu_index++;
485 }
486 cpu->cpu_index = cpu_index;
487 cpu->numa_node = 0;
488 QTAILQ_INIT(&cpu->breakpoints);
489 QTAILQ_INIT(&cpu->watchpoints);
490 #ifndef CONFIG_USER_ONLY
491 cpu->as = &address_space_memory;
492 cpu->thread_id = qemu_get_thread_id();
493 #endif
494 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
495 #if defined(CONFIG_USER_ONLY)
496 cpu_list_unlock();
497 #endif
498 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
499 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
500 }
501 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
502 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
503 cpu_save, cpu_load, env);
504 assert(cc->vmsd == NULL);
505 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
506 #endif
507 if (cc->vmsd != NULL) {
508 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
509 }
510 }
511
512 #if defined(TARGET_HAS_ICE)
513 #if defined(CONFIG_USER_ONLY)
514 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
515 {
516 tb_invalidate_phys_page_range(pc, pc + 1, 0);
517 }
518 #else
519 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
520 {
521 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
522 if (phys != -1) {
523 tb_invalidate_phys_addr(cpu->as,
524 phys | (pc & ~TARGET_PAGE_MASK));
525 }
526 }
527 #endif
528 #endif /* TARGET_HAS_ICE */
529
530 #if defined(CONFIG_USER_ONLY)
531 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
532 {
533 }
534
535
536 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
537 int flags, CPUWatchpoint **watchpoint)
538 {
539 return -ENOSYS;
540 }
541 #else
542 /* Add a watchpoint. */
543 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
544 int flags, CPUWatchpoint **watchpoint)
545 {
546 vaddr len_mask = ~(len - 1);
547 CPUWatchpoint *wp;
548
549 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
550 if ((len & (len - 1)) || (addr & ~len_mask) ||
551 len == 0 || len > TARGET_PAGE_SIZE) {
552 error_report("tried to set invalid watchpoint at %"
553 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
554 return -EINVAL;
555 }
556 wp = g_malloc(sizeof(*wp));
557
558 wp->vaddr = addr;
559 wp->len_mask = len_mask;
560 wp->flags = flags;
561
562 /* keep all GDB-injected watchpoints in front */
563 if (flags & BP_GDB) {
564 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
565 } else {
566 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
567 }
568
569 tlb_flush_page(cpu, addr);
570
571 if (watchpoint)
572 *watchpoint = wp;
573 return 0;
574 }
575
576 /* Remove a specific watchpoint. */
577 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
578 int flags)
579 {
580 vaddr len_mask = ~(len - 1);
581 CPUWatchpoint *wp;
582
583 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
584 if (addr == wp->vaddr && len_mask == wp->len_mask
585 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
586 cpu_watchpoint_remove_by_ref(cpu, wp);
587 return 0;
588 }
589 }
590 return -ENOENT;
591 }
592
593 /* Remove a specific watchpoint by reference. */
594 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
595 {
596 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
597
598 tlb_flush_page(cpu, watchpoint->vaddr);
599
600 g_free(watchpoint);
601 }
602
603 /* Remove all matching watchpoints. */
604 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
605 {
606 CPUWatchpoint *wp, *next;
607
608 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
609 if (wp->flags & mask) {
610 cpu_watchpoint_remove_by_ref(cpu, wp);
611 }
612 }
613 }
614 #endif
615
616 /* Add a breakpoint. */
617 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
618 CPUBreakpoint **breakpoint)
619 {
620 #if defined(TARGET_HAS_ICE)
621 CPUBreakpoint *bp;
622
623 bp = g_malloc(sizeof(*bp));
624
625 bp->pc = pc;
626 bp->flags = flags;
627
628 /* keep all GDB-injected breakpoints in front */
629 if (flags & BP_GDB) {
630 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
631 } else {
632 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
633 }
634
635 breakpoint_invalidate(cpu, pc);
636
637 if (breakpoint) {
638 *breakpoint = bp;
639 }
640 return 0;
641 #else
642 return -ENOSYS;
643 #endif
644 }
645
646 /* Remove a specific breakpoint. */
647 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
648 {
649 #if defined(TARGET_HAS_ICE)
650 CPUBreakpoint *bp;
651
652 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
653 if (bp->pc == pc && bp->flags == flags) {
654 cpu_breakpoint_remove_by_ref(cpu, bp);
655 return 0;
656 }
657 }
658 return -ENOENT;
659 #else
660 return -ENOSYS;
661 #endif
662 }
663
664 /* Remove a specific breakpoint by reference. */
665 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
666 {
667 #if defined(TARGET_HAS_ICE)
668 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
669
670 breakpoint_invalidate(cpu, breakpoint->pc);
671
672 g_free(breakpoint);
673 #endif
674 }
675
676 /* Remove all matching breakpoints. */
677 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
678 {
679 #if defined(TARGET_HAS_ICE)
680 CPUBreakpoint *bp, *next;
681
682 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
683 if (bp->flags & mask) {
684 cpu_breakpoint_remove_by_ref(cpu, bp);
685 }
686 }
687 #endif
688 }
689
690 /* enable or disable single step mode. EXCP_DEBUG is returned by the
691 CPU loop after each instruction */
692 void cpu_single_step(CPUState *cpu, int enabled)
693 {
694 #if defined(TARGET_HAS_ICE)
695 if (cpu->singlestep_enabled != enabled) {
696 cpu->singlestep_enabled = enabled;
697 if (kvm_enabled()) {
698 kvm_update_guest_debug(cpu, 0);
699 } else {
700 /* must flush all the translated code to avoid inconsistencies */
701 /* XXX: only flush what is necessary */
702 CPUArchState *env = cpu->env_ptr;
703 tb_flush(env);
704 }
705 }
706 #endif
707 }
708
709 void cpu_abort(CPUState *cpu, const char *fmt, ...)
710 {
711 va_list ap;
712 va_list ap2;
713
714 va_start(ap, fmt);
715 va_copy(ap2, ap);
716 fprintf(stderr, "qemu: fatal: ");
717 vfprintf(stderr, fmt, ap);
718 fprintf(stderr, "\n");
719 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
720 if (qemu_log_enabled()) {
721 qemu_log("qemu: fatal: ");
722 qemu_log_vprintf(fmt, ap2);
723 qemu_log("\n");
724 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
725 qemu_log_flush();
726 qemu_log_close();
727 }
728 va_end(ap2);
729 va_end(ap);
730 #if defined(CONFIG_USER_ONLY)
731 {
732 struct sigaction act;
733 sigfillset(&act.sa_mask);
734 act.sa_handler = SIG_DFL;
735 sigaction(SIGABRT, &act, NULL);
736 }
737 #endif
738 abort();
739 }
740
741 #if !defined(CONFIG_USER_ONLY)
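/* Return the RAMBlock containing the ram_addr_t 'addr', checking the
 * most-recently-used block first and falling back to a list scan.
 * Aborts if 'addr' does not fall inside any registered block.
 */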
742 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
743 {
744 RAMBlock *block;
745
746 /* The list is protected by the iothread lock here. */
747 block = ram_list.mru_block;
748 if (block && addr - block->offset < block->length) {
749 goto found;
750 }
751 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
752 if (addr - block->offset < block->length) {
753 goto found;
754 }
755 }
756
757 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
758 abort();
759
760 found:
761 ram_list.mru_block = block;
762 return block;
763 }
764
765 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
766 {
767 ram_addr_t start1;
768 RAMBlock *block;
769 ram_addr_t end;
770
771 end = TARGET_PAGE_ALIGN(start + length);
772 start &= TARGET_PAGE_MASK;
773
774 block = qemu_get_ram_block(start);
775 assert(block == qemu_get_ram_block(end - 1));
776 start1 = (uintptr_t)block->host + (start - block->offset);
777 cpu_tlb_reset_dirty_all(start1, length);
778 }
779
780 /* Note: start and end must be within the same ram block. */
781 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
782 unsigned client)
783 {
784 if (length == 0)
785 return;
786 cpu_physical_memory_clear_dirty_range(start, length, client);
787
788 if (tcg_enabled()) {
789 tlb_reset_dirty_range_all(start, length);
790 }
791 }
792
793 static void cpu_physical_memory_set_dirty_tracking(bool enable)
794 {
795 in_migration = enable;
796 }
797
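/* Compute the iotlb value stored in a TLB entry for this mapping.  For RAM
 * the value is the page-aligned ram_addr ORed with a special section index
 * (NOTDIRTY for writable pages, ROM for read-only ones); for MMIO it is the
 * section's index in the dispatch table plus the offset within the section.
 * Pages carrying watchpoints are redirected to the WATCH section and flagged
 * TLB_MMIO, except for read-only mappings guarded only by write watchpoints.
 */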
798 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
799 MemoryRegionSection *section,
800 target_ulong vaddr,
801 hwaddr paddr, hwaddr xlat,
802 int prot,
803 target_ulong *address)
804 {
805 hwaddr iotlb;
806 CPUWatchpoint *wp;
807
808 if (memory_region_is_ram(section->mr)) {
809 /* Normal RAM. */
810 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
811 + xlat;
812 if (!section->readonly) {
813 iotlb |= PHYS_SECTION_NOTDIRTY;
814 } else {
815 iotlb |= PHYS_SECTION_ROM;
816 }
817 } else {
818 iotlb = section - section->address_space->dispatch->map.sections;
819 iotlb += xlat;
820 }
821
822 /* Make accesses to pages with watchpoints go via the
823 watchpoint trap routines. */
824 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
825 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
826 /* Avoid trapping reads of pages with a write breakpoint. */
827 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
828 iotlb = PHYS_SECTION_WATCH + paddr;
829 *address |= TLB_MMIO;
830 break;
831 }
832 }
833 }
834
835 return iotlb;
836 }
837 #endif /* defined(CONFIG_USER_ONLY) */
838
839 #if !defined(CONFIG_USER_ONLY)
840
841 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
842 uint16_t section);
843 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
844
845 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
846
847 /*
848 * Set a custom physical guest memory allocator.
849 * Accelerators with unusual needs may need this. Hopefully, we can
850 * get rid of it eventually.
851 */
852 void phys_mem_set_alloc(void *(*alloc)(size_t))
853 {
854 phys_mem_alloc = alloc;
855 }
856
857 static uint16_t phys_section_add(PhysPageMap *map,
858 MemoryRegionSection *section)
859 {
860 /* The physical section number is ORed with a page-aligned
861 * pointer to produce the iotlb entries. Thus it should
862 * never overflow into the page-aligned value.
863 */
864 assert(map->sections_nb < TARGET_PAGE_SIZE);
865
866 if (map->sections_nb == map->sections_nb_alloc) {
867 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
868 map->sections = g_renew(MemoryRegionSection, map->sections,
869 map->sections_nb_alloc);
870 }
871 map->sections[map->sections_nb] = *section;
872 memory_region_ref(section->mr);
873 return map->sections_nb++;
874 }
875
876 static void phys_section_destroy(MemoryRegion *mr)
877 {
878 memory_region_unref(mr);
879
880 if (mr->subpage) {
881 subpage_t *subpage = container_of(mr, subpage_t, iomem);
882 memory_region_destroy(&subpage->iomem);
883 g_free(subpage);
884 }
885 }
886
887 static void phys_sections_free(PhysPageMap *map)
888 {
889 while (map->sections_nb > 0) {
890 MemoryRegionSection *section = &map->sections[--map->sections_nb];
891 phys_section_destroy(section->mr);
892 }
893 g_free(map->sections);
894 g_free(map->nodes);
895 }
896
897 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
898 {
899 subpage_t *subpage;
900 hwaddr base = section->offset_within_address_space
901 & TARGET_PAGE_MASK;
902 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
903 d->map.nodes, d->map.sections);
904 MemoryRegionSection subsection = {
905 .offset_within_address_space = base,
906 .size = int128_make64(TARGET_PAGE_SIZE),
907 };
908 hwaddr start, end;
909
910 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
911
912 if (!(existing->mr->subpage)) {
913 subpage = subpage_init(d->as, base);
914 subsection.address_space = d->as;
915 subsection.mr = &subpage->iomem;
916 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
917 phys_section_add(&d->map, &subsection));
918 } else {
919 subpage = container_of(existing->mr, subpage_t, iomem);
920 }
921 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
922 end = start + int128_get64(section->size) - 1;
923 subpage_register(subpage, start, end,
924 phys_section_add(&d->map, section));
925 }
926
927
928 static void register_multipage(AddressSpaceDispatch *d,
929 MemoryRegionSection *section)
930 {
931 hwaddr start_addr = section->offset_within_address_space;
932 uint16_t section_index = phys_section_add(&d->map, section);
933 uint64_t num_pages = int128_get64(int128_rshift(section->size,
934 TARGET_PAGE_BITS));
935
936 assert(num_pages);
937 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
938 }
939
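/* region_add/region_nop callback: split the incoming section at target page
 * boundaries.  An unaligned head and any tail smaller than a page are
 * registered as subpages; the page-aligned remainder is handed to
 * register_multipage in one go.
 */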
940 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
941 {
942 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
943 AddressSpaceDispatch *d = as->next_dispatch;
944 MemoryRegionSection now = *section, remain = *section;
945 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
946
947 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
948 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
949 - now.offset_within_address_space;
950
951 now.size = int128_min(int128_make64(left), now.size);
952 register_subpage(d, &now);
953 } else {
954 now.size = int128_zero();
955 }
956 while (int128_ne(remain.size, now.size)) {
957 remain.size = int128_sub(remain.size, now.size);
958 remain.offset_within_address_space += int128_get64(now.size);
959 remain.offset_within_region += int128_get64(now.size);
960 now = remain;
961 if (int128_lt(remain.size, page_size)) {
962 register_subpage(d, &now);
963 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
964 now.size = page_size;
965 register_subpage(d, &now);
966 } else {
967 now.size = int128_and(now.size, int128_neg(page_size));
968 register_multipage(d, &now);
969 }
970 }
971 }
972
973 void qemu_flush_coalesced_mmio_buffer(void)
974 {
975 if (kvm_enabled())
976 kvm_flush_coalesced_mmio_buffer();
977 }
978
979 void qemu_mutex_lock_ramlist(void)
980 {
981 qemu_mutex_lock(&ram_list.mutex);
982 }
983
984 void qemu_mutex_unlock_ramlist(void)
985 {
986 qemu_mutex_unlock(&ram_list.mutex);
987 }
988
989 #ifdef __linux__
990
991 #include <sys/vfs.h>
992
993 #define HUGETLBFS_MAGIC 0x958458f6
994
995 static long gethugepagesize(const char *path)
996 {
997 struct statfs fs;
998 int ret;
999
1000 do {
1001 ret = statfs(path, &fs);
1002 } while (ret != 0 && errno == EINTR);
1003
1004 if (ret != 0) {
1005 perror(path);
1006 return 0;
1007 }
1008
1009 if (fs.f_type != HUGETLBFS_MAGIC)
1010 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1011
1012 return fs.f_bsize;
1013 }
1014
1015 static sigjmp_buf sigjump;
1016
1017 static void sigbus_handler(int signal)
1018 {
1019 siglongjmp(sigjump, 1);
1020 }
1021
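/* Back guest RAM with huge pages from a hugetlbfs mount: create (and
 * immediately unlink) a temporary file under 'path', size it with ftruncate
 * and map it MAP_PRIVATE.  With -mem-prealloc every huge page is touched up
 * front, with a SIGBUS handler catching allocation failures.
 */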
1022 static void *file_ram_alloc(RAMBlock *block,
1023 ram_addr_t memory,
1024 const char *path)
1025 {
1026 char *filename;
1027 char *sanitized_name;
1028 char *c;
1029 void *area;
1030 int fd;
1031 unsigned long hpagesize;
1032
1033 hpagesize = gethugepagesize(path);
1034 if (!hpagesize) {
1035 goto error;
1036 }
1037
1038 if (memory < hpagesize) {
1039 return NULL;
1040 }
1041
1042 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1043 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1044 goto error;
1045 }
1046
1047 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1048 sanitized_name = g_strdup(block->mr->name);
1049 for (c = sanitized_name; *c != '\0'; c++) {
1050 if (*c == '/')
1051 *c = '_';
1052 }
1053
1054 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1055 sanitized_name);
1056 g_free(sanitized_name);
1057
1058 fd = mkstemp(filename);
1059 if (fd < 0) {
1060 perror("unable to create backing store for hugepages");
1061 g_free(filename);
1062 goto error;
1063 }
1064 unlink(filename);
1065 g_free(filename);
1066
1067 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1068
1069 /*
1070 * ftruncate is not supported by hugetlbfs in older
1071 * hosts, so don't bother bailing out on errors.
1072 * If anything goes wrong with it under other filesystems,
1073 * mmap will fail.
1074 */
1075 if (ftruncate(fd, memory))
1076 perror("ftruncate");
1077
1078 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1079 if (area == MAP_FAILED) {
1080 perror("file_ram_alloc: can't mmap RAM pages");
1081 close(fd);
1082 goto error;
1083 }
1084
1085 if (mem_prealloc) {
1086 int ret, i;
1087 struct sigaction act, oldact;
1088 sigset_t set, oldset;
1089
1090 memset(&act, 0, sizeof(act));
1091 act.sa_handler = &sigbus_handler;
1092 act.sa_flags = 0;
1093
1094 ret = sigaction(SIGBUS, &act, &oldact);
1095 if (ret) {
1096 perror("file_ram_alloc: failed to install signal handler");
1097 exit(1);
1098 }
1099
1100 /* unblock SIGBUS */
1101 sigemptyset(&set);
1102 sigaddset(&set, SIGBUS);
1103 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1104
1105 if (sigsetjmp(sigjump, 1)) {
1106 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1107 exit(1);
1108 }
1109
1110 /* MAP_POPULATE silently ignores failures */
1111 for (i = 0; i < (memory/hpagesize); i++) {
1112 memset(area + (hpagesize*i), 0, 1);
1113 }
1114
1115 ret = sigaction(SIGBUS, &oldact, NULL);
1116 if (ret) {
1117 perror("file_ram_alloc: failed to reinstall signal handler");
1118 exit(1);
1119 }
1120
1121 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1122 }
1123
1124 block->fd = fd;
1125 return area;
1126
1127 error:
1128 if (mem_prealloc) {
1129 exit(1);
1130 }
1131 return NULL;
1132 }
1133 #else
1134 static void *file_ram_alloc(RAMBlock *block,
1135 ram_addr_t memory,
1136 const char *path)
1137 {
1138 fprintf(stderr, "-mem-path not supported on this host\n");
1139 exit(1);
1140 }
1141 #endif
1142
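/* Best-fit search of the ram_addr_t space: for every existing block, measure
 * the gap up to the next block and pick the smallest gap that can still hold
 * 'size'.  Aborts if no gap is large enough.
 */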
1143 static ram_addr_t find_ram_offset(ram_addr_t size)
1144 {
1145 RAMBlock *block, *next_block;
1146 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1147
1148 assert(size != 0); /* it would hand out same offset multiple times */
1149
1150 if (QTAILQ_EMPTY(&ram_list.blocks))
1151 return 0;
1152
1153 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1154 ram_addr_t end, next = RAM_ADDR_MAX;
1155
1156 end = block->offset + block->length;
1157
1158 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1159 if (next_block->offset >= end) {
1160 next = MIN(next, next_block->offset);
1161 }
1162 }
1163 if (next - end >= size && next - end < mingap) {
1164 offset = end;
1165 mingap = next - end;
1166 }
1167 }
1168
1169 if (offset == RAM_ADDR_MAX) {
1170 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1171 (uint64_t)size);
1172 abort();
1173 }
1174
1175 return offset;
1176 }
1177
1178 ram_addr_t last_ram_offset(void)
1179 {
1180 RAMBlock *block;
1181 ram_addr_t last = 0;
1182
1183 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1184 last = MAX(last, block->offset + block->length);
1185
1186 return last;
1187 }
1188
1189 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1190 {
1191 int ret;
1192
1193 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump. */
1194 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1195 "dump-guest-core", true)) {
1196 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1197 if (ret) {
1198 perror("qemu_madvise");
1199 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1200 "but dump_guest_core=off specified\n");
1201 }
1202 }
1203 }
1204
1205 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1206 {
1207 RAMBlock *new_block, *block;
1208
1209 new_block = NULL;
1210 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1211 if (block->offset == addr) {
1212 new_block = block;
1213 break;
1214 }
1215 }
1216 assert(new_block);
1217 assert(!new_block->idstr[0]);
1218
1219 if (dev) {
1220 char *id = qdev_get_dev_path(dev);
1221 if (id) {
1222 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1223 g_free(id);
1224 }
1225 }
1226 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1227
1228 /* This assumes the iothread lock is taken here too. */
1229 qemu_mutex_lock_ramlist();
1230 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1231 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1232 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1233 new_block->idstr);
1234 abort();
1235 }
1236 }
1237 qemu_mutex_unlock_ramlist();
1238 }
1239
1240 static int memory_try_enable_merging(void *addr, size_t len)
1241 {
1242 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1243 /* disabled by the user */
1244 return 0;
1245 }
1246
1247 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1248 }
1249
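/* Register a new RAMBlock of 'size' bytes.  The memory comes from the
 * caller-supplied 'host' pointer if one is given, otherwise from Xen, from
 * -mem-path (hugetlbfs) or from the phys_mem_alloc hook.  The block is
 * inserted into the list (kept sorted from largest to smallest), the dirty
 * bitmaps are grown if the RAM address space grew, and the new range is
 * marked dirty.
 */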
1250 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1251 MemoryRegion *mr)
1252 {
1253 RAMBlock *block, *new_block;
1254 ram_addr_t old_ram_size, new_ram_size;
1255
1256 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1257
1258 size = TARGET_PAGE_ALIGN(size);
1259 new_block = g_malloc0(sizeof(*new_block));
1260 new_block->fd = -1;
1261
1262 /* This assumes the iothread lock is taken here too. */
1263 qemu_mutex_lock_ramlist();
1264 new_block->mr = mr;
1265 new_block->offset = find_ram_offset(size);
1266 if (host) {
1267 new_block->host = host;
1268 new_block->flags |= RAM_PREALLOC_MASK;
1269 } else if (xen_enabled()) {
1270 if (mem_path) {
1271 fprintf(stderr, "-mem-path not supported with Xen\n");
1272 exit(1);
1273 }
1274 xen_ram_alloc(new_block->offset, size, mr);
1275 } else {
1276 if (mem_path) {
1277 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1278 /*
1279 * file_ram_alloc() needs to allocate just like
1280 * phys_mem_alloc, but we haven't bothered to provide
1281 * a hook there.
1282 */
1283 fprintf(stderr,
1284 "-mem-path not supported with this accelerator\n");
1285 exit(1);
1286 }
1287 new_block->host = file_ram_alloc(new_block, size, mem_path);
1288 }
1289 if (!new_block->host) {
1290 new_block->host = phys_mem_alloc(size);
1291 if (!new_block->host) {
1292 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1293 new_block->mr->name, strerror(errno));
1294 exit(1);
1295 }
1296 memory_try_enable_merging(new_block->host, size);
1297 }
1298 }
1299 new_block->length = size;
1300
1301 /* Keep the list sorted from biggest to smallest block. */
1302 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1303 if (block->length < new_block->length) {
1304 break;
1305 }
1306 }
1307 if (block) {
1308 QTAILQ_INSERT_BEFORE(block, new_block, next);
1309 } else {
1310 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1311 }
1312 ram_list.mru_block = NULL;
1313
1314 ram_list.version++;
1315 qemu_mutex_unlock_ramlist();
1316
1317 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1318
1319 if (new_ram_size > old_ram_size) {
1320 int i;
1321 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1322 ram_list.dirty_memory[i] =
1323 bitmap_zero_extend(ram_list.dirty_memory[i],
1324 old_ram_size, new_ram_size);
1325 }
1326 }
1327 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1328
1329 qemu_ram_setup_dump(new_block->host, size);
1330 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1331 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1332
1333 if (kvm_enabled())
1334 kvm_setup_guest_memory(new_block->host, size);
1335
1336 return new_block->offset;
1337 }
1338
1339 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1340 {
1341 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1342 }
1343
1344 void qemu_ram_free_from_ptr(ram_addr_t addr)
1345 {
1346 RAMBlock *block;
1347
1348 /* This assumes the iothread lock is taken here too. */
1349 qemu_mutex_lock_ramlist();
1350 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1351 if (addr == block->offset) {
1352 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1353 ram_list.mru_block = NULL;
1354 ram_list.version++;
1355 g_free(block);
1356 break;
1357 }
1358 }
1359 qemu_mutex_unlock_ramlist();
1360 }
1361
1362 void qemu_ram_free(ram_addr_t addr)
1363 {
1364 RAMBlock *block;
1365
1366 /* This assumes the iothread lock is taken here too. */
1367 qemu_mutex_lock_ramlist();
1368 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1369 if (addr == block->offset) {
1370 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1371 ram_list.mru_block = NULL;
1372 ram_list.version++;
1373 if (block->flags & RAM_PREALLOC_MASK) {
1374 ;
1375 } else if (xen_enabled()) {
1376 xen_invalidate_map_cache_entry(block->host);
1377 #ifndef _WIN32
1378 } else if (block->fd >= 0) {
1379 munmap(block->host, block->length);
1380 close(block->fd);
1381 #endif
1382 } else {
1383 qemu_anon_ram_free(block->host, block->length);
1384 }
1385 g_free(block);
1386 break;
1387 }
1388 }
1389 qemu_mutex_unlock_ramlist();
1390
1391 }
1392
1393 #ifndef _WIN32
1394 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1395 {
1396 RAMBlock *block;
1397 ram_addr_t offset;
1398 int flags;
1399 void *area, *vaddr;
1400
1401 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1402 offset = addr - block->offset;
1403 if (offset < block->length) {
1404 vaddr = block->host + offset;
1405 if (block->flags & RAM_PREALLOC_MASK) {
1406 ;
1407 } else if (xen_enabled()) {
1408 abort();
1409 } else {
1410 flags = MAP_FIXED;
1411 munmap(vaddr, length);
1412 if (block->fd >= 0) {
1413 #ifdef MAP_POPULATE
1414 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1415 MAP_PRIVATE;
1416 #else
1417 flags |= MAP_PRIVATE;
1418 #endif
1419 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1420 flags, block->fd, offset);
1421 } else {
1422 /*
1423 * Remap needs to match alloc. Accelerators that
1424 * set phys_mem_alloc never remap. If they did,
1425 * we'd need a remap hook here.
1426 */
1427 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1428
1429 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1430 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1431 flags, -1, 0);
1432 }
1433 if (area != vaddr) {
1434 fprintf(stderr, "Could not remap addr: "
1435 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1436 length, addr);
1437 exit(1);
1438 }
1439 memory_try_enable_merging(vaddr, length);
1440 qemu_ram_setup_dump(vaddr, length);
1441 }
1442 return;
1443 }
1444 }
1445 }
1446 #endif /* !_WIN32 */
1447
1448 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1449 With the exception of the softmmu code in this file, this should
1450 only be used for local memory (e.g. video ram) that the device owns,
1451 and knows it isn't going to access beyond the end of the block.
1452
1453 It should not be used for general purpose DMA.
1454 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1455 */
1456 void *qemu_get_ram_ptr(ram_addr_t addr)
1457 {
1458 RAMBlock *block = qemu_get_ram_block(addr);
1459
1460 if (xen_enabled()) {
1461 /* We need to check if the requested address is in the RAM
1462 * because we don't want to map the entire memory in QEMU.
1463 * In that case just map until the end of the page.
1464 */
1465 if (block->offset == 0) {
1466 return xen_map_cache(addr, 0, 0);
1467 } else if (block->host == NULL) {
1468 block->host =
1469 xen_map_cache(block->offset, block->length, 1);
1470 }
1471 }
1472 return block->host + (addr - block->offset);
1473 }
1474
1475 /* Return a host pointer to guest RAM. Similar to qemu_get_ram_ptr,
1476 * but takes a size argument. */
1477 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1478 {
1479 if (*size == 0) {
1480 return NULL;
1481 }
1482 if (xen_enabled()) {
1483 return xen_map_cache(addr, *size, 1);
1484 } else {
1485 RAMBlock *block;
1486
1487 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1488 if (addr - block->offset < block->length) {
1489 if (addr - block->offset + *size > block->length)
1490 *size = block->length - addr + block->offset;
1491 return block->host + (addr - block->offset);
1492 }
1493 }
1494
1495 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1496 abort();
1497 }
1498 }
1499
1500 /* Some of the softmmu routines need to translate from a host pointer
1501 (typically a TLB entry) back to a ram offset. */
1502 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1503 {
1504 RAMBlock *block;
1505 uint8_t *host = ptr;
1506
1507 if (xen_enabled()) {
1508 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1509 return qemu_get_ram_block(*ram_addr)->mr;
1510 }
1511
1512 block = ram_list.mru_block;
1513 if (block && block->host && host - block->host < block->length) {
1514 goto found;
1515 }
1516
1517 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1518 /* This case happens when the block is not mapped. */
1519 if (block->host == NULL) {
1520 continue;
1521 }
1522 if (host - block->host < block->length) {
1523 goto found;
1524 }
1525 }
1526
1527 return NULL;
1528
1529 found:
1530 *ram_addr = block->offset + (host - block->host);
1531 return block->mr;
1532 }
1533
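/* Write handler for pages mapped through PHYS_SECTION_NOTDIRTY, i.e. pages
 * that may still contain translated code.  Invalidate any TBs for the page,
 * perform the store, mark the page dirty for migration and VGA, and
 * re-enable direct writes via the TLB once the page is no longer clean.
 */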
1534 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1535 uint64_t val, unsigned size)
1536 {
1537 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1538 tb_invalidate_phys_page_fast(ram_addr, size);
1539 }
1540 switch (size) {
1541 case 1:
1542 stb_p(qemu_get_ram_ptr(ram_addr), val);
1543 break;
1544 case 2:
1545 stw_p(qemu_get_ram_ptr(ram_addr), val);
1546 break;
1547 case 4:
1548 stl_p(qemu_get_ram_ptr(ram_addr), val);
1549 break;
1550 default:
1551 abort();
1552 }
1553 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1554 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1555 /* we remove the notdirty callback only if the code has been
1556 flushed */
1557 if (!cpu_physical_memory_is_clean(ram_addr)) {
1558 CPUArchState *env = current_cpu->env_ptr;
1559 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1560 }
1561 }
1562
1563 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1564 unsigned size, bool is_write)
1565 {
1566 return is_write;
1567 }
1568
1569 static const MemoryRegionOps notdirty_mem_ops = {
1570 .write = notdirty_mem_write,
1571 .valid.accepts = notdirty_mem_accepts,
1572 .endianness = DEVICE_NATIVE_ENDIAN,
1573 };
1574
1575 /* Generate a debug exception if a watchpoint has been hit. */
1576 static void check_watchpoint(int offset, int len_mask, int flags)
1577 {
1578 CPUState *cpu = current_cpu;
1579 CPUArchState *env = cpu->env_ptr;
1580 target_ulong pc, cs_base;
1581 target_ulong vaddr;
1582 CPUWatchpoint *wp;
1583 int cpu_flags;
1584
1585 if (cpu->watchpoint_hit) {
1586 /* We re-entered the check after replacing the TB. Now raise
1587 * the debug interrupt so that it will trigger after the
1588 * current instruction. */
1589 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1590 return;
1591 }
1592 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1593 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1594 if ((vaddr == (wp->vaddr & len_mask) ||
1595 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1596 wp->flags |= BP_WATCHPOINT_HIT;
1597 if (!cpu->watchpoint_hit) {
1598 cpu->watchpoint_hit = wp;
1599 tb_check_watchpoint(cpu);
1600 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1601 cpu->exception_index = EXCP_DEBUG;
1602 cpu_loop_exit(cpu);
1603 } else {
1604 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1605 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1606 cpu_resume_from_signal(cpu, NULL);
1607 }
1608 }
1609 } else {
1610 wp->flags &= ~BP_WATCHPOINT_HIT;
1611 }
1612 }
1613 }
1614
1615 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1616 so these check for a hit then pass through to the normal out-of-line
1617 phys routines. */
1618 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1619 unsigned size)
1620 {
1621 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1622 switch (size) {
1623 case 1: return ldub_phys(&address_space_memory, addr);
1624 case 2: return lduw_phys(&address_space_memory, addr);
1625 case 4: return ldl_phys(&address_space_memory, addr);
1626 default: abort();
1627 }
1628 }
1629
1630 static void watch_mem_write(void *opaque, hwaddr addr,
1631 uint64_t val, unsigned size)
1632 {
1633 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1634 switch (size) {
1635 case 1:
1636 stb_phys(&address_space_memory, addr, val);
1637 break;
1638 case 2:
1639 stw_phys(&address_space_memory, addr, val);
1640 break;
1641 case 4:
1642 stl_phys(&address_space_memory, addr, val);
1643 break;
1644 default: abort();
1645 }
1646 }
1647
1648 static const MemoryRegionOps watch_mem_ops = {
1649 .read = watch_mem_read,
1650 .write = watch_mem_write,
1651 .endianness = DEVICE_NATIVE_ENDIAN,
1652 };
1653
1654 static uint64_t subpage_read(void *opaque, hwaddr addr,
1655 unsigned len)
1656 {
1657 subpage_t *subpage = opaque;
1658 uint8_t buf[4];
1659
1660 #if defined(DEBUG_SUBPAGE)
1661 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1662 subpage, len, addr);
1663 #endif
1664 address_space_read(subpage->as, addr + subpage->base, buf, len);
1665 switch (len) {
1666 case 1:
1667 return ldub_p(buf);
1668 case 2:
1669 return lduw_p(buf);
1670 case 4:
1671 return ldl_p(buf);
1672 default:
1673 abort();
1674 }
1675 }
1676
1677 static void subpage_write(void *opaque, hwaddr addr,
1678 uint64_t value, unsigned len)
1679 {
1680 subpage_t *subpage = opaque;
1681 uint8_t buf[4];
1682
1683 #if defined(DEBUG_SUBPAGE)
1684 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1685 " value %"PRIx64"\n",
1686 __func__, subpage, len, addr, value);
1687 #endif
1688 switch (len) {
1689 case 1:
1690 stb_p(buf, value);
1691 break;
1692 case 2:
1693 stw_p(buf, value);
1694 break;
1695 case 4:
1696 stl_p(buf, value);
1697 break;
1698 default:
1699 abort();
1700 }
1701 address_space_write(subpage->as, addr + subpage->base, buf, len);
1702 }
1703
1704 static bool subpage_accepts(void *opaque, hwaddr addr,
1705 unsigned len, bool is_write)
1706 {
1707 subpage_t *subpage = opaque;
1708 #if defined(DEBUG_SUBPAGE)
1709 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1710 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1711 #endif
1712
1713 return address_space_access_valid(subpage->as, addr + subpage->base,
1714 len, is_write);
1715 }
1716
1717 static const MemoryRegionOps subpage_ops = {
1718 .read = subpage_read,
1719 .write = subpage_write,
1720 .valid.accepts = subpage_accepts,
1721 .endianness = DEVICE_NATIVE_ENDIAN,
1722 };
1723
1724 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1725 uint16_t section)
1726 {
1727 int idx, eidx;
1728
1729 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1730 return -1;
1731 idx = SUBPAGE_IDX(start);
1732 eidx = SUBPAGE_IDX(end);
1733 #if defined(DEBUG_SUBPAGE)
1734 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1735 __func__, mmio, start, end, idx, eidx, section);
1736 #endif
1737 for (; idx <= eidx; idx++) {
1738 mmio->sub_section[idx] = section;
1739 }
1740
1741 return 0;
1742 }
1743
1744 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1745 {
1746 subpage_t *mmio;
1747
1748 mmio = g_malloc0(sizeof(subpage_t));
1749
1750 mmio->as = as;
1751 mmio->base = base;
1752 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1753 "subpage", TARGET_PAGE_SIZE);
1754 mmio->iomem.subpage = true;
1755 #if defined(DEBUG_SUBPAGE)
1756 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1757 mmio, base, TARGET_PAGE_SIZE);
1758 #endif
1759 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1760
1761 return mmio;
1762 }
1763
1764 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1765 {
1766 MemoryRegionSection section = {
1767 .address_space = &address_space_memory,
1768 .mr = mr,
1769 .offset_within_address_space = 0,
1770 .offset_within_region = 0,
1771 .size = int128_2_64(),
1772 };
1773
1774 return phys_section_add(map, &section);
1775 }
1776
1777 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1778 {
1779 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1780 }
1781
1782 static void io_mem_init(void)
1783 {
1784 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1785 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1786 "unassigned", UINT64_MAX);
1787 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1788 "notdirty", UINT64_MAX);
1789 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1790 "watch", UINT64_MAX);
1791 }
1792
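/* Start of a memory topology update: build a fresh AddressSpaceDispatch and
 * register the four fixed dummy sections so that they land at indices
 * PHYS_SECTION_UNASSIGNED..PHYS_SECTION_WATCH.  mem_commit() later compacts
 * the new page table, swaps the dispatch in and frees the old one.
 */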
1793 static void mem_begin(MemoryListener *listener)
1794 {
1795 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1796 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1797 uint16_t n;
1798
1799 n = dummy_section(&d->map, &io_mem_unassigned);
1800 assert(n == PHYS_SECTION_UNASSIGNED);
1801 n = dummy_section(&d->map, &io_mem_notdirty);
1802 assert(n == PHYS_SECTION_NOTDIRTY);
1803 n = dummy_section(&d->map, &io_mem_rom);
1804 assert(n == PHYS_SECTION_ROM);
1805 n = dummy_section(&d->map, &io_mem_watch);
1806 assert(n == PHYS_SECTION_WATCH);
1807
1808 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1809 d->as = as;
1810 as->next_dispatch = d;
1811 }
1812
1813 static void mem_commit(MemoryListener *listener)
1814 {
1815 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1816 AddressSpaceDispatch *cur = as->dispatch;
1817 AddressSpaceDispatch *next = as->next_dispatch;
1818
1819 phys_page_compact_all(next, next->map.nodes_nb);
1820
1821 as->dispatch = next;
1822
1823 if (cur) {
1824 phys_sections_free(&cur->map);
1825 g_free(cur);
1826 }
1827 }
1828
1829 static void tcg_commit(MemoryListener *listener)
1830 {
1831 CPUState *cpu;
1832
1833 /* since each CPU stores ram addresses in its TLB cache, we must
1834 reset the modified entries */
1835 /* XXX: slow ! */
1836 CPU_FOREACH(cpu) {
1837 CPUArchState *env = cpu->env_ptr;
1838
1839 /* FIXME: Disentangle the cpu.h circular file dependencies so we can
1840 get the right CPU directly from the listener. */
1841 if (cpu->tcg_as_listener != listener) {
1842 continue;
1843 }
1844 tlb_flush(env, 1);
1845 }
1846 }
1847
1848 static void core_log_global_start(MemoryListener *listener)
1849 {
1850 cpu_physical_memory_set_dirty_tracking(true);
1851 }
1852
1853 static void core_log_global_stop(MemoryListener *listener)
1854 {
1855 cpu_physical_memory_set_dirty_tracking(false);
1856 }
1857
1858 static MemoryListener core_memory_listener = {
1859 .log_global_start = core_log_global_start,
1860 .log_global_stop = core_log_global_stop,
1861 .priority = 1,
1862 };
1863
1864 void address_space_init_dispatch(AddressSpace *as)
1865 {
1866 as->dispatch = NULL;
1867 as->dispatch_listener = (MemoryListener) {
1868 .begin = mem_begin,
1869 .commit = mem_commit,
1870 .region_add = mem_add,
1871 .region_nop = mem_add,
1872 .priority = 0,
1873 };
1874 memory_listener_register(&as->dispatch_listener, as);
1875 }
1876
1877 void address_space_destroy_dispatch(AddressSpace *as)
1878 {
1879 AddressSpaceDispatch *d = as->dispatch;
1880
1881 memory_listener_unregister(&as->dispatch_listener);
1882 g_free(d);
1883 as->dispatch = NULL;
1884 }
1885
1886 static void memory_map_init(void)
1887 {
1888 system_memory = g_malloc(sizeof(*system_memory));
1889
1890 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1891 address_space_init(&address_space_memory, system_memory, "memory");
1892
1893 system_io = g_malloc(sizeof(*system_io));
1894 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1895 65536);
1896 address_space_init(&address_space_io, system_io, "I/O");
1897
1898 memory_listener_register(&core_memory_listener, &address_space_memory);
1899 }
1900
1901 MemoryRegion *get_system_memory(void)
1902 {
1903 return system_memory;
1904 }
1905
1906 MemoryRegion *get_system_io(void)
1907 {
1908 return system_io;
1909 }
1910
1911 #endif /* !defined(CONFIG_USER_ONLY) */
1912
1913 /* physical memory access (slow version, mainly for debug) */
1914 #if defined(CONFIG_USER_ONLY)
1915 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1916 uint8_t *buf, int len, int is_write)
1917 {
1918 int l, flags;
1919 target_ulong page;
1920 void * p;
1921
1922 while (len > 0) {
1923 page = addr & TARGET_PAGE_MASK;
1924 l = (page + TARGET_PAGE_SIZE) - addr;
1925 if (l > len)
1926 l = len;
1927 flags = page_get_flags(page);
1928 if (!(flags & PAGE_VALID))
1929 return -1;
1930 if (is_write) {
1931 if (!(flags & PAGE_WRITE))
1932 return -1;
1933 /* XXX: this code should not depend on lock_user */
1934 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1935 return -1;
1936 memcpy(p, buf, l);
1937 unlock_user(p, addr, l);
1938 } else {
1939 if (!(flags & PAGE_READ))
1940 return -1;
1941 /* XXX: this code should not depend on lock_user */
1942 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1943 return -1;
1944 memcpy(buf, p, l);
1945 unlock_user(p, addr, 0);
1946 }
1947 len -= l;
1948 buf += l;
1949 addr += l;
1950 }
1951 return 0;
1952 }
1953
1954 #else
1955
1956 static void invalidate_and_set_dirty(hwaddr addr,
1957 hwaddr length)
1958 {
1959 if (cpu_physical_memory_is_clean(addr)) {
1960 /* invalidate code */
1961 tb_invalidate_phys_page_range(addr, addr + length, 0);
1962 /* set dirty bit */
1963 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1964 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1965 }
1966 xen_modified_memory(addr, length);
1967 }
1968
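/* Clamp an access of 'l' bytes at 'addr' to what the region supports: no
 * larger than the region's maximum access size, no larger than the alignment
 * of 'addr' (unless unaligned accesses are allowed), and rounded down to a
 * power of two.
 */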
1969 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1970 {
1971 unsigned access_size_max = mr->ops->valid.max_access_size;
1972
1973 /* Regions are assumed to support 1-4 byte accesses unless
1974 otherwise specified. */
1975 if (access_size_max == 0) {
1976 access_size_max = 4;
1977 }
1978
1979 /* Bound the maximum access by the alignment of the address. */
1980 if (!mr->ops->impl.unaligned) {
1981 unsigned align_size_max = addr & -addr;
1982 if (align_size_max != 0 && align_size_max < access_size_max) {
1983 access_size_max = align_size_max;
1984 }
1985 }
1986
1987 /* Don't attempt accesses larger than the maximum. */
1988 if (l > access_size_max) {
1989 l = access_size_max;
1990 }
1991 if (l & (l - 1)) {
1992 l = 1 << (qemu_fls(l) - 1);
1993 }
1994
1995 return l;
1996 }
1997
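/* Slow path for physical memory access: translate each chunk and either
 * dispatch it to io_mem_read()/io_mem_write() in 1, 2, 4 or 8 byte units
 * (for MMIO) or memcpy it directly (for RAM), invalidating TBs and setting
 * dirty bits on RAM writes.  Returns true if any MMIO access reported an
 * error.
 */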
1998 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1999 int len, bool is_write)
2000 {
2001 hwaddr l;
2002 uint8_t *ptr;
2003 uint64_t val;
2004 hwaddr addr1;
2005 MemoryRegion *mr;
2006 bool error = false;
2007
2008 while (len > 0) {
2009 l = len;
2010 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2011
2012 if (is_write) {
2013 if (!memory_access_is_direct(mr, is_write)) {
2014 l = memory_access_size(mr, l, addr1);
2015 /* XXX: could force current_cpu to NULL to avoid
2016 potential bugs */
2017 switch (l) {
2018 case 8:
2019 /* 64 bit write access */
2020 val = ldq_p(buf);
2021 error |= io_mem_write(mr, addr1, val, 8);
2022 break;
2023 case 4:
2024 /* 32 bit write access */
2025 val = ldl_p(buf);
2026 error |= io_mem_write(mr, addr1, val, 4);
2027 break;
2028 case 2:
2029 /* 16 bit write access */
2030 val = lduw_p(buf);
2031 error |= io_mem_write(mr, addr1, val, 2);
2032 break;
2033 case 1:
2034 /* 8 bit write access */
2035 val = ldub_p(buf);
2036 error |= io_mem_write(mr, addr1, val, 1);
2037 break;
2038 default:
2039 abort();
2040 }
2041 } else {
2042 addr1 += memory_region_get_ram_addr(mr);
2043 /* RAM case */
2044 ptr = qemu_get_ram_ptr(addr1);
2045 memcpy(ptr, buf, l);
2046 invalidate_and_set_dirty(addr1, l);
2047 }
2048 } else {
2049 if (!memory_access_is_direct(mr, is_write)) {
2050 /* I/O case */
2051 l = memory_access_size(mr, l, addr1);
2052 switch (l) {
2053 case 8:
2054 /* 64 bit read access */
2055 error |= io_mem_read(mr, addr1, &val, 8);
2056 stq_p(buf, val);
2057 break;
2058 case 4:
2059 /* 32 bit read access */
2060 error |= io_mem_read(mr, addr1, &val, 4);
2061 stl_p(buf, val);
2062 break;
2063 case 2:
2064 /* 16 bit read access */
2065 error |= io_mem_read(mr, addr1, &val, 2);
2066 stw_p(buf, val);
2067 break;
2068 case 1:
2069 /* 8 bit read access */
2070 error |= io_mem_read(mr, addr1, &val, 1);
2071 stb_p(buf, val);
2072 break;
2073 default:
2074 abort();
2075 }
2076 } else {
2077 /* RAM case */
2078                 ptr = qemu_get_ram_ptr(memory_region_get_ram_addr(mr) + addr1);
2079 memcpy(buf, ptr, l);
2080 }
2081 }
2082 len -= l;
2083 buf += l;
2084 addr += l;
2085 }
2086
2087 return error;
2088 }
2089
2090 bool address_space_write(AddressSpace *as, hwaddr addr,
2091 const uint8_t *buf, int len)
2092 {
2093 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2094 }
2095
2096 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2097 {
2098 return address_space_rw(as, addr, buf, len, false);
2099 }
2100
2101
2102 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2103 int len, int is_write)
2104 {
2105 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2106 }
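
/*
 * Illustrative sketch (the name, address and size are invented): how device
 * code typically drives the accessors above.  A false return from
 * address_space_read()/address_space_write() means every access in the
 * range succeeded.
 */
static bool example_bounce_guest_bytes(AddressSpace *as, hwaddr guest_addr)
{
    uint8_t scratch[64];

    /* Copy 64 bytes out of guest memory, then write them straight back. */
    if (address_space_read(as, guest_addr, scratch, sizeof(scratch))) {
        return false;   /* at least one access failed */
    }
    return !address_space_write(as, guest_addr, scratch, sizeof(scratch));
}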
2107
2108 enum write_rom_type {
2109 WRITE_DATA,
2110 FLUSH_CACHE,
2111 };
2112
2113 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2114 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2115 {
2116 hwaddr l;
2117 uint8_t *ptr;
2118 hwaddr addr1;
2119 MemoryRegion *mr;
2120
2121 while (len > 0) {
2122 l = len;
2123 mr = address_space_translate(as, addr, &addr1, &l, true);
2124
2125 if (!(memory_region_is_ram(mr) ||
2126 memory_region_is_romd(mr))) {
2127 /* do nothing */
2128 } else {
2129 addr1 += memory_region_get_ram_addr(mr);
2130 /* ROM/RAM case */
2131 ptr = qemu_get_ram_ptr(addr1);
2132 switch (type) {
2133 case WRITE_DATA:
2134 memcpy(ptr, buf, l);
2135 invalidate_and_set_dirty(addr1, l);
2136 break;
2137 case FLUSH_CACHE:
2138 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2139 break;
2140 }
2141 }
2142 len -= l;
2143 buf += l;
2144 addr += l;
2145 }
2146 }
2147
2148 /* used for ROM loading: can write in RAM and ROM */
2149 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2150 const uint8_t *buf, int len)
2151 {
2152 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2153 }
2154
2155 void cpu_flush_icache_range(hwaddr start, int len)
2156 {
2157 /*
2158 * This function should do the same thing as an icache flush that was
2159 * triggered from within the guest. For TCG we are always cache coherent,
2160 * so there is no need to flush anything. For KVM / Xen we need to flush
2161 * the host's instruction cache at least.
2162 */
2163 if (tcg_enabled()) {
2164 return;
2165 }
2166
2167 cpu_physical_memory_write_rom_internal(&address_space_memory,
2168 start, NULL, len, FLUSH_CACHE);
2169 }
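
/*
 * Illustrative sketch (invented name and parameters): code that patches guest
 * instructions in place would typically pair the ROM write helper with an
 * explicit icache flush, so that KVM or Xen guests do not execute stale
 * instructions; as the comment above notes, TCG needs no flush.
 */
static void example_patch_guest_code(hwaddr addr, const uint8_t *code, int size)
{
    cpu_physical_memory_write_rom(&address_space_memory, addr, code, size);
    cpu_flush_icache_range(addr, size);
}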
2170
2171 typedef struct {
2172 MemoryRegion *mr;
2173 void *buffer;
2174 hwaddr addr;
2175 hwaddr len;
2176 } BounceBuffer;
2177
2178 static BounceBuffer bounce;
2179
2180 typedef struct MapClient {
2181 void *opaque;
2182 void (*callback)(void *opaque);
2183 QLIST_ENTRY(MapClient) link;
2184 } MapClient;
2185
2186 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2187 = QLIST_HEAD_INITIALIZER(map_client_list);
2188
2189 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2190 {
2191 MapClient *client = g_malloc(sizeof(*client));
2192
2193 client->opaque = opaque;
2194 client->callback = callback;
2195 QLIST_INSERT_HEAD(&map_client_list, client, link);
2196 return client;
2197 }
2198
2199 static void cpu_unregister_map_client(void *_client)
2200 {
2201 MapClient *client = (MapClient *)_client;
2202
2203 QLIST_REMOVE(client, link);
2204 g_free(client);
2205 }
2206
2207 static void cpu_notify_map_clients(void)
2208 {
2209 MapClient *client;
2210
2211 while (!QLIST_EMPTY(&map_client_list)) {
2212 client = QLIST_FIRST(&map_client_list);
2213 client->callback(client->opaque);
2214 cpu_unregister_map_client(client);
2215 }
2216 }
2217
2218 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2219 {
2220 MemoryRegion *mr;
2221 hwaddr l, xlat;
2222
2223 while (len > 0) {
2224 l = len;
2225 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2226 if (!memory_access_is_direct(mr, is_write)) {
2227 l = memory_access_size(mr, l, addr);
2228 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2229 return false;
2230 }
2231 }
2232
2233 len -= l;
2234 addr += l;
2235 }
2236 return true;
2237 }
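
/*
 * Illustrative sketch (invented name): a device model can use
 * address_space_access_valid() to reject a DMA window up front rather than
 * discovering a failure halfway through address_space_rw().
 */
static bool example_dma_window_ok(AddressSpace *as, hwaddr base, int size)
{
    /* Check both directions if the device will read and write the window. */
    return address_space_access_valid(as, base, size, false) &&
           address_space_access_valid(as, base, size, true);
}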
2238
2239 /* Map a physical memory region into a host virtual address.
2240 * May map a subset of the requested range, given by and returned in *plen.
2241 * May return NULL if resources needed to perform the mapping are exhausted.
2242 * Use only for reads OR writes - not for read-modify-write operations.
2243 * Use cpu_register_map_client() to know when retrying the map operation is
2244 * likely to succeed.
2245 */
2246 void *address_space_map(AddressSpace *as,
2247 hwaddr addr,
2248 hwaddr *plen,
2249 bool is_write)
2250 {
2251 hwaddr len = *plen;
2252 hwaddr done = 0;
2253 hwaddr l, xlat, base;
2254 MemoryRegion *mr, *this_mr;
2255 ram_addr_t raddr;
2256
2257 if (len == 0) {
2258 return NULL;
2259 }
2260
2261 l = len;
2262 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2263 if (!memory_access_is_direct(mr, is_write)) {
2264 if (bounce.buffer) {
2265 return NULL;
2266 }
2267 /* Avoid unbounded allocations */
2268 l = MIN(l, TARGET_PAGE_SIZE);
2269 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2270 bounce.addr = addr;
2271 bounce.len = l;
2272
2273 memory_region_ref(mr);
2274 bounce.mr = mr;
2275 if (!is_write) {
2276 address_space_read(as, addr, bounce.buffer, l);
2277 }
2278
2279 *plen = l;
2280 return bounce.buffer;
2281 }
2282
2283 base = xlat;
2284 raddr = memory_region_get_ram_addr(mr);
2285
2286 for (;;) {
2287 len -= l;
2288 addr += l;
2289 done += l;
2290 if (len == 0) {
2291 break;
2292 }
2293
2294 l = len;
2295 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2296 if (this_mr != mr || xlat != base + done) {
2297 break;
2298 }
2299 }
2300
2301 memory_region_ref(mr);
2302 *plen = done;
2303 return qemu_ram_ptr_length(raddr + base, plen);
2304 }
2305
2306 /* Unmaps a memory region previously mapped by address_space_map().
2307 * Will also mark the memory as dirty if is_write == 1. access_len gives
2308 * the amount of memory that was actually read or written by the caller.
2309 */
2310 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2311 int is_write, hwaddr access_len)
2312 {
2313 if (buffer != bounce.buffer) {
2314 MemoryRegion *mr;
2315 ram_addr_t addr1;
2316
2317 mr = qemu_ram_addr_from_host(buffer, &addr1);
2318 assert(mr != NULL);
2319 if (is_write) {
2320 while (access_len) {
2321 unsigned l;
2322 l = TARGET_PAGE_SIZE;
2323 if (l > access_len)
2324 l = access_len;
2325 invalidate_and_set_dirty(addr1, l);
2326 addr1 += l;
2327 access_len -= l;
2328 }
2329 }
2330 if (xen_enabled()) {
2331 xen_invalidate_map_cache_entry(buffer);
2332 }
2333 memory_region_unref(mr);
2334 return;
2335 }
2336 if (is_write) {
2337 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2338 }
2339 qemu_vfree(bounce.buffer);
2340 bounce.buffer = NULL;
2341 memory_region_unref(bounce.mr);
2342 cpu_notify_map_clients();
2343 }
2344
2345 void *cpu_physical_memory_map(hwaddr addr,
2346 hwaddr *plen,
2347 int is_write)
2348 {
2349 return address_space_map(&address_space_memory, addr, plen, is_write);
2350 }
2351
2352 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2353 int is_write, hwaddr access_len)
2354 {
2355     address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2356 }
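
/*
 * Illustrative sketch (the example_* names are invented): the intended
 * pattern for address_space_map()/address_space_unmap(), including the
 * map-client retry path taken when the single bounce buffer above is
 * already in use.  This is roughly the pattern the DMA helpers follow.
 */
static void example_retry(void *opaque)
{
    /* Runs from cpu_notify_map_clients() once the bounce buffer is freed;
     * a real caller would restart its pending transfer here. */
}

static void example_map_and_read(AddressSpace *as, hwaddr addr, hwaddr len)
{
    hwaddr plen = len;
    void *host = address_space_map(as, addr, &plen, false);

    if (!host) {
        /* Resources exhausted: ask to be notified and retry later. */
        cpu_register_map_client(NULL, example_retry);
        return;
    }
    /* ... read up to plen bytes directly from 'host' ... */
    address_space_unmap(as, host, plen, false, plen);
}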
2357
2358 /* warning: addr must be aligned */
2359 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2360 enum device_endian endian)
2361 {
2362 uint8_t *ptr;
2363 uint64_t val;
2364 MemoryRegion *mr;
2365 hwaddr l = 4;
2366 hwaddr addr1;
2367
2368 mr = address_space_translate(as, addr, &addr1, &l, false);
2369 if (l < 4 || !memory_access_is_direct(mr, false)) {
2370 /* I/O case */
2371 io_mem_read(mr, addr1, &val, 4);
2372 #if defined(TARGET_WORDS_BIGENDIAN)
2373 if (endian == DEVICE_LITTLE_ENDIAN) {
2374 val = bswap32(val);
2375 }
2376 #else
2377 if (endian == DEVICE_BIG_ENDIAN) {
2378 val = bswap32(val);
2379 }
2380 #endif
2381 } else {
2382 /* RAM case */
2383 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2384 & TARGET_PAGE_MASK)
2385 + addr1);
2386 switch (endian) {
2387 case DEVICE_LITTLE_ENDIAN:
2388 val = ldl_le_p(ptr);
2389 break;
2390 case DEVICE_BIG_ENDIAN:
2391 val = ldl_be_p(ptr);
2392 break;
2393 default:
2394 val = ldl_p(ptr);
2395 break;
2396 }
2397 }
2398 return val;
2399 }
2400
2401 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2402 {
2403 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2404 }
2405
2406 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2407 {
2408 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2409 }
2410
2411 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2412 {
2413 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2414 }
2415
2416 /* warning: addr must be aligned */
2417 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2418 enum device_endian endian)
2419 {
2420 uint8_t *ptr;
2421 uint64_t val;
2422 MemoryRegion *mr;
2423 hwaddr l = 8;
2424 hwaddr addr1;
2425
2426 mr = address_space_translate(as, addr, &addr1, &l,
2427 false);
2428 if (l < 8 || !memory_access_is_direct(mr, false)) {
2429 /* I/O case */
2430 io_mem_read(mr, addr1, &val, 8);
2431 #if defined(TARGET_WORDS_BIGENDIAN)
2432 if (endian == DEVICE_LITTLE_ENDIAN) {
2433 val = bswap64(val);
2434 }
2435 #else
2436 if (endian == DEVICE_BIG_ENDIAN) {
2437 val = bswap64(val);
2438 }
2439 #endif
2440 } else {
2441 /* RAM case */
2442 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2443 & TARGET_PAGE_MASK)
2444 + addr1);
2445 switch (endian) {
2446 case DEVICE_LITTLE_ENDIAN:
2447 val = ldq_le_p(ptr);
2448 break;
2449 case DEVICE_BIG_ENDIAN:
2450 val = ldq_be_p(ptr);
2451 break;
2452 default:
2453 val = ldq_p(ptr);
2454 break;
2455 }
2456 }
2457 return val;
2458 }
2459
2460 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2461 {
2462 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2463 }
2464
2465 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2466 {
2467 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2468 }
2469
2470 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2471 {
2472 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2473 }
2474
2475 /* XXX: optimize */
2476 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2477 {
2478 uint8_t val;
2479 address_space_rw(as, addr, &val, 1, 0);
2480 return val;
2481 }
2482
2483 /* warning: addr must be aligned */
2484 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2485 enum device_endian endian)
2486 {
2487 uint8_t *ptr;
2488 uint64_t val;
2489 MemoryRegion *mr;
2490 hwaddr l = 2;
2491 hwaddr addr1;
2492
2493 mr = address_space_translate(as, addr, &addr1, &l,
2494 false);
2495 if (l < 2 || !memory_access_is_direct(mr, false)) {
2496 /* I/O case */
2497 io_mem_read(mr, addr1, &val, 2);
2498 #if defined(TARGET_WORDS_BIGENDIAN)
2499 if (endian == DEVICE_LITTLE_ENDIAN) {
2500 val = bswap16(val);
2501 }
2502 #else
2503 if (endian == DEVICE_BIG_ENDIAN) {
2504 val = bswap16(val);
2505 }
2506 #endif
2507 } else {
2508 /* RAM case */
2509 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2510 & TARGET_PAGE_MASK)
2511 + addr1);
2512 switch (endian) {
2513 case DEVICE_LITTLE_ENDIAN:
2514 val = lduw_le_p(ptr);
2515 break;
2516 case DEVICE_BIG_ENDIAN:
2517 val = lduw_be_p(ptr);
2518 break;
2519 default:
2520 val = lduw_p(ptr);
2521 break;
2522 }
2523 }
2524 return val;
2525 }
2526
2527 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2528 {
2529 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2530 }
2531
2532 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2533 {
2534 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2535 }
2536
2537 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2538 {
2539 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2540 }
2541
2542 /* warning: addr must be aligned. The RAM page is not marked as dirty
2543    and the code inside is not invalidated. This is useful when the dirty
2544    bits are used to track modified PTEs */
2545 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2546 {
2547 uint8_t *ptr;
2548 MemoryRegion *mr;
2549 hwaddr l = 4;
2550 hwaddr addr1;
2551
2552 mr = address_space_translate(as, addr, &addr1, &l,
2553 true);
2554 if (l < 4 || !memory_access_is_direct(mr, true)) {
2555 io_mem_write(mr, addr1, val, 4);
2556 } else {
2557 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2558 ptr = qemu_get_ram_ptr(addr1);
2559 stl_p(ptr, val);
2560
2561 if (unlikely(in_migration)) {
2562 if (cpu_physical_memory_is_clean(addr1)) {
2563 /* invalidate code */
2564 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2565 /* set dirty bit */
2566 cpu_physical_memory_set_dirty_flag(addr1,
2567 DIRTY_MEMORY_MIGRATION);
2568 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2569 }
2570 }
2571 }
2572 }
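
/*
 * Illustrative sketch (the bit value is invented): a typical caller of
 * stl_phys_notdirty() is target MMU emulation that sets accessed/dirty bits
 * in a guest page-table entry during a page walk.  Unlike stl_phys(), the
 * store skips TB invalidation and the usual dirty-bit update, apart from
 * the migration case handled above.
 */
static void example_set_pte_accessed(AddressSpace *as, hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(as, pte_addr);

    pte |= 0x20;    /* hypothetical "accessed" bit in the guest PTE format */
    stl_phys_notdirty(as, pte_addr, pte);
}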
2573
2574 /* warning: addr must be aligned */
2575 static inline void stl_phys_internal(AddressSpace *as,
2576 hwaddr addr, uint32_t val,
2577 enum device_endian endian)
2578 {
2579 uint8_t *ptr;
2580 MemoryRegion *mr;
2581 hwaddr l = 4;
2582 hwaddr addr1;
2583
2584 mr = address_space_translate(as, addr, &addr1, &l,
2585 true);
2586 if (l < 4 || !memory_access_is_direct(mr, true)) {
2587 #if defined(TARGET_WORDS_BIGENDIAN)
2588 if (endian == DEVICE_LITTLE_ENDIAN) {
2589 val = bswap32(val);
2590 }
2591 #else
2592 if (endian == DEVICE_BIG_ENDIAN) {
2593 val = bswap32(val);
2594 }
2595 #endif
2596 io_mem_write(mr, addr1, val, 4);
2597 } else {
2598 /* RAM case */
2599 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2600 ptr = qemu_get_ram_ptr(addr1);
2601 switch (endian) {
2602 case DEVICE_LITTLE_ENDIAN:
2603 stl_le_p(ptr, val);
2604 break;
2605 case DEVICE_BIG_ENDIAN:
2606 stl_be_p(ptr, val);
2607 break;
2608 default:
2609 stl_p(ptr, val);
2610 break;
2611 }
2612 invalidate_and_set_dirty(addr1, 4);
2613 }
2614 }
2615
2616 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2617 {
2618 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2619 }
2620
2621 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2622 {
2623 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2624 }
2625
2626 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2627 {
2628 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2629 }
2630
2631 /* XXX: optimize */
2632 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2633 {
2634 uint8_t v = val;
2635 address_space_rw(as, addr, &v, 1, 1);
2636 }
2637
2638 /* warning: addr must be aligned */
2639 static inline void stw_phys_internal(AddressSpace *as,
2640 hwaddr addr, uint32_t val,
2641 enum device_endian endian)
2642 {
2643 uint8_t *ptr;
2644 MemoryRegion *mr;
2645 hwaddr l = 2;
2646 hwaddr addr1;
2647
2648 mr = address_space_translate(as, addr, &addr1, &l, true);
2649 if (l < 2 || !memory_access_is_direct(mr, true)) {
2650 #if defined(TARGET_WORDS_BIGENDIAN)
2651 if (endian == DEVICE_LITTLE_ENDIAN) {
2652 val = bswap16(val);
2653 }
2654 #else
2655 if (endian == DEVICE_BIG_ENDIAN) {
2656 val = bswap16(val);
2657 }
2658 #endif
2659 io_mem_write(mr, addr1, val, 2);
2660 } else {
2661 /* RAM case */
2662 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2663 ptr = qemu_get_ram_ptr(addr1);
2664 switch (endian) {
2665 case DEVICE_LITTLE_ENDIAN:
2666 stw_le_p(ptr, val);
2667 break;
2668 case DEVICE_BIG_ENDIAN:
2669 stw_be_p(ptr, val);
2670 break;
2671 default:
2672 stw_p(ptr, val);
2673 break;
2674 }
2675 invalidate_and_set_dirty(addr1, 2);
2676 }
2677 }
2678
2679 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2680 {
2681 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2682 }
2683
2684 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2685 {
2686 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2687 }
2688
2689 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2690 {
2691 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2692 }
2693
2694 /* XXX: optimize */
2695 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2696 {
2697 val = tswap64(val);
2698 address_space_rw(as, addr, (void *) &val, 8, 1);
2699 }
2700
2701 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2702 {
2703 val = cpu_to_le64(val);
2704 address_space_rw(as, addr, (void *) &val, 8, 1);
2705 }
2706
2707 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2708 {
2709 val = cpu_to_be64(val);
2710 address_space_rw(as, addr, (void *) &val, 8, 1);
2711 }
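
/*
 * Illustrative sketch (the descriptor layout and offsets are invented): the
 * ld*_phys()/st*_phys() helpers above give device code fixed-width,
 * endian-explicit access to guest physical memory without staging a buffer
 * for address_space_rw().
 */
static void example_read_le_descriptor(AddressSpace *as, hwaddr desc)
{
    uint64_t buf_addr = ldq_le_phys(as, desc);        /* 64-bit LE field */
    uint32_t buf_len  = ldl_le_phys(as, desc + 8);    /* 32-bit LE field */
    uint16_t flags    = lduw_le_phys(as, desc + 12);  /* 16-bit LE field */

    /* Acknowledge the descriptor with a one-byte status write. */
    stb_phys(as, desc + 14, 0x01);

    (void)buf_addr;
    (void)buf_len;
    (void)flags;
}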
2712
2713 /* virtual memory access for debug (includes writing to ROM) */
2714 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2715 uint8_t *buf, int len, int is_write)
2716 {
2717 int l;
2718 hwaddr phys_addr;
2719 target_ulong page;
2720
2721 while (len > 0) {
2722 page = addr & TARGET_PAGE_MASK;
2723 phys_addr = cpu_get_phys_page_debug(cpu, page);
2724 /* if no physical page mapped, return an error */
2725 if (phys_addr == -1)
2726 return -1;
2727 l = (page + TARGET_PAGE_SIZE) - addr;
2728 if (l > len)
2729 l = len;
2730 phys_addr += (addr & ~TARGET_PAGE_MASK);
2731 if (is_write) {
2732 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2733 } else {
2734 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2735 }
2736 len -= l;
2737 buf += l;
2738 addr += l;
2739 }
2740 return 0;
2741 }
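
/*
 * Illustrative sketch (invented name): debug facilities such as the gdbstub
 * use cpu_memory_rw_debug() because it resolves guest-virtual addresses via
 * cpu_get_phys_page_debug() and, for writes, can also patch ROM.
 */
static int example_peek_guest_virtual(CPUState *cpu, target_ulong vaddr,
                                      uint8_t *out, int size)
{
    /* Returns 0 on success, -1 if any page in the range is unmapped. */
    return cpu_memory_rw_debug(cpu, vaddr, out, size, 0);
}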
2742 #endif
2743
2744 #if !defined(CONFIG_USER_ONLY)
2745
2746 /*
2747 * A helper function for the _utterly broken_ virtio device model to find out if
2748 * it's running on a big endian machine. Don't do this at home kids!
2749  * it's running on a big-endian machine. Don't do this at home, kids!
2750 bool virtio_is_big_endian(void);
2751 bool virtio_is_big_endian(void)
2752 {
2753 #if defined(TARGET_WORDS_BIGENDIAN)
2754 return true;
2755 #else
2756 return false;
2757 #endif
2758 }
2759
2760 #endif
2761
2762 #ifndef CONFIG_USER_ONLY
2763 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2764 {
2765     MemoryRegion *mr;
2766 hwaddr l = 1;
2767
2768 mr = address_space_translate(&address_space_memory,
2769 phys_addr, &phys_addr, &l, false);
2770
2771 return !(memory_region_is_ram(mr) ||
2772 memory_region_is_romd(mr));
2773 }
2774
2775 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2776 {
2777 RAMBlock *block;
2778
2779 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2780 func(block->host, block->offset, block->length, opaque);
2781 }
2782 }
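
/*
 * Illustrative sketch, assuming the (host, offset, length, opaque) callback
 * signature implied by the call above: a RAMBlockIterFunc that totals the
 * size of all registered RAM blocks.
 */
static void example_sum_block(void *host, ram_addr_t offset,
                              ram_addr_t length, void *opaque)
{
    *(ram_addr_t *)opaque += length;
}

static ram_addr_t example_total_ram(void)
{
    ram_addr_t total = 0;

    qemu_ram_foreach_block(example_sum_block, &total);
    return total;
}
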
2783 #endif