]> git.proxmox.com Git - mirror_qemu.git/blob - exec.c
cpu: Turn cpu_handle_mmu_fault() into a CPUClass hook
[mirror_qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "exec/memory.h"
37 #include "sysemu/dma.h"
38 #include "exec/address-spaces.h"
39 #if defined(CONFIG_USER_ONLY)
40 #include <qemu.h>
41 #else /* !CONFIG_USER_ONLY */
42 #include "sysemu/xen-mapcache.h"
43 #include "trace.h"
44 #endif
45 #include "exec/cpu-all.h"
46
47 #include "exec/cputlb.h"
48 #include "translate-all.h"
49
50 #include "exec/memory-internal.h"
51 #include "exec/ram_addr.h"
52 #include "qemu/cache-utils.h"
53
54 #include "qemu/range.h"
55
56 //#define DEBUG_SUBPAGE
57
58 #if !defined(CONFIG_USER_ONLY)
/* Dirty-tracking flag; written by cpu_physical_memory_set_dirty_tracking(). */
59 static bool in_migration;
60
/* Global list of guest RAM blocks; its mutex is initialised in
 * cpu_exec_init_all() and taken via qemu_mutex_lock_ramlist(). */
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
62
/* Root regions; presumably created by memory_map_init() -- not visible in
 * this part of the file. */
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
65
/* address_space_memory is the address space every CPU is attached to in
 * cpu_exec_init(). */
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
68
/* Special-purpose regions.  io_mem_unassigned is what
 * address_space_translate() returns when an IOMMU denies the access. */
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
71
72 #endif
73
/* List of all CPUs; cpu_exec_init() appends to it and CPU_FOREACH() walks it. */
74 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
75 /* current CPU in the current thread. It is only valid inside
76 cpu_exec() */
77 DEFINE_TLS(CPUState *, current_cpu);
78 /* 0 = Do not count executed instructions.
79 1 = Precise instruction counting.
80 2 = Adaptive rate instruction counting. */
81 int use_icount;
82
83 #if !defined(CONFIG_USER_ONLY)
84
/* One slot of the physical page radix tree.  The two bit-fields share a
 * single 32-bit word (6 + 26 bits). */
85 typedef struct PhysPageEntry PhysPageEntry;
86
87 struct PhysPageEntry {
88 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
89 uint32_t skip : 6;
90 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
91 uint32_t ptr : 26;
92 };
93
/* All-ones value of the 26-bit ptr field: marks "no node allocated". */
94 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
95
96 /* Size of the L2 (and L3, etc) page tables. */
97 #define ADDR_SPACE_BITS 64
98
/* Each tree node resolves P_L2_BITS bits of the page index and therefore
 * holds P_L2_SIZE entries. */
99 #define P_L2_BITS 9
100 #define P_L2_SIZE (1 << P_L2_BITS)
101
/* Number of levels needed to cover the page-index bits
 * (ADDR_SPACE_BITS - TARGET_PAGE_BITS), rounded up. */
102 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
103
/* A tree node: a fixed-size array of entries. */
104 typedef PhysPageEntry Node[P_L2_SIZE];
105
/* Backing storage for one radix tree: a growable array of nodes and a
 * growable array of sections.  The *_nb fields count the used elements, the
 * *_nb_alloc fields the allocated capacity (see phys_map_node_reserve() and
 * phys_section_add()). */
106 typedef struct PhysPageMap {
107 unsigned sections_nb;
108 unsigned sections_nb_alloc;
109 unsigned nodes_nb;
110 unsigned nodes_nb_alloc;
111 Node *nodes;
112 MemoryRegionSection *sections;
113 } PhysPageMap;
114
115 struct AddressSpaceDispatch {
116 /* This is a multi-level map on the physical address space.
117 * The bottom level has pointers to MemoryRegionSections.
118 */
/* Root entry of the tree; its ptr indexes map.nodes. */
119 PhysPageEntry phys_map;
120 PhysPageMap map;
/* Address space this dispatch table belongs to. */
121 AddressSpace *as;
122 };
123
/* Offset of @addr inside its target page. */
124 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/* A page that is split between several sections: sub_section[] holds, for
 * every byte offset in the page, an index into the dispatch's sections
 * array (see address_space_lookup_region()). */
125 typedef struct subpage_t {
126 MemoryRegion iomem;
127 AddressSpace *as;
128 hwaddr base;
129 uint16_t sub_section[TARGET_PAGE_SIZE];
130 } subpage_t;
131
/* Fixed section indices.  They are combined with page-aligned values to form
 * iotlb entries in memory_region_section_get_iotlb(). */
132 #define PHYS_SECTION_UNASSIGNED 0
133 #define PHYS_SECTION_NOTDIRTY 1
134 #define PHYS_SECTION_ROM 2
135 #define PHYS_SECTION_WATCH 3
136
/* Forward declarations of helpers defined later in the file. */
137 static void io_mem_init(void);
138 static void memory_map_init(void);
139 static void tcg_commit(MemoryListener *listener);
140
/* Region used for pages that contain a watchpoint (PHYS_SECTION_WATCH). */
141 static MemoryRegion io_mem_watch;
142 #endif
143
144 #if !defined(CONFIG_USER_ONLY)
145
146 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
147 {
148 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
149 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
151 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
152 }
153 }
154
155 static uint32_t phys_map_node_alloc(PhysPageMap *map)
156 {
157 unsigned i;
158 uint32_t ret;
159
160 ret = map->nodes_nb++;
161 assert(ret != PHYS_MAP_NODE_NIL);
162 assert(ret != map->nodes_nb_alloc);
163 for (i = 0; i < P_L2_SIZE; ++i) {
164 map->nodes[ret][i].skip = 1;
165 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
166 }
167 return ret;
168 }
169
/*
 * Recursive worker for phys_page_set(): point the pages starting at *index
 * (at most *nb of them) to section number @leaf, working inside the node
 * referenced by @lp at tree level @level (0 is the bottom level).
 *
 * *index and *nb are advanced/decremented as pages are consumed, so the
 * caller continues where this call stopped.
 */
170 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
171 hwaddr *index, hwaddr *nb, uint16_t leaf,
172 int level)
173 {
174 PhysPageEntry *p;
175 int i;
/* Number of pages covered by a single entry at this level. */
176 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
177
/* Interior entry without a node yet: allocate one on demand. */
178 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
179 lp->ptr = phys_map_node_alloc(map);
180 p = map->nodes[lp->ptr];
181 if (level == 0) {
/* Bottom-level nodes hold leaves; start them all as "unassigned". */
182 for (i = 0; i < P_L2_SIZE; i++) {
183 p[i].skip = 0;
184 p[i].ptr = PHYS_SECTION_UNASSIGNED;
185 }
186 }
187 } else {
188 p = map->nodes[lp->ptr];
189 }
/* First entry of this node that is touched by *index. */
190 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
191
/* Walk entries until the range is exhausted or the node ends. */
192 while (*nb && lp < &p[P_L2_SIZE]) {
193 if ((*index & (step - 1)) == 0 && *nb >= step) {
/* The remaining range covers this whole, aligned entry: make it a leaf. */
194 lp->skip = 0;
195 lp->ptr = leaf;
196 *index += step;
197 *nb -= step;
198 } else {
/* Partial coverage: descend one level for finer granularity. */
199 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
200 }
201 ++lp;
202 }
203 }
204
205 static void phys_page_set(AddressSpaceDispatch *d,
206 hwaddr index, hwaddr nb,
207 uint16_t leaf)
208 {
209 /* Wildly overreserve - it doesn't matter much. */
210 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
211
212 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
213 }
214
215 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
216 * and update our entry so we can skip it and go directly to the destination.
217 */
/*
 * @lp:        entry to compact (modified in place)
 * @nodes:     node array that lp->ptr and the children's ptr index into
 * @compacted: bitmap handed down unchanged to recursive calls; it is not
 *             read or written by the code visible here
 */
218 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
219 {
/* P_L2_SIZE is an out-of-range sentinel meaning "no valid child seen". */
220 unsigned valid_ptr = P_L2_SIZE;
221 int valid = 0;
222 PhysPageEntry *p;
223 int i;
224
/* Nothing below this entry. */
225 if (lp->ptr == PHYS_MAP_NODE_NIL) {
226 return;
227 }
228
229 p = nodes[lp->ptr];
/* Count populated children, remembering the last one, and compact
 * interior children first (depth-first). */
230 for (i = 0; i < P_L2_SIZE; i++) {
231 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
232 continue;
233 }
234
235 valid_ptr = i;
236 valid++;
237 if (p[i].skip) {
238 phys_page_compact(&p[i], nodes, compacted);
239 }
240 }
241
242 /* We can only compress if there's only one child. */
243 if (valid != 1) {
244 return;
245 }
246
247 assert(valid_ptr < P_L2_SIZE);
248
249 /* Don't compress if it won't fit in the # of bits we have. */
/* NOTE(review): the limit used here is 8 (1 << 3) although the skip
 * bit-field in PhysPageEntry is 6 bits wide -- confirm this is intended. */
250 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
251 return;
252 }
253
/* Bypass the single-child node: point straight at the grandchild. */
254 lp->ptr = p[valid_ptr].ptr;
255 if (!p[valid_ptr].skip) {
256 /* If our only child is a leaf, make this a leaf. */
257 /* By design, we should have made this node a leaf to begin with so we
258 * should never reach here.
259 * But since it's so simple to handle this, let's do it just in case we
260 * change this rule.
261 */
262 lp->skip = 0;
263 } else {
/* Accumulate the number of levels that a lookup now jumps over. */
264 lp->skip += p[valid_ptr].skip;
265 }
266 }
267
268 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
269 {
270 DECLARE_BITMAP(compacted, nodes_nb);
271
272 if (d->phys_map.skip) {
273 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
274 }
275 }
276
277 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
278 Node *nodes, MemoryRegionSection *sections)
279 {
280 PhysPageEntry *p;
281 hwaddr index = addr >> TARGET_PAGE_BITS;
282 int i;
283
284 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
285 if (lp.ptr == PHYS_MAP_NODE_NIL) {
286 return &sections[PHYS_SECTION_UNASSIGNED];
287 }
288 p = nodes[lp.ptr];
289 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
290 }
291
292 if (sections[lp.ptr].size.hi ||
293 range_covers_byte(sections[lp.ptr].offset_within_address_space,
294 sections[lp.ptr].size.lo, addr)) {
295 return &sections[lp.ptr];
296 } else {
297 return &sections[PHYS_SECTION_UNASSIGNED];
298 }
299 }
300
301 bool memory_region_is_unassigned(MemoryRegion *mr)
302 {
303 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
304 && mr != &io_mem_watch;
305 }
306
307 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
308 hwaddr addr,
309 bool resolve_subpage)
310 {
311 MemoryRegionSection *section;
312 subpage_t *subpage;
313
314 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
315 if (resolve_subpage && section->mr->subpage) {
316 subpage = container_of(section->mr, subpage_t, iomem);
317 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
318 }
319 return section;
320 }
321
322 static MemoryRegionSection *
323 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
324 hwaddr *plen, bool resolve_subpage)
325 {
326 MemoryRegionSection *section;
327 Int128 diff;
328
329 section = address_space_lookup_region(d, addr, resolve_subpage);
330 /* Compute offset within MemoryRegionSection */
331 addr -= section->offset_within_address_space;
332
333 /* Compute offset within MemoryRegion */
334 *xlat = addr + section->offset_within_region;
335
336 diff = int128_sub(section->mr->size, int128_make64(addr));
337 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
338 return section;
339 }
340
341 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
342 {
343 if (memory_region_is_ram(mr)) {
344 return !(is_write && mr->readonly);
345 }
346 if (memory_region_is_romd(mr)) {
347 return !is_write;
348 }
349
350 return false;
351 }
352
353 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
354 hwaddr *xlat, hwaddr *plen,
355 bool is_write)
356 {
357 IOMMUTLBEntry iotlb;
358 MemoryRegionSection *section;
359 MemoryRegion *mr;
360 hwaddr len = *plen;
361
362 for (;;) {
363 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
364 mr = section->mr;
365
366 if (!mr->iommu_ops) {
367 break;
368 }
369
370 iotlb = mr->iommu_ops->translate(mr, addr);
371 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
372 | (addr & iotlb.addr_mask));
373 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
374 if (!(iotlb.perm & (1 << is_write))) {
375 mr = &io_mem_unassigned;
376 break;
377 }
378
379 as = iotlb.target_as;
380 }
381
382 if (memory_access_is_direct(mr, is_write)) {
383 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
384 len = MIN(page, len);
385 }
386
387 *plen = len;
388 *xlat = addr;
389 return mr;
390 }
391
392 MemoryRegionSection *
393 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
394 hwaddr *plen)
395 {
396 MemoryRegionSection *section;
397 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
398
399 assert(!section->mr->iommu_ops);
400 return section;
401 }
402 #endif
403
404 void cpu_exec_init_all(void)
405 {
406 #if !defined(CONFIG_USER_ONLY)
407 qemu_mutex_init(&ram_list.mutex);
408 memory_map_init();
409 io_mem_init();
410 #endif
411 }
412
413 #if !defined(CONFIG_USER_ONLY)
414
415 static int cpu_common_post_load(void *opaque, int version_id)
416 {
417 CPUState *cpu = opaque;
418
419 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
420 version_id is increased. */
421 cpu->interrupt_request &= ~0x01;
422 tlb_flush(cpu->env_ptr, 1);
423
424 return 0;
425 }
426
/*
 * Migration description of the CPU state that is common to all targets:
 * the halted flag and the pending interrupt_request mask.
 * cpu_exec_init() registers it for CPUs whose device has no vmsd of its own.
 */
427 const VMStateDescription vmstate_cpu_common = {
428 .name = "cpu_common",
429 .version_id = 1,
430 .minimum_version_id = 1,
431 .minimum_version_id_old = 1,
/* Runs after the fields below have been loaded. */
432 .post_load = cpu_common_post_load,
433 .fields = (VMStateField []) {
434 VMSTATE_UINT32(halted, CPUState),
435 VMSTATE_UINT32(interrupt_request, CPUState),
436 VMSTATE_END_OF_LIST()
437 }
438 };
439
440 #endif
441
442 CPUState *qemu_get_cpu(int index)
443 {
444 CPUState *cpu;
445
446 CPU_FOREACH(cpu) {
447 if (cpu->cpu_index == index) {
448 return cpu;
449 }
450 }
451
452 return NULL;
453 }
454
455 #if !defined(CONFIG_USER_ONLY)
456 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
457 {
458 /* We only support one address space per cpu at the moment. */
459 assert(cpu->as == as);
460
461 if (cpu->tcg_as_listener) {
462 memory_listener_unregister(cpu->tcg_as_listener);
463 } else {
464 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
465 }
466 cpu->tcg_as_listener->commit = tcg_commit;
467 memory_listener_register(cpu->tcg_as_listener, as);
468 }
469 #endif
470
471 void cpu_exec_init(CPUArchState *env)
472 {
473 CPUState *cpu = ENV_GET_CPU(env);
474 CPUClass *cc = CPU_GET_CLASS(cpu);
475 CPUState *some_cpu;
476 int cpu_index;
477
478 #if defined(CONFIG_USER_ONLY)
479 cpu_list_lock();
480 #endif
481 cpu_index = 0;
482 CPU_FOREACH(some_cpu) {
483 cpu_index++;
484 }
485 cpu->cpu_index = cpu_index;
486 cpu->numa_node = 0;
487 QTAILQ_INIT(&env->breakpoints);
488 QTAILQ_INIT(&env->watchpoints);
489 #ifndef CONFIG_USER_ONLY
490 cpu->as = &address_space_memory;
491 cpu->thread_id = qemu_get_thread_id();
492 #endif
493 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
494 #if defined(CONFIG_USER_ONLY)
495 cpu_list_unlock();
496 #endif
497 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
498 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
499 }
500 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
501 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
502 cpu_save, cpu_load, env);
503 assert(cc->vmsd == NULL);
504 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
505 #endif
506 if (cc->vmsd != NULL) {
507 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
508 }
509 }
510
511 #if defined(TARGET_HAS_ICE)
512 #if defined(CONFIG_USER_ONLY)
513 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
514 {
515 tb_invalidate_phys_page_range(pc, pc + 1, 0);
516 }
517 #else
518 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
519 {
520 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
521 if (phys != -1) {
522 tb_invalidate_phys_addr(cpu->as,
523 phys | (pc & ~TARGET_PAGE_MASK));
524 }
525 }
526 #endif
527 #endif /* TARGET_HAS_ICE */
528
529 #if defined(CONFIG_USER_ONLY)
530 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
531
532 {
533 }
534
535 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
536 int flags, CPUWatchpoint **watchpoint)
537 {
538 return -ENOSYS;
539 }
540 #else
541 /* Add a watchpoint. */
542 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
543 int flags, CPUWatchpoint **watchpoint)
544 {
545 target_ulong len_mask = ~(len - 1);
546 CPUWatchpoint *wp;
547
548 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
549 if ((len & (len - 1)) || (addr & ~len_mask) ||
550 len == 0 || len > TARGET_PAGE_SIZE) {
551 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
552 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
553 return -EINVAL;
554 }
555 wp = g_malloc(sizeof(*wp));
556
557 wp->vaddr = addr;
558 wp->len_mask = len_mask;
559 wp->flags = flags;
560
561 /* keep all GDB-injected watchpoints in front */
562 if (flags & BP_GDB)
563 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
564 else
565 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
566
567 tlb_flush_page(env, addr);
568
569 if (watchpoint)
570 *watchpoint = wp;
571 return 0;
572 }
573
574 /* Remove a specific watchpoint. */
575 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
576 int flags)
577 {
578 target_ulong len_mask = ~(len - 1);
579 CPUWatchpoint *wp;
580
581 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
582 if (addr == wp->vaddr && len_mask == wp->len_mask
583 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
584 cpu_watchpoint_remove_by_ref(env, wp);
585 return 0;
586 }
587 }
588 return -ENOENT;
589 }
590
591 /* Remove a specific watchpoint by reference. */
592 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
593 {
594 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
595
596 tlb_flush_page(env, watchpoint->vaddr);
597
598 g_free(watchpoint);
599 }
600
601 /* Remove all matching watchpoints. */
602 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
603 {
604 CPUWatchpoint *wp, *next;
605
606 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
607 if (wp->flags & mask)
608 cpu_watchpoint_remove_by_ref(env, wp);
609 }
610 }
611 #endif
612
613 /* Add a breakpoint. */
614 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
615 CPUBreakpoint **breakpoint)
616 {
617 #if defined(TARGET_HAS_ICE)
618 CPUBreakpoint *bp;
619
620 bp = g_malloc(sizeof(*bp));
621
622 bp->pc = pc;
623 bp->flags = flags;
624
625 /* keep all GDB-injected breakpoints in front */
626 if (flags & BP_GDB) {
627 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
628 } else {
629 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
630 }
631
632 breakpoint_invalidate(ENV_GET_CPU(env), pc);
633
634 if (breakpoint) {
635 *breakpoint = bp;
636 }
637 return 0;
638 #else
639 return -ENOSYS;
640 #endif
641 }
642
643 /* Remove a specific breakpoint. */
644 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
645 {
646 #if defined(TARGET_HAS_ICE)
647 CPUBreakpoint *bp;
648
649 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
650 if (bp->pc == pc && bp->flags == flags) {
651 cpu_breakpoint_remove_by_ref(env, bp);
652 return 0;
653 }
654 }
655 return -ENOENT;
656 #else
657 return -ENOSYS;
658 #endif
659 }
660
661 /* Remove a specific breakpoint by reference. */
662 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
663 {
664 #if defined(TARGET_HAS_ICE)
665 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
666
667 breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
668
669 g_free(breakpoint);
670 #endif
671 }
672
673 /* Remove all matching breakpoints. */
674 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
675 {
676 #if defined(TARGET_HAS_ICE)
677 CPUBreakpoint *bp, *next;
678
679 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
680 if (bp->flags & mask)
681 cpu_breakpoint_remove_by_ref(env, bp);
682 }
683 #endif
684 }
685
686 /* enable or disable single step mode. EXCP_DEBUG is returned by the
687 CPU loop after each instruction */
688 void cpu_single_step(CPUState *cpu, int enabled)
689 {
690 #if defined(TARGET_HAS_ICE)
691 if (cpu->singlestep_enabled != enabled) {
692 cpu->singlestep_enabled = enabled;
693 if (kvm_enabled()) {
694 kvm_update_guest_debug(cpu, 0);
695 } else {
696 /* must flush all the translated code to avoid inconsistencies */
697 /* XXX: only flush what is necessary */
698 CPUArchState *env = cpu->env_ptr;
699 tb_flush(env);
700 }
701 }
702 #endif
703 }
704
705 void cpu_abort(CPUArchState *env, const char *fmt, ...)
706 {
707 CPUState *cpu = ENV_GET_CPU(env);
708 va_list ap;
709 va_list ap2;
710
711 va_start(ap, fmt);
712 va_copy(ap2, ap);
713 fprintf(stderr, "qemu: fatal: ");
714 vfprintf(stderr, fmt, ap);
715 fprintf(stderr, "\n");
716 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
717 if (qemu_log_enabled()) {
718 qemu_log("qemu: fatal: ");
719 qemu_log_vprintf(fmt, ap2);
720 qemu_log("\n");
721 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
722 qemu_log_flush();
723 qemu_log_close();
724 }
725 va_end(ap2);
726 va_end(ap);
727 #if defined(CONFIG_USER_ONLY)
728 {
729 struct sigaction act;
730 sigfillset(&act.sa_mask);
731 act.sa_handler = SIG_DFL;
732 sigaction(SIGABRT, &act, NULL);
733 }
734 #endif
735 abort();
736 }
737
738 #if !defined(CONFIG_USER_ONLY)
739 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
740 {
741 RAMBlock *block;
742
743 /* The list is protected by the iothread lock here. */
744 block = ram_list.mru_block;
745 if (block && addr - block->offset < block->length) {
746 goto found;
747 }
748 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
749 if (addr - block->offset < block->length) {
750 goto found;
751 }
752 }
753
754 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
755 abort();
756
757 found:
758 ram_list.mru_block = block;
759 return block;
760 }
761
762 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
763 {
764 ram_addr_t start1;
765 RAMBlock *block;
766 ram_addr_t end;
767
768 end = TARGET_PAGE_ALIGN(start + length);
769 start &= TARGET_PAGE_MASK;
770
771 block = qemu_get_ram_block(start);
772 assert(block == qemu_get_ram_block(end - 1));
773 start1 = (uintptr_t)block->host + (start - block->offset);
774 cpu_tlb_reset_dirty_all(start1, length);
775 }
776
777 /* Note: start and end must be within the same ram block. */
778 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
779 unsigned client)
780 {
781 if (length == 0)
782 return;
783 cpu_physical_memory_clear_dirty_range(start, length, client);
784
785 if (tcg_enabled()) {
786 tlb_reset_dirty_range_all(start, length);
787 }
788 }
789
790 static void cpu_physical_memory_set_dirty_tracking(bool enable)
791 {
792 in_migration = enable;
793 }
794
795 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
796 MemoryRegionSection *section,
797 target_ulong vaddr,
798 hwaddr paddr, hwaddr xlat,
799 int prot,
800 target_ulong *address)
801 {
802 hwaddr iotlb;
803 CPUWatchpoint *wp;
804
805 if (memory_region_is_ram(section->mr)) {
806 /* Normal RAM. */
807 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
808 + xlat;
809 if (!section->readonly) {
810 iotlb |= PHYS_SECTION_NOTDIRTY;
811 } else {
812 iotlb |= PHYS_SECTION_ROM;
813 }
814 } else {
815 iotlb = section - section->address_space->dispatch->map.sections;
816 iotlb += xlat;
817 }
818
819 /* Make accesses to pages with watchpoints go via the
820 watchpoint trap routines. */
821 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
822 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
823 /* Avoid trapping reads of pages with a write breakpoint. */
824 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
825 iotlb = PHYS_SECTION_WATCH + paddr;
826 *address |= TLB_MMIO;
827 break;
828 }
829 }
830 }
831
832 return iotlb;
833 }
834 #endif /* !defined(CONFIG_USER_ONLY) */
835
836 #if !defined(CONFIG_USER_ONLY)
837
/* Forward declarations of the subpage helpers used by register_subpage();
 * their definitions are further down in the file. */
838 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
839 uint16_t section);
840 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
841
/* Allocator hook for guest RAM; defaults to qemu_anon_ram_alloc and can be
 * replaced with phys_mem_set_alloc(). */
842 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
843
844 /*
845 * Set a custom physical guest memory alloator.
846 * Accelerators with unusual needs may need this. Hopefully, we can
847 * get rid of it eventually.
848 */
849 void phys_mem_set_alloc(void *(*alloc)(size_t))
850 {
851 phys_mem_alloc = alloc;
852 }
853
854 static uint16_t phys_section_add(PhysPageMap *map,
855 MemoryRegionSection *section)
856 {
857 /* The physical section number is ORed with a page-aligned
858 * pointer to produce the iotlb entries. Thus it should
859 * never overflow into the page-aligned value.
860 */
861 assert(map->sections_nb < TARGET_PAGE_SIZE);
862
863 if (map->sections_nb == map->sections_nb_alloc) {
864 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
865 map->sections = g_renew(MemoryRegionSection, map->sections,
866 map->sections_nb_alloc);
867 }
868 map->sections[map->sections_nb] = *section;
869 memory_region_ref(section->mr);
870 return map->sections_nb++;
871 }
872
873 static void phys_section_destroy(MemoryRegion *mr)
874 {
875 memory_region_unref(mr);
876
877 if (mr->subpage) {
878 subpage_t *subpage = container_of(mr, subpage_t, iomem);
879 memory_region_destroy(&subpage->iomem);
880 g_free(subpage);
881 }
882 }
883
884 static void phys_sections_free(PhysPageMap *map)
885 {
886 while (map->sections_nb > 0) {
887 MemoryRegionSection *section = &map->sections[--map->sections_nb];
888 phys_section_destroy(section->mr);
889 }
890 g_free(map->sections);
891 g_free(map->nodes);
892 }
893
894 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
895 {
896 subpage_t *subpage;
897 hwaddr base = section->offset_within_address_space
898 & TARGET_PAGE_MASK;
899 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
900 d->map.nodes, d->map.sections);
901 MemoryRegionSection subsection = {
902 .offset_within_address_space = base,
903 .size = int128_make64(TARGET_PAGE_SIZE),
904 };
905 hwaddr start, end;
906
907 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
908
909 if (!(existing->mr->subpage)) {
910 subpage = subpage_init(d->as, base);
911 subsection.address_space = d->as;
912 subsection.mr = &subpage->iomem;
913 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
914 phys_section_add(&d->map, &subsection));
915 } else {
916 subpage = container_of(existing->mr, subpage_t, iomem);
917 }
918 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
919 end = start + int128_get64(section->size) - 1;
920 subpage_register(subpage, start, end,
921 phys_section_add(&d->map, section));
922 }
923
924
925 static void register_multipage(AddressSpaceDispatch *d,
926 MemoryRegionSection *section)
927 {
928 hwaddr start_addr = section->offset_within_address_space;
929 uint16_t section_index = phys_section_add(&d->map, section);
930 uint64_t num_pages = int128_get64(int128_rshift(section->size,
931 TARGET_PAGE_BITS));
932
933 assert(num_pages);
934 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
935 }
936
/*
 * Memory listener callback: add @section to the dispatch table that is
 * being built (as->next_dispatch).
 *
 * The section is cut into pieces: an unaligned head up to the next page
 * boundary, a run of whole pages, and a tail shorter than a page.  Partial
 * pages go through register_subpage(), whole pages through
 * register_multipage().  `remain` is what is still to be registered, `now`
 * is the piece handled in the current step.
 */
937 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
938 {
939 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
940 AddressSpaceDispatch *d = as->next_dispatch;
941 MemoryRegionSection now = *section, remain = *section;
942 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
943
/* Unaligned start: register the head, limited to the next page boundary. */
944 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
945 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
946 - now.offset_within_address_space;
947
948 now.size = int128_min(int128_make64(left), now.size);
949 register_subpage(d, &now);
950 } else {
/* Aligned start: nothing consumed yet. */
951 now.size = int128_zero();
952 }
/* Loop until the piece just handled is exactly what was left. */
953 while (int128_ne(remain.size, now.size)) {
/* Advance `remain` past the piece handled in the previous step. */
954 remain.size = int128_sub(remain.size, now.size);
955 remain.offset_within_address_space += int128_get64(now.size);
956 remain.offset_within_region += int128_get64(now.size);
957 now = remain;
958 if (int128_lt(remain.size, page_size)) {
/* Less than a page left: final partial page. */
959 register_subpage(d, &now);
960 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
/* Still unaligned: register one page-sized piece as a subpage. */
961 now.size = page_size;
962 register_subpage(d, &now);
963 } else {
/* Aligned: register as many whole pages as fit (size rounded down). */
964 now.size = int128_and(now.size, int128_neg(page_size));
965 register_multipage(d, &now);
966 }
967 }
968 }
969
/* Flush the coalesced MMIO buffer when KVM is in use; otherwise do
 * nothing. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
975
976 void qemu_mutex_lock_ramlist(void)
977 {
978 qemu_mutex_lock(&ram_list.mutex);
979 }
980
981 void qemu_mutex_unlock_ramlist(void)
982 {
983 qemu_mutex_unlock(&ram_list.mutex);
984 }
985
986 #ifdef __linux__
987
988 #include <sys/vfs.h>
989
/* f_type value that statfs() reports for a hugetlbfs mount. */
#define HUGETLBFS_MAGIC 0x958458f6

/*
 * Return the block size of the filesystem that holds @path, which for a
 * hugetlbfs mount is the huge page size.
 *
 * statfs() is retried while it is interrupted (EINTR).  On any other
 * failure the error is reported with perror() and 0 is returned.  A
 * warning is printed if the filesystem is not hugetlbfs; the block size is
 * returned all the same.
 */
static long gethugepagesize(const char *path)
{
    struct statfs info;
    int rc;

    for (;;) {
        rc = statfs(path, &info);
        if (rc == 0) {
            break;
        }
        if (errno != EINTR) {
            perror(path);
            return 0;
        }
    }

    if (info.f_type != HUGETLBFS_MAGIC) {
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
    }

    return info.f_bsize;
}
1011
/* Jump target armed with sigsetjmp() in file_ram_alloc() while it touches
 * the freshly mapped pages. */
1012 static sigjmp_buf sigjump;
1013
/* SIGBUS handler installed by file_ram_alloc(): jump back to the
 * sigsetjmp() point, making it return 1. */
1014 static void sigbus_handler(int signal)
1015 {
1016 siglongjmp(sigjump, 1);
1017 }
1018
/*
 * Allocate @memory bytes of guest RAM for @block from a file created in
 * the directory @path (the -mem-path case, typically a hugetlbfs mount).
 *
 * The size is rounded up to the block size reported for @path.  On success
 * the mapping is returned and the file descriptor is stored in block->fd.
 * NULL is returned when @memory is smaller than one such block or when an
 * error occurs; with mem_prealloc set, errors terminate the process
 * instead.
 */
1019 static void *file_ram_alloc(RAMBlock *block,
1020 ram_addr_t memory,
1021 const char *path)
1022 {
1023 char *filename;
1024 char *sanitized_name;
1025 char *c;
1026 void *area;
1027 int fd;
1028 unsigned long hpagesize;
1029
1030 hpagesize = gethugepagesize(path);
1031 if (!hpagesize) {
1032 goto error;
1033 }
1034
/* Too small for a single block of the backing filesystem: give up without
 * treating it as an error, even when mem_prealloc is set. */
1035 if (memory < hpagesize) {
1036 return NULL;
1037 }
1038
1039 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1040 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1041 goto error;
1042 }
1043
1044 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1045 sanitized_name = g_strdup(block->mr->name);
1046 for (c = sanitized_name; *c != '\0'; c++) {
1047 if (*c == '/')
1048 *c = '_';
1049 }
1050
1051 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1052 sanitized_name);
1053 g_free(sanitized_name);
1054
1055 fd = mkstemp(filename);
1056 if (fd < 0) {
1057 perror("unable to create backing store for hugepages");
1058 g_free(filename);
1059 goto error;
1060 }
/* Remove the name right away; the file is only reachable through fd. */
1061 unlink(filename);
1062 g_free(filename);
1063
/* Round the size up to a multiple of hpagesize (mask arithmetic assumes
 * hpagesize is a power of two). */
1064 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1065
1066 /*
1067 * ftruncate is not supported by hugetlbfs in older
1068 * hosts, so don't bother bailing out on errors.
1069 * If anything goes wrong with it under other filesystems,
1070 * mmap will fail.
1071 */
1072 if (ftruncate(fd, memory))
1073 perror("ftruncate");
1074
1075 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1076 if (area == MAP_FAILED) {
1077 perror("file_ram_alloc: can't mmap RAM pages");
1078 close(fd);
1079 goto error;
1080 }
1081
/* Preallocation: touch one byte per block so that every page is really
 * allocated now; a SIGBUS while doing so means the pages are not
 * available and is fatal. */
1082 if (mem_prealloc) {
1083 int ret, i;
1084 struct sigaction act, oldact;
1085 sigset_t set, oldset;
1086
1087 memset(&act, 0, sizeof(act));
1088 act.sa_handler = &sigbus_handler;
1089 act.sa_flags = 0;
1090
1091 ret = sigaction(SIGBUS, &act, &oldact);
1092 if (ret) {
1093 perror("file_ram_alloc: failed to install signal handler");
1094 exit(1);
1095 }
1096
1097 /* unblock SIGBUS */
1098 sigemptyset(&set);
1099 sigaddset(&set, SIGBUS);
1100 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1101
/* Non-zero return: we arrived here from sigbus_handler(). */
1102 if (sigsetjmp(sigjump, 1)) {
1103 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1104 exit(1);
1105 }
1106
1107 /* MAP_POPULATE silently ignores failures */
1108 for (i = 0; i < (memory/hpagesize); i++) {
1109 memset(area + (hpagesize*i), 0, 1);
1110 }
1111
/* Restore the previous SIGBUS disposition and signal mask. */
1112 ret = sigaction(SIGBUS, &oldact, NULL);
1113 if (ret) {
1114 perror("file_ram_alloc: failed to reinstall signal handler");
1115 exit(1);
1116 }
1117
1118 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1119 }
1120
1121 block->fd = fd;
1122 return area;
1123
/* Common failure exit: fatal when preallocation was requested. */
1124 error:
1125 if (mem_prealloc) {
1126 exit(1);
1127 }
1128 return NULL;
1129 }
1130 #else
1131 static void *file_ram_alloc(RAMBlock *block,
1132 ram_addr_t memory,
1133 const char *path)
1134 {
1135 fprintf(stderr, "-mem-path not supported on this host\n");
1136 exit(1);
1137 }
1138 #endif
1139
1140 static ram_addr_t find_ram_offset(ram_addr_t size)
1141 {
1142 RAMBlock *block, *next_block;
1143 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1144
1145 assert(size != 0); /* it would hand out same offset multiple times */
1146
1147 if (QTAILQ_EMPTY(&ram_list.blocks))
1148 return 0;
1149
1150 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1151 ram_addr_t end, next = RAM_ADDR_MAX;
1152
1153 end = block->offset + block->length;
1154
1155 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1156 if (next_block->offset >= end) {
1157 next = MIN(next, next_block->offset);
1158 }
1159 }
1160 if (next - end >= size && next - end < mingap) {
1161 offset = end;
1162 mingap = next - end;
1163 }
1164 }
1165
1166 if (offset == RAM_ADDR_MAX) {
1167 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1168 (uint64_t)size);
1169 abort();
1170 }
1171
1172 return offset;
1173 }
1174
1175 ram_addr_t last_ram_offset(void)
1176 {
1177 RAMBlock *block;
1178 ram_addr_t last = 0;
1179
1180 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1181 last = MAX(last, block->offset + block->length);
1182
1183 return last;
1184 }
1185
1186 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1187 {
1188 int ret;
1189
1190 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1191 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1192 "dump-guest-core", true)) {
1193 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1194 if (ret) {
1195 perror("qemu_madvise");
1196 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1197 "but dump_guest_core=off specified\n");
1198 }
1199 }
1200 }
1201
1202 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1203 {
1204 RAMBlock *new_block, *block;
1205
1206 new_block = NULL;
1207 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1208 if (block->offset == addr) {
1209 new_block = block;
1210 break;
1211 }
1212 }
1213 assert(new_block);
1214 assert(!new_block->idstr[0]);
1215
1216 if (dev) {
1217 char *id = qdev_get_dev_path(dev);
1218 if (id) {
1219 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1220 g_free(id);
1221 }
1222 }
1223 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1224
1225 /* This assumes the iothread lock is taken here too. */
1226 qemu_mutex_lock_ramlist();
1227 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1228 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1229 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1230 new_block->idstr);
1231 abort();
1232 }
1233 }
1234 qemu_mutex_unlock_ramlist();
1235 }
1236
1237 static int memory_try_enable_merging(void *addr, size_t len)
1238 {
1239 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1240 /* disabled by the user */
1241 return 0;
1242 }
1243
1244 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1245 }
1246
1247 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1248 MemoryRegion *mr)
1249 {
1250 RAMBlock *block, *new_block;
1251 ram_addr_t old_ram_size, new_ram_size;
1252
1253 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1254
1255 size = TARGET_PAGE_ALIGN(size);
1256 new_block = g_malloc0(sizeof(*new_block));
1257 new_block->fd = -1;
1258
1259 /* This assumes the iothread lock is taken here too. */
1260 qemu_mutex_lock_ramlist();
1261 new_block->mr = mr;
1262 new_block->offset = find_ram_offset(size);
1263 if (host) {
1264 new_block->host = host;
1265 new_block->flags |= RAM_PREALLOC_MASK;
1266 } else if (xen_enabled()) {
1267 if (mem_path) {
1268 fprintf(stderr, "-mem-path not supported with Xen\n");
1269 exit(1);
1270 }
1271 xen_ram_alloc(new_block->offset, size, mr);
1272 } else {
1273 if (mem_path) {
1274 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1275 /*
1276 * file_ram_alloc() needs to allocate just like
1277 * phys_mem_alloc, but we haven't bothered to provide
1278 * a hook there.
1279 */
1280 fprintf(stderr,
1281 "-mem-path not supported with this accelerator\n");
1282 exit(1);
1283 }
1284 new_block->host = file_ram_alloc(new_block, size, mem_path);
1285 }
1286 if (!new_block->host) {
1287 new_block->host = phys_mem_alloc(size);
1288 if (!new_block->host) {
1289 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1290 new_block->mr->name, strerror(errno));
1291 exit(1);
1292 }
1293 memory_try_enable_merging(new_block->host, size);
1294 }
1295 }
1296 new_block->length = size;
1297
1298 /* Keep the list sorted from biggest to smallest block. */
1299 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1300 if (block->length < new_block->length) {
1301 break;
1302 }
1303 }
1304 if (block) {
1305 QTAILQ_INSERT_BEFORE(block, new_block, next);
1306 } else {
1307 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1308 }
1309 ram_list.mru_block = NULL;
1310
1311 ram_list.version++;
1312 qemu_mutex_unlock_ramlist();
1313
1314 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1315
1316 if (new_ram_size > old_ram_size) {
1317 int i;
1318 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1319 ram_list.dirty_memory[i] =
1320 bitmap_zero_extend(ram_list.dirty_memory[i],
1321 old_ram_size, new_ram_size);
1322 }
1323 }
1324 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1325
1326 qemu_ram_setup_dump(new_block->host, size);
1327 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1328 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1329
1330 if (kvm_enabled())
1331 kvm_setup_guest_memory(new_block->host, size);
1332
1333 return new_block->offset;
1334 }
1335
1336 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1337 {
1338 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1339 }
1340
1341 void qemu_ram_free_from_ptr(ram_addr_t addr)
1342 {
1343 RAMBlock *block;
1344
1345 /* This assumes the iothread lock is taken here too. */
1346 qemu_mutex_lock_ramlist();
1347 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1348 if (addr == block->offset) {
1349 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1350 ram_list.mru_block = NULL;
1351 ram_list.version++;
1352 g_free(block);
1353 break;
1354 }
1355 }
1356 qemu_mutex_unlock_ramlist();
1357 }
1358
1359 void qemu_ram_free(ram_addr_t addr)
1360 {
1361 RAMBlock *block;
1362
1363 /* This assumes the iothread lock is taken here too. */
1364 qemu_mutex_lock_ramlist();
1365 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1366 if (addr == block->offset) {
1367 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1368 ram_list.mru_block = NULL;
1369 ram_list.version++;
1370 if (block->flags & RAM_PREALLOC_MASK) {
1371 ;
1372 } else if (xen_enabled()) {
1373 xen_invalidate_map_cache_entry(block->host);
1374 #ifndef _WIN32
1375 } else if (block->fd >= 0) {
1376 munmap(block->host, block->length);
1377 close(block->fd);
1378 #endif
1379 } else {
1380 qemu_anon_ram_free(block->host, block->length);
1381 }
1382 g_free(block);
1383 break;
1384 }
1385 }
1386 qemu_mutex_unlock_ramlist();
1387
1388 }
1389
1390 #ifndef _WIN32
1391 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1392 {
1393 RAMBlock *block;
1394 ram_addr_t offset;
1395 int flags;
1396 void *area, *vaddr;
1397
1398 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1399 offset = addr - block->offset;
1400 if (offset < block->length) {
1401 vaddr = block->host + offset;
1402 if (block->flags & RAM_PREALLOC_MASK) {
1403 ;
1404 } else if (xen_enabled()) {
1405 abort();
1406 } else {
1407 flags = MAP_FIXED;
1408 munmap(vaddr, length);
1409 if (block->fd >= 0) {
1410 #ifdef MAP_POPULATE
1411 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1412 MAP_PRIVATE;
1413 #else
1414 flags |= MAP_PRIVATE;
1415 #endif
1416 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1417 flags, block->fd, offset);
1418 } else {
1419 /*
1420 * Remap needs to match alloc. Accelerators that
1421 * set phys_mem_alloc never remap. If they did,
1422 * we'd need a remap hook here.
1423 */
1424 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1425
1426 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1427 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1428 flags, -1, 0);
1429 }
1430 if (area != vaddr) {
1431 fprintf(stderr, "Could not remap addr: "
1432 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1433 length, addr);
1434 exit(1);
1435 }
1436 memory_try_enable_merging(vaddr, length);
1437 qemu_ram_setup_dump(vaddr, length);
1438 }
1439 return;
1440 }
1441 }
1442 }
1443 #endif /* !_WIN32 */
1444
1445 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1446 With the exception of the softmmu code in this file, this should
1447 only be used for local memory (e.g. video ram) that the device owns,
1448 and knows it isn't going to access beyond the end of the block.
1449
1450 It should not be used for general purpose DMA.
1451 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1452 */
1453 void *qemu_get_ram_ptr(ram_addr_t addr)
1454 {
1455 RAMBlock *block = qemu_get_ram_block(addr);
1456
1457 if (xen_enabled()) {
1458 /* We need to check if the requested address is in the RAM
1459 * because we don't want to map the entire memory in QEMU.
1460 * In that case just map until the end of the page.
1461 */
1462 if (block->offset == 0) {
1463 return xen_map_cache(addr, 0, 0);
1464 } else if (block->host == NULL) {
1465 block->host =
1466 xen_map_cache(block->offset, block->length, 1);
1467 }
1468 }
1469 return block->host + (addr - block->offset);
1470 }
1471
1472 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1473 * but takes a size argument */
1474 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1475 {
1476 if (*size == 0) {
1477 return NULL;
1478 }
1479 if (xen_enabled()) {
1480 return xen_map_cache(addr, *size, 1);
1481 } else {
1482 RAMBlock *block;
1483
1484 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1485 if (addr - block->offset < block->length) {
1486 if (addr - block->offset + *size > block->length)
1487 *size = block->length - addr + block->offset;
1488 return block->host + (addr - block->offset);
1489 }
1490 }
1491
1492 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1493 abort();
1494 }
1495 }
1496
1497 /* Some of the softmmu routines need to translate from a host pointer
1498 (typically a TLB entry) back to a ram offset. */
1499 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1500 {
1501 RAMBlock *block;
1502 uint8_t *host = ptr;
1503
1504 if (xen_enabled()) {
1505 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1506 return qemu_get_ram_block(*ram_addr)->mr;
1507 }
1508
1509 block = ram_list.mru_block;
1510 if (block && block->host && host - block->host < block->length) {
1511 goto found;
1512 }
1513
1514 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1515 /* This case append when the block is not mapped. */
1516 if (block->host == NULL) {
1517 continue;
1518 }
1519 if (host - block->host < block->length) {
1520 goto found;
1521 }
1522 }
1523
1524 return NULL;
1525
1526 found:
1527 *ram_addr = block->offset + (host - block->host);
1528 return block->mr;
1529 }
1530
1531 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1532 uint64_t val, unsigned size)
1533 {
1534 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1535 tb_invalidate_phys_page_fast(ram_addr, size);
1536 }
1537 switch (size) {
1538 case 1:
1539 stb_p(qemu_get_ram_ptr(ram_addr), val);
1540 break;
1541 case 2:
1542 stw_p(qemu_get_ram_ptr(ram_addr), val);
1543 break;
1544 case 4:
1545 stl_p(qemu_get_ram_ptr(ram_addr), val);
1546 break;
1547 default:
1548 abort();
1549 }
1550 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1551 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1552 /* we remove the notdirty callback only if the code has been
1553 flushed */
1554 if (!cpu_physical_memory_is_clean(ram_addr)) {
1555 CPUArchState *env = current_cpu->env_ptr;
1556 tlb_set_dirty(env, env->mem_io_vaddr);
1557 }
1558 }
1559
1560 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1561 unsigned size, bool is_write)
1562 {
1563 return is_write;
1564 }
1565
1566 static const MemoryRegionOps notdirty_mem_ops = {
1567 .write = notdirty_mem_write,
1568 .valid.accepts = notdirty_mem_accepts,
1569 .endianness = DEVICE_NATIVE_ENDIAN,
1570 };
1571
1572 /* Generate a debug exception if a watchpoint has been hit. */
1573 static void check_watchpoint(int offset, int len_mask, int flags)
1574 {
1575 CPUArchState *env = current_cpu->env_ptr;
1576 target_ulong pc, cs_base;
1577 target_ulong vaddr;
1578 CPUWatchpoint *wp;
1579 int cpu_flags;
1580
1581 if (env->watchpoint_hit) {
1582 /* We re-entered the check after replacing the TB. Now raise
1583 * the debug interrupt so that is will trigger after the
1584 * current instruction. */
1585 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1586 return;
1587 }
1588 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1589 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1590 if ((vaddr == (wp->vaddr & len_mask) ||
1591 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1592 wp->flags |= BP_WATCHPOINT_HIT;
1593 if (!env->watchpoint_hit) {
1594 env->watchpoint_hit = wp;
1595 tb_check_watchpoint(env);
1596 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1597 env->exception_index = EXCP_DEBUG;
1598 cpu_loop_exit(env);
1599 } else {
1600 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1601 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1602 cpu_resume_from_signal(env, NULL);
1603 }
1604 }
1605 } else {
1606 wp->flags &= ~BP_WATCHPOINT_HIT;
1607 }
1608 }
1609 }
1610
1611 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1612 so these check for a hit then pass through to the normal out-of-line
1613 phys routines. */
1614 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1615 unsigned size)
1616 {
1617 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1618 switch (size) {
1619 case 1: return ldub_phys(&address_space_memory, addr);
1620 case 2: return lduw_phys(&address_space_memory, addr);
1621 case 4: return ldl_phys(&address_space_memory, addr);
1622 default: abort();
1623 }
1624 }
1625
1626 static void watch_mem_write(void *opaque, hwaddr addr,
1627 uint64_t val, unsigned size)
1628 {
1629 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1630 switch (size) {
1631 case 1:
1632 stb_phys(&address_space_memory, addr, val);
1633 break;
1634 case 2:
1635 stw_phys(&address_space_memory, addr, val);
1636 break;
1637 case 4:
1638 stl_phys(&address_space_memory, addr, val);
1639 break;
1640 default: abort();
1641 }
1642 }
1643
1644 static const MemoryRegionOps watch_mem_ops = {
1645 .read = watch_mem_read,
1646 .write = watch_mem_write,
1647 .endianness = DEVICE_NATIVE_ENDIAN,
1648 };
1649
1650 static uint64_t subpage_read(void *opaque, hwaddr addr,
1651 unsigned len)
1652 {
1653 subpage_t *subpage = opaque;
1654 uint8_t buf[4];
1655
1656 #if defined(DEBUG_SUBPAGE)
1657 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1658 subpage, len, addr);
1659 #endif
1660 address_space_read(subpage->as, addr + subpage->base, buf, len);
1661 switch (len) {
1662 case 1:
1663 return ldub_p(buf);
1664 case 2:
1665 return lduw_p(buf);
1666 case 4:
1667 return ldl_p(buf);
1668 default:
1669 abort();
1670 }
1671 }
1672
1673 static void subpage_write(void *opaque, hwaddr addr,
1674 uint64_t value, unsigned len)
1675 {
1676 subpage_t *subpage = opaque;
1677 uint8_t buf[4];
1678
1679 #if defined(DEBUG_SUBPAGE)
1680 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1681 " value %"PRIx64"\n",
1682 __func__, subpage, len, addr, value);
1683 #endif
1684 switch (len) {
1685 case 1:
1686 stb_p(buf, value);
1687 break;
1688 case 2:
1689 stw_p(buf, value);
1690 break;
1691 case 4:
1692 stl_p(buf, value);
1693 break;
1694 default:
1695 abort();
1696 }
1697 address_space_write(subpage->as, addr + subpage->base, buf, len);
1698 }
1699
1700 static bool subpage_accepts(void *opaque, hwaddr addr,
1701 unsigned len, bool is_write)
1702 {
1703 subpage_t *subpage = opaque;
1704 #if defined(DEBUG_SUBPAGE)
1705 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1706 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1707 #endif
1708
1709 return address_space_access_valid(subpage->as, addr + subpage->base,
1710 len, is_write);
1711 }
1712
1713 static const MemoryRegionOps subpage_ops = {
1714 .read = subpage_read,
1715 .write = subpage_write,
1716 .valid.accepts = subpage_accepts,
1717 .endianness = DEVICE_NATIVE_ENDIAN,
1718 };
1719
1720 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1721 uint16_t section)
1722 {
1723 int idx, eidx;
1724
1725 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1726 return -1;
1727 idx = SUBPAGE_IDX(start);
1728 eidx = SUBPAGE_IDX(end);
1729 #if defined(DEBUG_SUBPAGE)
1730 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1731 __func__, mmio, start, end, idx, eidx, section);
1732 #endif
1733 for (; idx <= eidx; idx++) {
1734 mmio->sub_section[idx] = section;
1735 }
1736
1737 return 0;
1738 }
1739
1740 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1741 {
1742 subpage_t *mmio;
1743
1744 mmio = g_malloc0(sizeof(subpage_t));
1745
1746 mmio->as = as;
1747 mmio->base = base;
1748 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1749 "subpage", TARGET_PAGE_SIZE);
1750 mmio->iomem.subpage = true;
1751 #if defined(DEBUG_SUBPAGE)
1752 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1753 mmio, base, TARGET_PAGE_SIZE);
1754 #endif
1755 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1756
1757 return mmio;
1758 }
1759
1760 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1761 {
1762 MemoryRegionSection section = {
1763 .address_space = &address_space_memory,
1764 .mr = mr,
1765 .offset_within_address_space = 0,
1766 .offset_within_region = 0,
1767 .size = int128_2_64(),
1768 };
1769
1770 return phys_section_add(map, &section);
1771 }
1772
1773 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1774 {
1775 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1776 }
1777
1778 static void io_mem_init(void)
1779 {
1780 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1781 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1782 "unassigned", UINT64_MAX);
1783 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1784 "notdirty", UINT64_MAX);
1785 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1786 "watch", UINT64_MAX);
1787 }
1788
1789 static void mem_begin(MemoryListener *listener)
1790 {
1791 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1792 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1793 uint16_t n;
1794
1795 n = dummy_section(&d->map, &io_mem_unassigned);
1796 assert(n == PHYS_SECTION_UNASSIGNED);
1797 n = dummy_section(&d->map, &io_mem_notdirty);
1798 assert(n == PHYS_SECTION_NOTDIRTY);
1799 n = dummy_section(&d->map, &io_mem_rom);
1800 assert(n == PHYS_SECTION_ROM);
1801 n = dummy_section(&d->map, &io_mem_watch);
1802 assert(n == PHYS_SECTION_WATCH);
1803
1804 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1805 d->as = as;
1806 as->next_dispatch = d;
1807 }
1808
1809 static void mem_commit(MemoryListener *listener)
1810 {
1811 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1812 AddressSpaceDispatch *cur = as->dispatch;
1813 AddressSpaceDispatch *next = as->next_dispatch;
1814
1815 phys_page_compact_all(next, next->map.nodes_nb);
1816
1817 as->dispatch = next;
1818
1819 if (cur) {
1820 phys_sections_free(&cur->map);
1821 g_free(cur);
1822 }
1823 }
1824
1825 static void tcg_commit(MemoryListener *listener)
1826 {
1827 CPUState *cpu;
1828
1829 /* since each CPU stores ram addresses in its TLB cache, we must
1830 reset the modified entries */
1831 /* XXX: slow ! */
1832 CPU_FOREACH(cpu) {
1833 CPUArchState *env = cpu->env_ptr;
1834
1835 /* FIXME: Disentangle the cpu.h circular files deps so we can
1836 directly get the right CPU from listener. */
1837 if (cpu->tcg_as_listener != listener) {
1838 continue;
1839 }
1840 tlb_flush(env, 1);
1841 }
1842 }
1843
1844 static void core_log_global_start(MemoryListener *listener)
1845 {
1846 cpu_physical_memory_set_dirty_tracking(true);
1847 }
1848
1849 static void core_log_global_stop(MemoryListener *listener)
1850 {
1851 cpu_physical_memory_set_dirty_tracking(false);
1852 }
1853
1854 static MemoryListener core_memory_listener = {
1855 .log_global_start = core_log_global_start,
1856 .log_global_stop = core_log_global_stop,
1857 .priority = 1,
1858 };
1859
1860 void address_space_init_dispatch(AddressSpace *as)
1861 {
1862 as->dispatch = NULL;
1863 as->dispatch_listener = (MemoryListener) {
1864 .begin = mem_begin,
1865 .commit = mem_commit,
1866 .region_add = mem_add,
1867 .region_nop = mem_add,
1868 .priority = 0,
1869 };
1870 memory_listener_register(&as->dispatch_listener, as);
1871 }
1872
1873 void address_space_destroy_dispatch(AddressSpace *as)
1874 {
1875 AddressSpaceDispatch *d = as->dispatch;
1876
1877 memory_listener_unregister(&as->dispatch_listener);
1878 g_free(d);
1879 as->dispatch = NULL;
1880 }
1881
1882 static void memory_map_init(void)
1883 {
1884 system_memory = g_malloc(sizeof(*system_memory));
1885
1886 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1887 address_space_init(&address_space_memory, system_memory, "memory");
1888
1889 system_io = g_malloc(sizeof(*system_io));
1890 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1891 65536);
1892 address_space_init(&address_space_io, system_io, "I/O");
1893
1894 memory_listener_register(&core_memory_listener, &address_space_memory);
1895 }
1896
1897 MemoryRegion *get_system_memory(void)
1898 {
1899 return system_memory;
1900 }
1901
1902 MemoryRegion *get_system_io(void)
1903 {
1904 return system_io;
1905 }
1906
1907 #endif /* !defined(CONFIG_USER_ONLY) */
1908
1909 /* physical memory access (slow version, mainly for debug) */
1910 #if defined(CONFIG_USER_ONLY)
1911 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1912 uint8_t *buf, int len, int is_write)
1913 {
1914 int l, flags;
1915 target_ulong page;
1916 void * p;
1917
1918 while (len > 0) {
1919 page = addr & TARGET_PAGE_MASK;
1920 l = (page + TARGET_PAGE_SIZE) - addr;
1921 if (l > len)
1922 l = len;
1923 flags = page_get_flags(page);
1924 if (!(flags & PAGE_VALID))
1925 return -1;
1926 if (is_write) {
1927 if (!(flags & PAGE_WRITE))
1928 return -1;
1929 /* XXX: this code should not depend on lock_user */
1930 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1931 return -1;
1932 memcpy(p, buf, l);
1933 unlock_user(p, addr, l);
1934 } else {
1935 if (!(flags & PAGE_READ))
1936 return -1;
1937 /* XXX: this code should not depend on lock_user */
1938 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1939 return -1;
1940 memcpy(buf, p, l);
1941 unlock_user(p, addr, 0);
1942 }
1943 len -= l;
1944 buf += l;
1945 addr += l;
1946 }
1947 return 0;
1948 }
1949
1950 #else
1951
1952 static void invalidate_and_set_dirty(hwaddr addr,
1953 hwaddr length)
1954 {
1955 if (cpu_physical_memory_is_clean(addr)) {
1956 /* invalidate code */
1957 tb_invalidate_phys_page_range(addr, addr + length, 0);
1958 /* set dirty bit */
1959 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1960 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1961 }
1962 xen_modified_memory(addr, length);
1963 }
1964
1965 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1966 {
1967 unsigned access_size_max = mr->ops->valid.max_access_size;
1968
1969 /* Regions are assumed to support 1-4 byte accesses unless
1970 otherwise specified. */
1971 if (access_size_max == 0) {
1972 access_size_max = 4;
1973 }
1974
1975 /* Bound the maximum access by the alignment of the address. */
1976 if (!mr->ops->impl.unaligned) {
1977 unsigned align_size_max = addr & -addr;
1978 if (align_size_max != 0 && align_size_max < access_size_max) {
1979 access_size_max = align_size_max;
1980 }
1981 }
1982
1983 /* Don't attempt accesses larger than the maximum. */
1984 if (l > access_size_max) {
1985 l = access_size_max;
1986 }
1987 if (l & (l - 1)) {
1988 l = 1 << (qemu_fls(l) - 1);
1989 }
1990
1991 return l;
1992 }
1993
1994 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1995 int len, bool is_write)
1996 {
1997 hwaddr l;
1998 uint8_t *ptr;
1999 uint64_t val;
2000 hwaddr addr1;
2001 MemoryRegion *mr;
2002 bool error = false;
2003
2004 while (len > 0) {
2005 l = len;
2006 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2007
2008 if (is_write) {
2009 if (!memory_access_is_direct(mr, is_write)) {
2010 l = memory_access_size(mr, l, addr1);
2011 /* XXX: could force current_cpu to NULL to avoid
2012 potential bugs */
2013 switch (l) {
2014 case 8:
2015 /* 64 bit write access */
2016 val = ldq_p(buf);
2017 error |= io_mem_write(mr, addr1, val, 8);
2018 break;
2019 case 4:
2020 /* 32 bit write access */
2021 val = ldl_p(buf);
2022 error |= io_mem_write(mr, addr1, val, 4);
2023 break;
2024 case 2:
2025 /* 16 bit write access */
2026 val = lduw_p(buf);
2027 error |= io_mem_write(mr, addr1, val, 2);
2028 break;
2029 case 1:
2030 /* 8 bit write access */
2031 val = ldub_p(buf);
2032 error |= io_mem_write(mr, addr1, val, 1);
2033 break;
2034 default:
2035 abort();
2036 }
2037 } else {
2038 addr1 += memory_region_get_ram_addr(mr);
2039 /* RAM case */
2040 ptr = qemu_get_ram_ptr(addr1);
2041 memcpy(ptr, buf, l);
2042 invalidate_and_set_dirty(addr1, l);
2043 }
2044 } else {
2045 if (!memory_access_is_direct(mr, is_write)) {
2046 /* I/O case */
2047 l = memory_access_size(mr, l, addr1);
2048 switch (l) {
2049 case 8:
2050 /* 64 bit read access */
2051 error |= io_mem_read(mr, addr1, &val, 8);
2052 stq_p(buf, val);
2053 break;
2054 case 4:
2055 /* 32 bit read access */
2056 error |= io_mem_read(mr, addr1, &val, 4);
2057 stl_p(buf, val);
2058 break;
2059 case 2:
2060 /* 16 bit read access */
2061 error |= io_mem_read(mr, addr1, &val, 2);
2062 stw_p(buf, val);
2063 break;
2064 case 1:
2065 /* 8 bit read access */
2066 error |= io_mem_read(mr, addr1, &val, 1);
2067 stb_p(buf, val);
2068 break;
2069 default:
2070 abort();
2071 }
2072 } else {
2073 /* RAM case */
2074 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2075 memcpy(buf, ptr, l);
2076 }
2077 }
2078 len -= l;
2079 buf += l;
2080 addr += l;
2081 }
2082
2083 return error;
2084 }
2085
2086 bool address_space_write(AddressSpace *as, hwaddr addr,
2087 const uint8_t *buf, int len)
2088 {
2089 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2090 }
2091
2092 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2093 {
2094 return address_space_rw(as, addr, buf, len, false);
2095 }
2096
2097
2098 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2099 int len, int is_write)
2100 {
2101 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2102 }
2103
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into the target memory */
    FLUSH_CACHE,    /* flush the host icache over the target memory */
};
2108
2109 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2110 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2111 {
2112 hwaddr l;
2113 uint8_t *ptr;
2114 hwaddr addr1;
2115 MemoryRegion *mr;
2116
2117 while (len > 0) {
2118 l = len;
2119 mr = address_space_translate(as, addr, &addr1, &l, true);
2120
2121 if (!(memory_region_is_ram(mr) ||
2122 memory_region_is_romd(mr))) {
2123 /* do nothing */
2124 } else {
2125 addr1 += memory_region_get_ram_addr(mr);
2126 /* ROM/RAM case */
2127 ptr = qemu_get_ram_ptr(addr1);
2128 switch (type) {
2129 case WRITE_DATA:
2130 memcpy(ptr, buf, l);
2131 invalidate_and_set_dirty(addr1, l);
2132 break;
2133 case FLUSH_CACHE:
2134 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2135 break;
2136 }
2137 }
2138 len -= l;
2139 buf += l;
2140 addr += l;
2141 }
2142 }
2143
2144 /* used for ROM loading : can write in RAM and ROM */
2145 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2146 const uint8_t *buf, int len)
2147 {
2148 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2149 }
2150
2151 void cpu_flush_icache_range(hwaddr start, int len)
2152 {
2153 /*
2154 * This function should do the same thing as an icache flush that was
2155 * triggered from within the guest. For TCG we are always cache coherent,
2156 * so there is no need to flush anything. For KVM / Xen we need to flush
2157 * the host's instruction cache at least.
2158 */
2159 if (tcg_enabled()) {
2160 return;
2161 }
2162
2163 cpu_physical_memory_write_rom_internal(&address_space_memory,
2164 start, NULL, len, FLUSH_CACHE);
2165 }
2166
2167 typedef struct {
2168 MemoryRegion *mr;
2169 void *buffer;
2170 hwaddr addr;
2171 hwaddr len;
2172 } BounceBuffer;
2173
2174 static BounceBuffer bounce;
2175
2176 typedef struct MapClient {
2177 void *opaque;
2178 void (*callback)(void *opaque);
2179 QLIST_ENTRY(MapClient) link;
2180 } MapClient;
2181
2182 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2183 = QLIST_HEAD_INITIALIZER(map_client_list);
2184
2185 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2186 {
2187 MapClient *client = g_malloc(sizeof(*client));
2188
2189 client->opaque = opaque;
2190 client->callback = callback;
2191 QLIST_INSERT_HEAD(&map_client_list, client, link);
2192 return client;
2193 }
2194
2195 static void cpu_unregister_map_client(void *_client)
2196 {
2197 MapClient *client = (MapClient *)_client;
2198
2199 QLIST_REMOVE(client, link);
2200 g_free(client);
2201 }
2202
2203 static void cpu_notify_map_clients(void)
2204 {
2205 MapClient *client;
2206
2207 while (!QLIST_EMPTY(&map_client_list)) {
2208 client = QLIST_FIRST(&map_client_list);
2209 client->callback(client->opaque);
2210 cpu_unregister_map_client(client);
2211 }
2212 }
2213
2214 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2215 {
2216 MemoryRegion *mr;
2217 hwaddr l, xlat;
2218
2219 while (len > 0) {
2220 l = len;
2221 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2222 if (!memory_access_is_direct(mr, is_write)) {
2223 l = memory_access_size(mr, l, addr);
2224 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2225 return false;
2226 }
2227 }
2228
2229 len -= l;
2230 addr += l;
2231 }
2232 return true;
2233 }
2234
2235 /* Map a physical memory region into a host virtual address.
2236 * May map a subset of the requested range, given by and returned in *plen.
2237 * May return NULL if resources needed to perform the mapping are exhausted.
2238 * Use only for reads OR writes - not for read-modify-write operations.
2239 * Use cpu_register_map_client() to know when retrying the map operation is
2240 * likely to succeed.
2241 */
2242 void *address_space_map(AddressSpace *as,
2243 hwaddr addr,
2244 hwaddr *plen,
2245 bool is_write)
2246 {
2247 hwaddr len = *plen;
2248 hwaddr done = 0;
2249 hwaddr l, xlat, base;
2250 MemoryRegion *mr, *this_mr;
2251 ram_addr_t raddr;
2252
2253 if (len == 0) {
2254 return NULL;
2255 }
2256
2257 l = len;
2258 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2259 if (!memory_access_is_direct(mr, is_write)) {
2260 if (bounce.buffer) {
2261 return NULL;
2262 }
2263 /* Avoid unbounded allocations */
2264 l = MIN(l, TARGET_PAGE_SIZE);
2265 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2266 bounce.addr = addr;
2267 bounce.len = l;
2268
2269 memory_region_ref(mr);
2270 bounce.mr = mr;
2271 if (!is_write) {
2272 address_space_read(as, addr, bounce.buffer, l);
2273 }
2274
2275 *plen = l;
2276 return bounce.buffer;
2277 }
2278
2279 base = xlat;
2280 raddr = memory_region_get_ram_addr(mr);
2281
2282 for (;;) {
2283 len -= l;
2284 addr += l;
2285 done += l;
2286 if (len == 0) {
2287 break;
2288 }
2289
2290 l = len;
2291 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2292 if (this_mr != mr || xlat != base + done) {
2293 break;
2294 }
2295 }
2296
2297 memory_region_ref(mr);
2298 *plen = done;
2299 return qemu_ram_ptr_length(raddr + base, plen);
2300 }
2301
2302 /* Unmaps a memory region previously mapped by address_space_map().
2303 * Will also mark the memory as dirty if is_write == 1. access_len gives
2304 * the amount of memory that was actually read or written by the caller.
2305 */
2306 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2307 int is_write, hwaddr access_len)
2308 {
2309 if (buffer != bounce.buffer) {
2310 MemoryRegion *mr;
2311 ram_addr_t addr1;
2312
2313 mr = qemu_ram_addr_from_host(buffer, &addr1);
2314 assert(mr != NULL);
2315 if (is_write) {
2316 while (access_len) {
2317 unsigned l;
2318 l = TARGET_PAGE_SIZE;
2319 if (l > access_len)
2320 l = access_len;
2321 invalidate_and_set_dirty(addr1, l);
2322 addr1 += l;
2323 access_len -= l;
2324 }
2325 }
2326 if (xen_enabled()) {
2327 xen_invalidate_map_cache_entry(buffer);
2328 }
2329 memory_region_unref(mr);
2330 return;
2331 }
2332 if (is_write) {
2333 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2334 }
2335 qemu_vfree(bounce.buffer);
2336 bounce.buffer = NULL;
2337 memory_region_unref(bounce.mr);
2338 cpu_notify_map_clients();
2339 }
2340
2341 void *cpu_physical_memory_map(hwaddr addr,
2342 hwaddr *plen,
2343 int is_write)
2344 {
2345 return address_space_map(&address_space_memory, addr, plen, is_write);
2346 }
2347
2348 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2349 int is_write, hwaddr access_len)
2350 {
2351 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2352 }
2353
2354 /* warning: addr must be aligned */
2355 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2356 enum device_endian endian)
2357 {
2358 uint8_t *ptr;
2359 uint64_t val;
2360 MemoryRegion *mr;
2361 hwaddr l = 4;
2362 hwaddr addr1;
2363
2364 mr = address_space_translate(as, addr, &addr1, &l, false);
2365 if (l < 4 || !memory_access_is_direct(mr, false)) {
2366 /* I/O case */
2367 io_mem_read(mr, addr1, &val, 4);
2368 #if defined(TARGET_WORDS_BIGENDIAN)
2369 if (endian == DEVICE_LITTLE_ENDIAN) {
2370 val = bswap32(val);
2371 }
2372 #else
2373 if (endian == DEVICE_BIG_ENDIAN) {
2374 val = bswap32(val);
2375 }
2376 #endif
2377 } else {
2378 /* RAM case */
2379 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2380 & TARGET_PAGE_MASK)
2381 + addr1);
2382 switch (endian) {
2383 case DEVICE_LITTLE_ENDIAN:
2384 val = ldl_le_p(ptr);
2385 break;
2386 case DEVICE_BIG_ENDIAN:
2387 val = ldl_be_p(ptr);
2388 break;
2389 default:
2390 val = ldl_p(ptr);
2391 break;
2392 }
2393 }
2394 return val;
2395 }
2396
2397 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2398 {
2399 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2400 }
2401
2402 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2403 {
2404 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2405 }
2406
2407 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2408 {
2409 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2410 }
2411
2412 /* warning: addr must be aligned */
2413 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2414 enum device_endian endian)
2415 {
2416 uint8_t *ptr;
2417 uint64_t val;
2418 MemoryRegion *mr;
2419 hwaddr l = 8;
2420 hwaddr addr1;
2421
2422 mr = address_space_translate(as, addr, &addr1, &l,
2423 false);
2424 if (l < 8 || !memory_access_is_direct(mr, false)) {
2425 /* I/O case */
2426 io_mem_read(mr, addr1, &val, 8);
2427 #if defined(TARGET_WORDS_BIGENDIAN)
2428 if (endian == DEVICE_LITTLE_ENDIAN) {
2429 val = bswap64(val);
2430 }
2431 #else
2432 if (endian == DEVICE_BIG_ENDIAN) {
2433 val = bswap64(val);
2434 }
2435 #endif
2436 } else {
2437 /* RAM case */
2438 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2439 & TARGET_PAGE_MASK)
2440 + addr1);
2441 switch (endian) {
2442 case DEVICE_LITTLE_ENDIAN:
2443 val = ldq_le_p(ptr);
2444 break;
2445 case DEVICE_BIG_ENDIAN:
2446 val = ldq_be_p(ptr);
2447 break;
2448 default:
2449 val = ldq_p(ptr);
2450 break;
2451 }
2452 }
2453 return val;
2454 }
2455
2456 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2457 {
2458 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2459 }
2460
2461 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2462 {
2463 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2464 }
2465
2466 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2467 {
2468 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2469 }
2470
2471 /* XXX: optimize */
2472 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2473 {
2474 uint8_t val;
2475 address_space_rw(as, addr, &val, 1, 0);
2476 return val;
2477 }
2478
2479 /* warning: addr must be aligned */
2480 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2481 enum device_endian endian)
2482 {
2483 uint8_t *ptr;
2484 uint64_t val;
2485 MemoryRegion *mr;
2486 hwaddr l = 2;
2487 hwaddr addr1;
2488
2489 mr = address_space_translate(as, addr, &addr1, &l,
2490 false);
2491 if (l < 2 || !memory_access_is_direct(mr, false)) {
2492 /* I/O case */
2493 io_mem_read(mr, addr1, &val, 2);
2494 #if defined(TARGET_WORDS_BIGENDIAN)
2495 if (endian == DEVICE_LITTLE_ENDIAN) {
2496 val = bswap16(val);
2497 }
2498 #else
2499 if (endian == DEVICE_BIG_ENDIAN) {
2500 val = bswap16(val);
2501 }
2502 #endif
2503 } else {
2504 /* RAM case */
2505 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2506 & TARGET_PAGE_MASK)
2507 + addr1);
2508 switch (endian) {
2509 case DEVICE_LITTLE_ENDIAN:
2510 val = lduw_le_p(ptr);
2511 break;
2512 case DEVICE_BIG_ENDIAN:
2513 val = lduw_be_p(ptr);
2514 break;
2515 default:
2516 val = lduw_p(ptr);
2517 break;
2518 }
2519 }
2520 return val;
2521 }
2522
2523 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2524 {
2525 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2526 }
2527
2528 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2529 {
2530 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2531 }
2532
2533 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2534 {
2535 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2536 }
2537
2538 /* warning: addr must be aligned. The ram page is not masked as dirty
2539 and the code inside is not invalidated. It is useful if the dirty
2540 bits are used to track modified PTEs */
2541 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2542 {
2543 uint8_t *ptr;
2544 MemoryRegion *mr;
2545 hwaddr l = 4;
2546 hwaddr addr1;
2547
2548 mr = address_space_translate(as, addr, &addr1, &l,
2549 true);
2550 if (l < 4 || !memory_access_is_direct(mr, true)) {
2551 io_mem_write(mr, addr1, val, 4);
2552 } else {
2553 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2554 ptr = qemu_get_ram_ptr(addr1);
2555 stl_p(ptr, val);
2556
2557 if (unlikely(in_migration)) {
2558 if (cpu_physical_memory_is_clean(addr1)) {
2559 /* invalidate code */
2560 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2561 /* set dirty bit */
2562 cpu_physical_memory_set_dirty_flag(addr1,
2563 DIRTY_MEMORY_MIGRATION);
2564 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2565 }
2566 }
2567 }
2568 }
2569
2570 /* warning: addr must be aligned */
2571 static inline void stl_phys_internal(AddressSpace *as,
2572 hwaddr addr, uint32_t val,
2573 enum device_endian endian)
2574 {
2575 uint8_t *ptr;
2576 MemoryRegion *mr;
2577 hwaddr l = 4;
2578 hwaddr addr1;
2579
2580 mr = address_space_translate(as, addr, &addr1, &l,
2581 true);
2582 if (l < 4 || !memory_access_is_direct(mr, true)) {
2583 #if defined(TARGET_WORDS_BIGENDIAN)
2584 if (endian == DEVICE_LITTLE_ENDIAN) {
2585 val = bswap32(val);
2586 }
2587 #else
2588 if (endian == DEVICE_BIG_ENDIAN) {
2589 val = bswap32(val);
2590 }
2591 #endif
2592 io_mem_write(mr, addr1, val, 4);
2593 } else {
2594 /* RAM case */
2595 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2596 ptr = qemu_get_ram_ptr(addr1);
2597 switch (endian) {
2598 case DEVICE_LITTLE_ENDIAN:
2599 stl_le_p(ptr, val);
2600 break;
2601 case DEVICE_BIG_ENDIAN:
2602 stl_be_p(ptr, val);
2603 break;
2604 default:
2605 stl_p(ptr, val);
2606 break;
2607 }
2608 invalidate_and_set_dirty(addr1, 4);
2609 }
2610 }
2611
2612 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2613 {
2614 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2615 }
2616
2617 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2618 {
2619 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2620 }
2621
2622 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2623 {
2624 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2625 }
2626
2627 /* XXX: optimize */
2628 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2629 {
2630 uint8_t v = val;
2631 address_space_rw(as, addr, &v, 1, 1);
2632 }
2633
2634 /* warning: addr must be aligned */
2635 static inline void stw_phys_internal(AddressSpace *as,
2636 hwaddr addr, uint32_t val,
2637 enum device_endian endian)
2638 {
2639 uint8_t *ptr;
2640 MemoryRegion *mr;
2641 hwaddr l = 2;
2642 hwaddr addr1;
2643
2644 mr = address_space_translate(as, addr, &addr1, &l, true);
2645 if (l < 2 || !memory_access_is_direct(mr, true)) {
2646 #if defined(TARGET_WORDS_BIGENDIAN)
2647 if (endian == DEVICE_LITTLE_ENDIAN) {
2648 val = bswap16(val);
2649 }
2650 #else
2651 if (endian == DEVICE_BIG_ENDIAN) {
2652 val = bswap16(val);
2653 }
2654 #endif
2655 io_mem_write(mr, addr1, val, 2);
2656 } else {
2657 /* RAM case */
2658 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2659 ptr = qemu_get_ram_ptr(addr1);
2660 switch (endian) {
2661 case DEVICE_LITTLE_ENDIAN:
2662 stw_le_p(ptr, val);
2663 break;
2664 case DEVICE_BIG_ENDIAN:
2665 stw_be_p(ptr, val);
2666 break;
2667 default:
2668 stw_p(ptr, val);
2669 break;
2670 }
2671 invalidate_and_set_dirty(addr1, 2);
2672 }
2673 }
2674
2675 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2676 {
2677 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2678 }
2679
2680 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2681 {
2682 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2683 }
2684
2685 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2686 {
2687 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2688 }
2689
2690 /* XXX: optimize */
2691 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2692 {
2693 val = tswap64(val);
2694 address_space_rw(as, addr, (void *) &val, 8, 1);
2695 }
2696
2697 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2698 {
2699 val = cpu_to_le64(val);
2700 address_space_rw(as, addr, (void *) &val, 8, 1);
2701 }
2702
2703 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2704 {
2705 val = cpu_to_be64(val);
2706 address_space_rw(as, addr, (void *) &val, 8, 1);
2707 }
2708
2709 /* virtual memory access for debug (includes writing to ROM) */
2710 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2711 uint8_t *buf, int len, int is_write)
2712 {
2713 int l;
2714 hwaddr phys_addr;
2715 target_ulong page;
2716
2717 while (len > 0) {
2718 page = addr & TARGET_PAGE_MASK;
2719 phys_addr = cpu_get_phys_page_debug(cpu, page);
2720 /* if no physical page mapped, return an error */
2721 if (phys_addr == -1)
2722 return -1;
2723 l = (page + TARGET_PAGE_SIZE) - addr;
2724 if (l > len)
2725 l = len;
2726 phys_addr += (addr & ~TARGET_PAGE_MASK);
2727 if (is_write) {
2728 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2729 } else {
2730 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2731 }
2732 len -= l;
2733 buf += l;
2734 addr += l;
2735 }
2736 return 0;
2737 }
2738 #endif
2739
2740 #if !defined(CONFIG_USER_ONLY)
2741
/*
 * A helper function for the _utterly broken_ virtio device model.
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time and
 * false otherwise.  Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
    bool big_endian = false;

#ifdef TARGET_WORDS_BIGENDIAN
    big_endian = true;
#endif
    return big_endian;
}
2755
2756 #endif
2757
2758 #ifndef CONFIG_USER_ONLY
2759 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2760 {
2761 MemoryRegion*mr;
2762 hwaddr l = 1;
2763
2764 mr = address_space_translate(&address_space_memory,
2765 phys_addr, &phys_addr, &l, false);
2766
2767 return !(memory_region_is_ram(mr) ||
2768 memory_region_is_romd(mr));
2769 }
2770
2771 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2772 {
2773 RAMBlock *block;
2774
2775 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2776 func(block->host, block->offset, block->length, opaque);
2777 }
2778 }
2779 #endif