]> git.proxmox.com Git - qemu.git/blob - exec.c
78be4f52a2da7b54e3f9f49555cf430403b7e2c0
[qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_SUBPAGE
54
#if !defined(CONFIG_USER_ONLY)
int phys_ram_fd;
/* Written by cpu_physical_memory_set_dirty_tracking(). */
static int in_migration;

/* Global list of RAM blocks; the block queue starts out empty. */
RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

/* Special regions; compared by address in memory_region_is_unassigned(). */
MemoryRegion io_mem_rom, io_mem_notdirty;
/* Region returned by address_space_translate() when an IOMMU denies access. */
static MemoryRegion io_mem_unassigned;

#endif
71
/* Head of the CPU list; entries are chained through their next_cpu field. */
CPUArchState *first_cpu;
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUArchState *,cpu_single_env);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;
80
81 #if !defined(CONFIG_USER_ONLY)
82
typedef struct PhysPageEntry PhysPageEntry;

/*
 * One entry of the multi-level physical page map: a 1-bit leaf flag plus
 * a 15-bit index.
 */
struct PhysPageEntry {
    /* Set when ptr indexes phys_sections rather than phys_map_nodes. */
    uint16_t is_leaf : 1;
     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
    uint16_t ptr : 15;
};
90
/* Per-address-space dispatch state: the root of its physical page map. */
struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    /* Embedded listener; mem_add() recovers the dispatch from it. */
    MemoryListener listener;
    /* Address space handed to subpage_init() when a subpage is created. */
    AddressSpace *as;
};
99
/* Bits of addr not covered by TARGET_PAGE_MASK (presumably the offset
   within the page - confirm against the TARGET_PAGE_MASK definition). */
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/*
 * A page whose contents are split between several sections.
 */
typedef struct subpage_t {
    /* Region entered in the page map; container_of() maps it back here. */
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    /* Indexed by SUBPAGE_IDX(addr); each value is a phys_sections index. */
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;
107
/* Growable array of registered sections, see phys_section_add(). */
static MemoryRegionSection *phys_sections;
/* Entries in use / entries allocated in phys_sections. */
static unsigned phys_sections_nb, phys_sections_nb_alloc;
/* Indices into phys_sections of the well-known sections. */
static uint16_t phys_section_unassigned;
static uint16_t phys_section_notdirty;
static uint16_t phys_section_rom;
static uint16_t phys_section_watch;

/* Simple allocator for PhysPageEntry nodes */
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
/* Nodes in use / nodes allocated in phys_map_nodes. */
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;

/* "No node" marker: the largest value the 15-bit ptr field can hold. */
#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)

static void io_mem_init(void);
static void memory_map_init(void);
static void *qemu_safe_ram_ptr(ram_addr_t addr);

static MemoryRegion io_mem_watch;
126 #endif
127
128 #if !defined(CONFIG_USER_ONLY)
129
130 static void phys_map_node_reserve(unsigned nodes)
131 {
132 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
133 typedef PhysPageEntry Node[L2_SIZE];
134 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
135 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
136 phys_map_nodes_nb + nodes);
137 phys_map_nodes = g_renew(Node, phys_map_nodes,
138 phys_map_nodes_nb_alloc);
139 }
140 }
141
142 static uint16_t phys_map_node_alloc(void)
143 {
144 unsigned i;
145 uint16_t ret;
146
147 ret = phys_map_nodes_nb++;
148 assert(ret != PHYS_MAP_NODE_NIL);
149 assert(ret != phys_map_nodes_nb_alloc);
150 for (i = 0; i < L2_SIZE; ++i) {
151 phys_map_nodes[ret][i].is_leaf = 0;
152 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
153 }
154 return ret;
155 }
156
157 static void phys_map_nodes_reset(void)
158 {
159 phys_map_nodes_nb = 0;
160 }
161
162
/*
 * Recursively point the page range starting at *index and spanning *nb
 * pages at section @leaf, working below map entry @lp, which sits at
 * depth @level (0 is the bottom level).
 *
 * *index and *nb are advanced as pages are consumed, so on return they
 * describe whatever part of the range is still unhandled.
 */
static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
                                hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    /* Number of pages covered by a single entry at this level. */
    hwaddr step = (hwaddr)1 << (level * L2_BITS);

    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
        /* No child table yet.  phys_map_node_alloc() never reallocates
           the pool, so pointers into phys_map_nodes stay valid. */
        lp->ptr = phys_map_node_alloc();
        p = phys_map_nodes[lp->ptr];
        if (level == 0) {
            /* A fresh bottom-level table starts out fully unassigned. */
            for (i = 0; i < L2_SIZE; i++) {
                p[i].is_leaf = 1;
                p[i].ptr = phys_section_unassigned;
            }
        }
    } else {
        p = phys_map_nodes[lp->ptr];
    }
    /* Slot of the child table selected by *index at this level. */
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];

    while (*nb && lp < &p[L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            /* Aligned and at least one full slot left: set it directly. */
            lp->is_leaf = true;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            /* Slot only partly covered: handle it one level down. */
            phys_page_set_level(lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}
197
198 static void phys_page_set(AddressSpaceDispatch *d,
199 hwaddr index, hwaddr nb,
200 uint16_t leaf)
201 {
202 /* Wildly overreserve - it doesn't matter much. */
203 phys_map_node_reserve(3 * P_L2_LEVELS);
204
205 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
206 }
207
208 static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
209 {
210 PhysPageEntry lp = d->phys_map;
211 PhysPageEntry *p;
212 int i;
213
214 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
215 if (lp.ptr == PHYS_MAP_NODE_NIL) {
216 return &phys_sections[phys_section_unassigned];
217 }
218 p = phys_map_nodes[lp.ptr];
219 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
220 }
221 return &phys_sections[lp.ptr];
222 }
223
224 bool memory_region_is_unassigned(MemoryRegion *mr)
225 {
226 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
227 && mr != &io_mem_watch;
228 }
229
230 static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
231 hwaddr addr,
232 bool resolve_subpage)
233 {
234 MemoryRegionSection *section;
235 subpage_t *subpage;
236
237 section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
238 if (resolve_subpage && section->mr->subpage) {
239 subpage = container_of(section->mr, subpage_t, iomem);
240 section = &phys_sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
241 }
242 return section;
243 }
244
245 static MemoryRegionSection *
246 address_space_translate_internal(AddressSpace *as, hwaddr addr, hwaddr *xlat,
247 hwaddr *plen, bool resolve_subpage)
248 {
249 MemoryRegionSection *section;
250 Int128 diff;
251
252 section = address_space_lookup_region(as, addr, resolve_subpage);
253 /* Compute offset within MemoryRegionSection */
254 addr -= section->offset_within_address_space;
255
256 /* Compute offset within MemoryRegion */
257 *xlat = addr + section->offset_within_region;
258
259 diff = int128_sub(section->mr->size, int128_make64(addr));
260 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
261 return section;
262 }
263
264 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
265 hwaddr *xlat, hwaddr *plen,
266 bool is_write)
267 {
268 IOMMUTLBEntry iotlb;
269 MemoryRegionSection *section;
270 MemoryRegion *mr;
271 hwaddr len = *plen;
272
273 for (;;) {
274 section = address_space_translate_internal(as, addr, &addr, plen, true);
275 mr = section->mr;
276
277 if (!mr->iommu_ops) {
278 break;
279 }
280
281 iotlb = mr->iommu_ops->translate(mr, addr);
282 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
283 | (addr & iotlb.addr_mask));
284 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
285 if (!(iotlb.perm & (1 << is_write))) {
286 mr = &io_mem_unassigned;
287 break;
288 }
289
290 as = iotlb.target_as;
291 }
292
293 *plen = len;
294 *xlat = addr;
295 return mr;
296 }
297
298 MemoryRegionSection *
299 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
300 hwaddr *plen)
301 {
302 MemoryRegionSection *section;
303 section = address_space_translate_internal(as, addr, xlat, plen, false);
304
305 assert(!section->mr->iommu_ops);
306 return section;
307 }
308 #endif
309
/*
 * One-time setup for system emulation: initialise the RAM list mutex,
 * then the memory map, then the I/O memory regions.  Does nothing in
 * CONFIG_USER_ONLY builds.
 */
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    qemu_mutex_init(&ram_list.mutex);
    memory_map_init();
    io_mem_init();
#endif
}
318
319 #if !defined(CONFIG_USER_ONLY)
320
321 static int cpu_common_post_load(void *opaque, int version_id)
322 {
323 CPUState *cpu = opaque;
324
325 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
326 version_id is increased. */
327 cpu->interrupt_request &= ~0x01;
328 tlb_flush(cpu->env_ptr, 1);
329
330 return 0;
331 }
332
/*
 * Migration description of the target-independent CPU state: the halted
 * and interrupt_request fields of CPUState.  cpu_common_post_load() runs
 * after the fields have been loaded.
 */
const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
345
346 #endif
347
348 CPUState *qemu_get_cpu(int index)
349 {
350 CPUArchState *env = first_cpu;
351 CPUState *cpu = NULL;
352
353 while (env) {
354 cpu = ENV_GET_CPU(env);
355 if (cpu->cpu_index == index) {
356 break;
357 }
358 env = env->next_cpu;
359 }
360
361 return env ? cpu : NULL;
362 }
363
364 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
365 {
366 CPUArchState *env = first_cpu;
367
368 while (env) {
369 func(ENV_GET_CPU(env), data);
370 env = env->next_cpu;
371 }
372 }
373
/*
 * Register a newly created CPU: append it to the end of the CPU list,
 * assign its cpu_index, initialise its breakpoint and watchpoint lists
 * and register its migration state.
 */
void cpu_exec_init(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUArchState **penv;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    env->next_cpu = NULL;
    /* Walk to the tail of the list; the number of CPUs already on it
       becomes the index of the new CPU. */
    penv = &first_cpu;
    cpu_index = 0;
    while (*penv != NULL) {
        penv = &(*penv)->next_cpu;
        cpu_index++;
    }
    cpu->cpu_index = cpu_index;
    cpu->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    cpu->thread_id = qemu_get_thread_id();
#endif
    /* Link the fully initialised CPU in at the tail. */
    *penv = env;
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
    /* A target using cpu_save/cpu_load must not also provide a vmsd. */
    assert(cc->vmsd == NULL);
#endif
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}
412
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate the translated code covering @pc (user-only variant: @pc is
   passed straight to the invalidation routine). */
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/* Invalidate the translated code covering @pc (system variant: the page
   address from cpu_get_phys_page_debug() is combined with the bits of
   @pc outside TARGET_PAGE_MASK). */
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
            (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */
427
428 #if defined(CONFIG_USER_ONLY)
/* User-only stub: there are no watchpoints to remove. */
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)

{
}

/* User-only stub: watchpoints are not supported, always fails with
   -ENOSYS. */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
439 #else
440 /* Add a watchpoint. */
441 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
442 int flags, CPUWatchpoint **watchpoint)
443 {
444 target_ulong len_mask = ~(len - 1);
445 CPUWatchpoint *wp;
446
447 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
448 if ((len & (len - 1)) || (addr & ~len_mask) ||
449 len == 0 || len > TARGET_PAGE_SIZE) {
450 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
451 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
452 return -EINVAL;
453 }
454 wp = g_malloc(sizeof(*wp));
455
456 wp->vaddr = addr;
457 wp->len_mask = len_mask;
458 wp->flags = flags;
459
460 /* keep all GDB-injected watchpoints in front */
461 if (flags & BP_GDB)
462 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
463 else
464 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
465
466 tlb_flush_page(env, addr);
467
468 if (watchpoint)
469 *watchpoint = wp;
470 return 0;
471 }
472
473 /* Remove a specific watchpoint. */
474 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
475 int flags)
476 {
477 target_ulong len_mask = ~(len - 1);
478 CPUWatchpoint *wp;
479
480 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
481 if (addr == wp->vaddr && len_mask == wp->len_mask
482 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
483 cpu_watchpoint_remove_by_ref(env, wp);
484 return 0;
485 }
486 }
487 return -ENOENT;
488 }
489
490 /* Remove a specific watchpoint by reference. */
491 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
492 {
493 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
494
495 tlb_flush_page(env, watchpoint->vaddr);
496
497 g_free(watchpoint);
498 }
499
500 /* Remove all matching watchpoints. */
501 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
502 {
503 CPUWatchpoint *wp, *next;
504
505 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
506 if (wp->flags & mask)
507 cpu_watchpoint_remove_by_ref(env, wp);
508 }
509 }
510 #endif
511
512 /* Add a breakpoint. */
513 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
514 CPUBreakpoint **breakpoint)
515 {
516 #if defined(TARGET_HAS_ICE)
517 CPUBreakpoint *bp;
518
519 bp = g_malloc(sizeof(*bp));
520
521 bp->pc = pc;
522 bp->flags = flags;
523
524 /* keep all GDB-injected breakpoints in front */
525 if (flags & BP_GDB)
526 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
527 else
528 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
529
530 breakpoint_invalidate(env, pc);
531
532 if (breakpoint)
533 *breakpoint = bp;
534 return 0;
535 #else
536 return -ENOSYS;
537 #endif
538 }
539
540 /* Remove a specific breakpoint. */
541 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
542 {
543 #if defined(TARGET_HAS_ICE)
544 CPUBreakpoint *bp;
545
546 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
547 if (bp->pc == pc && bp->flags == flags) {
548 cpu_breakpoint_remove_by_ref(env, bp);
549 return 0;
550 }
551 }
552 return -ENOENT;
553 #else
554 return -ENOSYS;
555 #endif
556 }
557
558 /* Remove a specific breakpoint by reference. */
559 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
560 {
561 #if defined(TARGET_HAS_ICE)
562 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
563
564 breakpoint_invalidate(env, breakpoint->pc);
565
566 g_free(breakpoint);
567 #endif
568 }
569
570 /* Remove all matching breakpoints. */
571 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
572 {
573 #if defined(TARGET_HAS_ICE)
574 CPUBreakpoint *bp, *next;
575
576 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
577 if (bp->flags & mask)
578 cpu_breakpoint_remove_by_ref(env, bp);
579 }
580 #endif
581 }
582
583 /* enable or disable single step mode. EXCP_DEBUG is returned by the
584 CPU loop after each instruction */
585 void cpu_single_step(CPUArchState *env, int enabled)
586 {
587 #if defined(TARGET_HAS_ICE)
588 if (env->singlestep_enabled != enabled) {
589 env->singlestep_enabled = enabled;
590 if (kvm_enabled())
591 kvm_update_guest_debug(env, 0);
592 else {
593 /* must flush all the translated code to avoid inconsistencies */
594 /* XXX: only flush what is necessary */
595 tb_flush(env);
596 }
597 }
598 #endif
599 }
600
601 void cpu_abort(CPUArchState *env, const char *fmt, ...)
602 {
603 CPUState *cpu = ENV_GET_CPU(env);
604 va_list ap;
605 va_list ap2;
606
607 va_start(ap, fmt);
608 va_copy(ap2, ap);
609 fprintf(stderr, "qemu: fatal: ");
610 vfprintf(stderr, fmt, ap);
611 fprintf(stderr, "\n");
612 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
613 if (qemu_log_enabled()) {
614 qemu_log("qemu: fatal: ");
615 qemu_log_vprintf(fmt, ap2);
616 qemu_log("\n");
617 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
618 qemu_log_flush();
619 qemu_log_close();
620 }
621 va_end(ap2);
622 va_end(ap);
623 #if defined(CONFIG_USER_ONLY)
624 {
625 struct sigaction act;
626 sigfillset(&act.sa_mask);
627 act.sa_handler = SIG_DFL;
628 sigaction(SIGABRT, &act, NULL);
629 }
630 #endif
631 abort();
632 }
633
634 CPUArchState *cpu_copy(CPUArchState *env)
635 {
636 CPUArchState *new_env = cpu_init(env->cpu_model_str);
637 CPUArchState *next_cpu = new_env->next_cpu;
638 #if defined(TARGET_HAS_ICE)
639 CPUBreakpoint *bp;
640 CPUWatchpoint *wp;
641 #endif
642
643 memcpy(new_env, env, sizeof(CPUArchState));
644
645 /* Preserve chaining. */
646 new_env->next_cpu = next_cpu;
647
648 /* Clone all break/watchpoints.
649 Note: Once we support ptrace with hw-debug register access, make sure
650 BP_CPU break/watchpoints are handled correctly on clone. */
651 QTAILQ_INIT(&env->breakpoints);
652 QTAILQ_INIT(&env->watchpoints);
653 #if defined(TARGET_HAS_ICE)
654 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
655 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
656 }
657 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
658 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
659 wp->flags, NULL);
660 }
661 #endif
662
663 return new_env;
664 }
665
666 #if !defined(CONFIG_USER_ONLY)
667 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
668 uintptr_t length)
669 {
670 uintptr_t start1;
671
672 /* we modify the TLB cache so that the dirty bit will be set again
673 when accessing the range */
674 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
675 /* Check that we don't span multiple blocks - this breaks the
676 address comparisons below. */
677 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
678 != (end - 1) - start) {
679 abort();
680 }
681 cpu_tlb_reset_dirty_all(start1, length);
682
683 }
684
685 /* Note: start and end must be within the same ram block. */
686 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
687 int dirty_flags)
688 {
689 uintptr_t length;
690
691 start &= TARGET_PAGE_MASK;
692 end = TARGET_PAGE_ALIGN(end);
693
694 length = end - start;
695 if (length == 0)
696 return;
697 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
698
699 if (tcg_enabled()) {
700 tlb_reset_dirty_range_all(start, end, length);
701 }
702 }
703
704 static int cpu_physical_memory_set_dirty_tracking(int enable)
705 {
706 int ret = 0;
707 in_migration = enable;
708 return ret;
709 }
710
711 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
712 MemoryRegionSection *section,
713 target_ulong vaddr,
714 hwaddr paddr, hwaddr xlat,
715 int prot,
716 target_ulong *address)
717 {
718 hwaddr iotlb;
719 CPUWatchpoint *wp;
720
721 if (memory_region_is_ram(section->mr)) {
722 /* Normal RAM. */
723 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
724 + xlat;
725 if (!section->readonly) {
726 iotlb |= phys_section_notdirty;
727 } else {
728 iotlb |= phys_section_rom;
729 }
730 } else {
731 iotlb = section - phys_sections;
732 iotlb += xlat;
733 }
734
735 /* Make accesses to pages with watchpoints go via the
736 watchpoint trap routines. */
737 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
738 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
739 /* Avoid trapping reads of pages with a write breakpoint. */
740 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
741 iotlb = phys_section_watch + paddr;
742 *address |= TLB_MMIO;
743 break;
744 }
745 }
746 }
747
748 return iotlb;
749 }
750 #endif /* defined(CONFIG_USER_ONLY) */
751
752 #if !defined(CONFIG_USER_ONLY)
753
754 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
755 uint16_t section);
756 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
757
758 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
759 {
760 unsigned i;
761 PhysPageEntry *p;
762
763 if (lp->ptr == PHYS_MAP_NODE_NIL) {
764 return;
765 }
766
767 p = phys_map_nodes[lp->ptr];
768 for (i = 0; i < L2_SIZE; ++i) {
769 if (!p[i].is_leaf) {
770 destroy_l2_mapping(&p[i], level - 1);
771 }
772 }
773 lp->is_leaf = 0;
774 lp->ptr = PHYS_MAP_NODE_NIL;
775 }
776
777 static void destroy_all_mappings(AddressSpaceDispatch *d)
778 {
779 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
780 phys_map_nodes_reset();
781 }
782
783 static uint16_t phys_section_add(MemoryRegionSection *section)
784 {
785 /* The physical section number is ORed with a page-aligned
786 * pointer to produce the iotlb entries. Thus it should
787 * never overflow into the page-aligned value.
788 */
789 assert(phys_sections_nb < TARGET_PAGE_SIZE);
790
791 if (phys_sections_nb == phys_sections_nb_alloc) {
792 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
793 phys_sections = g_renew(MemoryRegionSection, phys_sections,
794 phys_sections_nb_alloc);
795 }
796 phys_sections[phys_sections_nb] = *section;
797 return phys_sections_nb++;
798 }
799
800 static void phys_section_destroy(MemoryRegion *mr)
801 {
802 if (mr->subpage) {
803 subpage_t *subpage = container_of(mr, subpage_t, iomem);
804 memory_region_destroy(&subpage->iomem);
805 g_free(subpage);
806 }
807 }
808
809 static void phys_sections_clear(void)
810 {
811 while (phys_sections_nb > 0) {
812 MemoryRegionSection *section = &phys_sections[--phys_sections_nb];
813 phys_section_destroy(section->mr);
814 }
815 }
816
/*
 * Register @section, which covers only part of a page, in @d.
 *
 * The page must currently be either unassigned or already a subpage
 * container (asserted).  In the first case a container is created and
 * entered in the page map; in both cases @section is then registered
 * inside the container for the byte range it covers.
 */
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    /* Address of the start of the page containing the section. */
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
    /* Page-sized section describing the container itself. */
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    /* 'existing' points into phys_sections and must not be used after
       the phys_section_add() calls below, which may move that array. */
    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    /* Inclusive byte range of the section within its page. */
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end, phys_section_add(section));
}
843
844
845 static void register_multipage(AddressSpaceDispatch *d,
846 MemoryRegionSection *section)
847 {
848 hwaddr start_addr = section->offset_within_address_space;
849 uint16_t section_index = phys_section_add(section);
850 uint64_t num_pages = int128_get64(int128_rshift(section->size,
851 TARGET_PAGE_BITS));
852
853 assert(num_pages);
854 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
855 }
856
/*
 * Enter @section into the dispatch map of the AddressSpaceDispatch that
 * embeds @listener (presumably installed as a MemoryListener callback -
 * the registration is not visible here).
 *
 * The section is cut into pieces: parts that do not cover whole pages
 * are registered as subpages, while page-aligned runs of whole pages are
 * registered as multipage mappings.
 */
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
    /* 'now' is the piece being registered, 'remain' what is left
       including 'now'. */
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        /* Unaligned start: register the head up to the next page
           boundary, or the whole section if it ends before that. */
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        /* Aligned start: nothing consumed yet. */
        now.size = int128_zero();
    }
    /* Loop until the piece just registered is all that remained. */
    while (int128_ne(remain.size, now.size)) {
        /* Step past the piece handled in the previous round. */
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            /* Less than a page left: the tail goes into a subpage. */
            register_subpage(d, &now);
        } else if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
            /* Region offset is not page aligned: go one page at a time
               through subpages. */
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            /* Round down to whole pages and map them in one go. */
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}
888
/* Flush the coalesced MMIO buffer; only KVM has one, so this does
   nothing otherwise. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
894
895 void qemu_mutex_lock_ramlist(void)
896 {
897 qemu_mutex_lock(&ram_list.mutex);
898 }
899
900 void qemu_mutex_unlock_ramlist(void)
901 {
902 qemu_mutex_unlock(&ram_list.mutex);
903 }
904
905 #if defined(__linux__) && !defined(TARGET_S390X)
906
907 #include <sys/vfs.h>
908
909 #define HUGETLBFS_MAGIC 0x958458f6
910
911 static long gethugepagesize(const char *path)
912 {
913 struct statfs fs;
914 int ret;
915
916 do {
917 ret = statfs(path, &fs);
918 } while (ret != 0 && errno == EINTR);
919
920 if (ret != 0) {
921 perror(path);
922 return 0;
923 }
924
925 if (fs.f_type != HUGETLBFS_MAGIC)
926 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
927
928 return fs.f_bsize;
929 }
930
/*
 * Back @block with @memory bytes of file-backed memory created under the
 * directory @path.
 *
 * A temporary file is created and immediately unlinked, sized to @memory
 * rounded up to the page size reported for @path, and mapped.  On success
 * the mapping is returned and the file descriptor is stored in block->fd.
 * NULL is returned if the page size cannot be determined, if @memory is
 * smaller than one such page, if KVM is in use without a synchronous MMU,
 * or if creating or mapping the file fails.
 */
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    char *filename;
    char *sanitized_name;
    char *c;
    void *area;
    int fd;
#ifdef MAP_POPULATE
    int flags;
#endif
    unsigned long hpagesize;

    hpagesize = gethugepagesize(path);
    if (!hpagesize) {
        return NULL;
    }

    /* Not even one huge page worth of memory was requested. */
    if (memory < hpagesize) {
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
        return NULL;
    }

    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
    sanitized_name = g_strdup(block->mr->name);
    for (c = sanitized_name; *c != '\0'; c++) {
        if (*c == '/')
            *c = '_';
    }

    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                               sanitized_name);
    g_free(sanitized_name);

    fd = mkstemp(filename);
    if (fd < 0) {
        perror("unable to create backing store for hugepages");
        g_free(filename);
        return NULL;
    }
    /* The name is removed right away; only the open descriptor is used
       from here on. */
    unlink(filename);
    g_free(filename);

    /* Round the size up to a multiple of the huge page size. */
    memory = (memory+hpagesize-1) & ~(hpagesize-1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory))
        perror("ftruncate");

#ifdef MAP_POPULATE
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
     * to sidestep this quirk.
     */
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
#else
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
#endif
    if (area == MAP_FAILED) {
        perror("file_ram_alloc: can't mmap RAM pages");
        close(fd);
        return (NULL);
    }
    /* The descriptor stays open and is recorded in the block. */
    block->fd = fd;
    return area;
}
1008 #endif
1009
1010 static ram_addr_t find_ram_offset(ram_addr_t size)
1011 {
1012 RAMBlock *block, *next_block;
1013 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1014
1015 assert(size != 0); /* it would hand out same offset multiple times */
1016
1017 if (QTAILQ_EMPTY(&ram_list.blocks))
1018 return 0;
1019
1020 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1021 ram_addr_t end, next = RAM_ADDR_MAX;
1022
1023 end = block->offset + block->length;
1024
1025 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1026 if (next_block->offset >= end) {
1027 next = MIN(next, next_block->offset);
1028 }
1029 }
1030 if (next - end >= size && next - end < mingap) {
1031 offset = end;
1032 mingap = next - end;
1033 }
1034 }
1035
1036 if (offset == RAM_ADDR_MAX) {
1037 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1038 (uint64_t)size);
1039 abort();
1040 }
1041
1042 return offset;
1043 }
1044
1045 ram_addr_t last_ram_offset(void)
1046 {
1047 RAMBlock *block;
1048 ram_addr_t last = 0;
1049
1050 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1051 last = MAX(last, block->offset + block->length);
1052
1053 return last;
1054 }
1055
1056 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1057 {
1058 int ret;
1059 QemuOpts *machine_opts;
1060
1061 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1062 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1063 if (machine_opts &&
1064 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1065 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1066 if (ret) {
1067 perror("qemu_madvise");
1068 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1069 "but dump_guest_core=off specified\n");
1070 }
1071 }
1072 }
1073
1074 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1075 {
1076 RAMBlock *new_block, *block;
1077
1078 new_block = NULL;
1079 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1080 if (block->offset == addr) {
1081 new_block = block;
1082 break;
1083 }
1084 }
1085 assert(new_block);
1086 assert(!new_block->idstr[0]);
1087
1088 if (dev) {
1089 char *id = qdev_get_dev_path(dev);
1090 if (id) {
1091 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1092 g_free(id);
1093 }
1094 }
1095 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1096
1097 /* This assumes the iothread lock is taken here too. */
1098 qemu_mutex_lock_ramlist();
1099 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1100 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1101 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1102 new_block->idstr);
1103 abort();
1104 }
1105 }
1106 qemu_mutex_unlock_ramlist();
1107 }
1108
1109 static int memory_try_enable_merging(void *addr, size_t len)
1110 {
1111 QemuOpts *opts;
1112
1113 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1114 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1115 /* disabled by the user */
1116 return 0;
1117 }
1118
1119 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1120 }
1121
/*
 * Create a RAM block of @size bytes (rounded up to whole target pages)
 * for region @mr and return its offset in the ram_addr_t space.
 *
 * If @host is not NULL it is used as the backing memory and the block is
 * flagged RAM_PREALLOC_MASK.  Otherwise memory is obtained from a file
 * under mem_path when that is set (falling back to anonymous memory if
 * that fails), else from Xen, KVM or anonymous memory.  The new block is
 * entered in ram_list and its pages are marked dirty.
 */
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr)
{
    RAMBlock *block, *new_block;

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    new_block->mr = mr;
    new_block->offset = find_ram_offset(size);
    if (host) {
        /* Caller-provided memory. */
        new_block->host = host;
        new_block->flags |= RAM_PREALLOC_MASK;
    } else {
        if (mem_path) {
#if defined (__linux__) && !defined(TARGET_S390X)
            new_block->host = file_ram_alloc(new_block, size, mem_path);
            if (!new_block->host) {
                /* File backing failed: fall back to anonymous memory. */
                new_block->host = qemu_anon_ram_alloc(size);
                memory_try_enable_merging(new_block->host, size);
            }
#else
            fprintf(stderr, "-mem-path option unsupported\n");
            exit(1);
#endif
        } else {
            if (xen_enabled()) {
                /* new_block->host is not set on this path. */
                xen_ram_alloc(new_block->offset, size, mr);
            } else if (kvm_enabled()) {
                /* some s390/kvm configurations have special constraints */
                new_block->host = kvm_ram_alloc(size);
            } else {
                new_block->host = qemu_anon_ram_alloc(size);
            }
            memory_try_enable_merging(new_block->host, size);
        }
    }
    new_block->length = size;

    /* Keep the list sorted from biggest to smallest block.  */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->length < new_block->length) {
            break;
        }
    }
    if (block) {
        QTAILQ_INSERT_BEFORE(block, new_block, next);
    } else {
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
    }
    /* The list changed: drop the cached block and bump the version. */
    ram_list.mru_block = NULL;

    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    /* Grow the dirty array (one byte per page) to cover the new block,
       clear the new block's part, then mark all its pages dirty. */
    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                       last_ram_offset() >> TARGET_PAGE_BITS);
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
           0, size >> TARGET_PAGE_BITS);
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);

    qemu_ram_setup_dump(new_block->host, size);
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);

    if (kvm_enabled())
        kvm_setup_guest_memory(new_block->host, size);

    return new_block->offset;
}
1193
1194 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1195 {
1196 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1197 }
1198
1199 void qemu_ram_free_from_ptr(ram_addr_t addr)
1200 {
1201 RAMBlock *block;
1202
1203 /* This assumes the iothread lock is taken here too. */
1204 qemu_mutex_lock_ramlist();
1205 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1206 if (addr == block->offset) {
1207 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1208 ram_list.mru_block = NULL;
1209 ram_list.version++;
1210 g_free(block);
1211 break;
1212 }
1213 }
1214 qemu_mutex_unlock_ramlist();
1215 }
1216
1217 void qemu_ram_free(ram_addr_t addr)
1218 {
1219 RAMBlock *block;
1220
1221 /* This assumes the iothread lock is taken here too. */
1222 qemu_mutex_lock_ramlist();
1223 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1224 if (addr == block->offset) {
1225 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1226 ram_list.mru_block = NULL;
1227 ram_list.version++;
1228 if (block->flags & RAM_PREALLOC_MASK) {
1229 ;
1230 } else if (mem_path) {
1231 #if defined (__linux__) && !defined(TARGET_S390X)
1232 if (block->fd) {
1233 munmap(block->host, block->length);
1234 close(block->fd);
1235 } else {
1236 qemu_anon_ram_free(block->host, block->length);
1237 }
1238 #else
1239 abort();
1240 #endif
1241 } else {
1242 if (xen_enabled()) {
1243 xen_invalidate_map_cache_entry(block->host);
1244 } else {
1245 qemu_anon_ram_free(block->host, block->length);
1246 }
1247 }
1248 g_free(block);
1249 break;
1250 }
1251 }
1252 qemu_mutex_unlock_ramlist();
1253
1254 }
1255
1256 #ifndef _WIN32
1257 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1258 {
1259 RAMBlock *block;
1260 ram_addr_t offset;
1261 int flags;
1262 void *area, *vaddr;
1263
1264 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1265 offset = addr - block->offset;
1266 if (offset < block->length) {
1267 vaddr = block->host + offset;
1268 if (block->flags & RAM_PREALLOC_MASK) {
1269 ;
1270 } else {
1271 flags = MAP_FIXED;
1272 munmap(vaddr, length);
1273 if (mem_path) {
1274 #if defined(__linux__) && !defined(TARGET_S390X)
1275 if (block->fd) {
1276 #ifdef MAP_POPULATE
1277 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1278 MAP_PRIVATE;
1279 #else
1280 flags |= MAP_PRIVATE;
1281 #endif
1282 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1283 flags, block->fd, offset);
1284 } else {
1285 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1286 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1287 flags, -1, 0);
1288 }
1289 #else
1290 abort();
1291 #endif
1292 } else {
1293 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1294 flags |= MAP_SHARED | MAP_ANONYMOUS;
1295 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1296 flags, -1, 0);
1297 #else
1298 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1299 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1300 flags, -1, 0);
1301 #endif
1302 }
1303 if (area != vaddr) {
1304 fprintf(stderr, "Could not remap addr: "
1305 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1306 length, addr);
1307 exit(1);
1308 }
1309 memory_try_enable_merging(vaddr, length);
1310 qemu_ram_setup_dump(vaddr, length);
1311 }
1312 return;
1313 }
1314 }
1315 }
1316 #endif /* !_WIN32 */
1317
1318 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1319 With the exception of the softmmu code in this file, this should
1320 only be used for local memory (e.g. video ram) that the device owns,
1321 and knows it isn't going to access beyond the end of the block.
1322
1323 It should not be used for general purpose DMA.
1324 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1325 */
1326 void *qemu_get_ram_ptr(ram_addr_t addr)
1327 {
1328 RAMBlock *block;
1329
1330 /* The list is protected by the iothread lock here. */
1331 block = ram_list.mru_block;
1332 if (block && addr - block->offset < block->length) {
1333 goto found;
1334 }
1335 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1336 if (addr - block->offset < block->length) {
1337 goto found;
1338 }
1339 }
1340
1341 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1342 abort();
1343
1344 found:
1345 ram_list.mru_block = block;
1346 if (xen_enabled()) {
1347 /* We need to check if the requested address is in the RAM
1348 * because we don't want to map the entire memory in QEMU.
1349 * In that case just map until the end of the page.
1350 */
1351 if (block->offset == 0) {
1352 return xen_map_cache(addr, 0, 0);
1353 } else if (block->host == NULL) {
1354 block->host =
1355 xen_map_cache(block->offset, block->length, 1);
1356 }
1357 }
1358 return block->host + (addr - block->offset);
1359 }
1360
1361 /* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
1362 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1363 *
1364 * ??? Is this still necessary?
1365 */
1366 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1367 {
1368 RAMBlock *block;
1369
1370 /* The list is protected by the iothread lock here. */
1371 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1372 if (addr - block->offset < block->length) {
1373 if (xen_enabled()) {
1374 /* We need to check if the requested address is in the RAM
1375 * because we don't want to map the entire memory in QEMU.
1376 * In that case just map until the end of the page.
1377 */
1378 if (block->offset == 0) {
1379 return xen_map_cache(addr, 0, 0);
1380 } else if (block->host == NULL) {
1381 block->host =
1382 xen_map_cache(block->offset, block->length, 1);
1383 }
1384 }
1385 return block->host + (addr - block->offset);
1386 }
1387 }
1388
1389 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1390 abort();
1391
1392 return NULL;
1393 }
1394
1395 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1396 * but takes a size argument */
1397 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1398 {
1399 if (*size == 0) {
1400 return NULL;
1401 }
1402 if (xen_enabled()) {
1403 return xen_map_cache(addr, *size, 1);
1404 } else {
1405 RAMBlock *block;
1406
1407 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1408 if (addr - block->offset < block->length) {
1409 if (addr - block->offset + *size > block->length)
1410 *size = block->length - addr + block->offset;
1411 return block->host + (addr - block->offset);
1412 }
1413 }
1414
1415 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1416 abort();
1417 }
1418 }
1419
1420 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1421 {
1422 RAMBlock *block;
1423 uint8_t *host = ptr;
1424
1425 if (xen_enabled()) {
1426 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1427 return 0;
1428 }
1429
1430 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1431 /* This case append when the block is not mapped. */
1432 if (block->host == NULL) {
1433 continue;
1434 }
1435 if (host - block->host < block->length) {
1436 *ram_addr = block->offset + (host - block->host);
1437 return 0;
1438 }
1439 }
1440
1441 return -1;
1442 }
1443
1444 /* Some of the softmmu routines need to translate from a host pointer
1445 (typically a TLB entry) back to a ram offset. */
1446 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1447 {
1448 ram_addr_t ram_addr;
1449
1450 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1451 fprintf(stderr, "Bad ram pointer %p\n", ptr);
1452 abort();
1453 }
1454 return ram_addr;
1455 }
1456
1457 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1458 uint64_t val, unsigned size)
1459 {
1460 int dirty_flags;
1461 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1462 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1463 tb_invalidate_phys_page_fast(ram_addr, size);
1464 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1465 }
1466 switch (size) {
1467 case 1:
1468 stb_p(qemu_get_ram_ptr(ram_addr), val);
1469 break;
1470 case 2:
1471 stw_p(qemu_get_ram_ptr(ram_addr), val);
1472 break;
1473 case 4:
1474 stl_p(qemu_get_ram_ptr(ram_addr), val);
1475 break;
1476 default:
1477 abort();
1478 }
1479 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1480 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1481 /* we remove the notdirty callback only if the code has been
1482 flushed */
1483 if (dirty_flags == 0xff)
1484 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1485 }
1486
1487 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1488 unsigned size, bool is_write)
1489 {
1490 return is_write;
1491 }
1492
1493 static const MemoryRegionOps notdirty_mem_ops = {
1494 .write = notdirty_mem_write,
1495 .valid.accepts = notdirty_mem_accepts,
1496 .endianness = DEVICE_NATIVE_ENDIAN,
1497 };
1498
1499 /* Generate a debug exception if a watchpoint has been hit. */
1500 static void check_watchpoint(int offset, int len_mask, int flags)
1501 {
1502 CPUArchState *env = cpu_single_env;
1503 target_ulong pc, cs_base;
1504 target_ulong vaddr;
1505 CPUWatchpoint *wp;
1506 int cpu_flags;
1507
1508 if (env->watchpoint_hit) {
1509 /* We re-entered the check after replacing the TB. Now raise
1510 * the debug interrupt so that is will trigger after the
1511 * current instruction. */
1512 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1513 return;
1514 }
1515 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1516 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1517 if ((vaddr == (wp->vaddr & len_mask) ||
1518 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1519 wp->flags |= BP_WATCHPOINT_HIT;
1520 if (!env->watchpoint_hit) {
1521 env->watchpoint_hit = wp;
1522 tb_check_watchpoint(env);
1523 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1524 env->exception_index = EXCP_DEBUG;
1525 cpu_loop_exit(env);
1526 } else {
1527 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1528 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1529 cpu_resume_from_signal(env, NULL);
1530 }
1531 }
1532 } else {
1533 wp->flags &= ~BP_WATCHPOINT_HIT;
1534 }
1535 }
1536 }
1537
1538 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1539 so these check for a hit then pass through to the normal out-of-line
1540 phys routines. */
1541 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1542 unsigned size)
1543 {
1544 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1545 switch (size) {
1546 case 1: return ldub_phys(addr);
1547 case 2: return lduw_phys(addr);
1548 case 4: return ldl_phys(addr);
1549 default: abort();
1550 }
1551 }
1552
1553 static void watch_mem_write(void *opaque, hwaddr addr,
1554 uint64_t val, unsigned size)
1555 {
1556 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1557 switch (size) {
1558 case 1:
1559 stb_phys(addr, val);
1560 break;
1561 case 2:
1562 stw_phys(addr, val);
1563 break;
1564 case 4:
1565 stl_phys(addr, val);
1566 break;
1567 default: abort();
1568 }
1569 }
1570
1571 static const MemoryRegionOps watch_mem_ops = {
1572 .read = watch_mem_read,
1573 .write = watch_mem_write,
1574 .endianness = DEVICE_NATIVE_ENDIAN,
1575 };
1576
1577 static uint64_t subpage_read(void *opaque, hwaddr addr,
1578 unsigned len)
1579 {
1580 subpage_t *subpage = opaque;
1581 uint8_t buf[4];
1582
1583 #if defined(DEBUG_SUBPAGE)
1584 printf("%s: subpage %p len %d addr " TARGET_FMT_plx "\n", __func__,
1585 subpage, len, addr);
1586 #endif
1587 address_space_read(subpage->as, addr + subpage->base, buf, len);
1588 switch (len) {
1589 case 1:
1590 return ldub_p(buf);
1591 case 2:
1592 return lduw_p(buf);
1593 case 4:
1594 return ldl_p(buf);
1595 default:
1596 abort();
1597 }
1598 }
1599
1600 static void subpage_write(void *opaque, hwaddr addr,
1601 uint64_t value, unsigned len)
1602 {
1603 subpage_t *subpage = opaque;
1604 uint8_t buf[4];
1605
1606 #if defined(DEBUG_SUBPAGE)
1607 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1608 " value %"PRIx64"\n",
1609 __func__, subpage, len, addr, value);
1610 #endif
1611 switch (len) {
1612 case 1:
1613 stb_p(buf, value);
1614 break;
1615 case 2:
1616 stw_p(buf, value);
1617 break;
1618 case 4:
1619 stl_p(buf, value);
1620 break;
1621 default:
1622 abort();
1623 }
1624 address_space_write(subpage->as, addr + subpage->base, buf, len);
1625 }
1626
1627 static bool subpage_accepts(void *opaque, hwaddr addr,
1628 unsigned size, bool is_write)
1629 {
1630 subpage_t *subpage = opaque;
1631 #if defined(DEBUG_SUBPAGE)
1632 printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx "\n",
1633 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1634 #endif
1635
1636 return address_space_access_valid(subpage->as, addr + subpage->base,
1637 size, is_write);
1638 }
1639
1640 static const MemoryRegionOps subpage_ops = {
1641 .read = subpage_read,
1642 .write = subpage_write,
1643 .valid.accepts = subpage_accepts,
1644 .endianness = DEVICE_NATIVE_ENDIAN,
1645 };
1646
1647 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1648 uint16_t section)
1649 {
1650 int idx, eidx;
1651
1652 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1653 return -1;
1654 idx = SUBPAGE_IDX(start);
1655 eidx = SUBPAGE_IDX(end);
1656 #if defined(DEBUG_SUBPAGE)
1657 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
1658 mmio, start, end, idx, eidx, memory);
1659 #endif
1660 for (; idx <= eidx; idx++) {
1661 mmio->sub_section[idx] = section;
1662 }
1663
1664 return 0;
1665 }
1666
1667 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1668 {
1669 subpage_t *mmio;
1670
1671 mmio = g_malloc0(sizeof(subpage_t));
1672
1673 mmio->as = as;
1674 mmio->base = base;
1675 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1676 "subpage", TARGET_PAGE_SIZE);
1677 mmio->iomem.subpage = true;
1678 #if defined(DEBUG_SUBPAGE)
1679 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
1680 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
1681 #endif
1682 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1683
1684 return mmio;
1685 }
1686
1687 static uint16_t dummy_section(MemoryRegion *mr)
1688 {
1689 MemoryRegionSection section = {
1690 .mr = mr,
1691 .offset_within_address_space = 0,
1692 .offset_within_region = 0,
1693 .size = int128_2_64(),
1694 };
1695
1696 return phys_section_add(&section);
1697 }
1698
1699 MemoryRegion *iotlb_to_region(hwaddr index)
1700 {
1701 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1702 }
1703
1704 static void io_mem_init(void)
1705 {
1706 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1707 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1708 "unassigned", UINT64_MAX);
1709 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1710 "notdirty", UINT64_MAX);
1711 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1712 "watch", UINT64_MAX);
1713 }
1714
1715 static void mem_begin(MemoryListener *listener)
1716 {
1717 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1718
1719 destroy_all_mappings(d);
1720 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1721 }
1722
1723 static void core_begin(MemoryListener *listener)
1724 {
1725 phys_sections_clear();
1726 phys_section_unassigned = dummy_section(&io_mem_unassigned);
1727 phys_section_notdirty = dummy_section(&io_mem_notdirty);
1728 phys_section_rom = dummy_section(&io_mem_rom);
1729 phys_section_watch = dummy_section(&io_mem_watch);
1730 }
1731
1732 static void tcg_commit(MemoryListener *listener)
1733 {
1734 CPUArchState *env;
1735
1736 /* since each CPU stores ram addresses in its TLB cache, we must
1737 reset the modified entries */
1738 /* XXX: slow ! */
1739 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1740 tlb_flush(env, 1);
1741 }
1742 }
1743
1744 static void core_log_global_start(MemoryListener *listener)
1745 {
1746 cpu_physical_memory_set_dirty_tracking(1);
1747 }
1748
1749 static void core_log_global_stop(MemoryListener *listener)
1750 {
1751 cpu_physical_memory_set_dirty_tracking(0);
1752 }
1753
1754 static MemoryListener core_memory_listener = {
1755 .begin = core_begin,
1756 .log_global_start = core_log_global_start,
1757 .log_global_stop = core_log_global_stop,
1758 .priority = 1,
1759 };
1760
1761 static MemoryListener tcg_memory_listener = {
1762 .commit = tcg_commit,
1763 };
1764
1765 void address_space_init_dispatch(AddressSpace *as)
1766 {
1767 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1768
1769 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1770 d->listener = (MemoryListener) {
1771 .begin = mem_begin,
1772 .region_add = mem_add,
1773 .region_nop = mem_add,
1774 .priority = 0,
1775 };
1776 d->as = as;
1777 as->dispatch = d;
1778 memory_listener_register(&d->listener, as);
1779 }
1780
1781 void address_space_destroy_dispatch(AddressSpace *as)
1782 {
1783 AddressSpaceDispatch *d = as->dispatch;
1784
1785 memory_listener_unregister(&d->listener);
1786 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1787 g_free(d);
1788 as->dispatch = NULL;
1789 }
1790
1791 static void memory_map_init(void)
1792 {
1793 system_memory = g_malloc(sizeof(*system_memory));
1794 memory_region_init(system_memory, NULL, "system", INT64_MAX);
1795 address_space_init(&address_space_memory, system_memory, "memory");
1796
1797 system_io = g_malloc(sizeof(*system_io));
1798 memory_region_init(system_io, NULL, "io", 65536);
1799 address_space_init(&address_space_io, system_io, "I/O");
1800
1801 memory_listener_register(&core_memory_listener, &address_space_memory);
1802 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1803 }
1804
1805 MemoryRegion *get_system_memory(void)
1806 {
1807 return system_memory;
1808 }
1809
1810 MemoryRegion *get_system_io(void)
1811 {
1812 return system_io;
1813 }
1814
1815 #endif /* !defined(CONFIG_USER_ONLY) */
1816
1817 /* physical memory access (slow version, mainly for debug) */
1818 #if defined(CONFIG_USER_ONLY)
1819 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1820 uint8_t *buf, int len, int is_write)
1821 {
1822 int l, flags;
1823 target_ulong page;
1824 void * p;
1825
1826 while (len > 0) {
1827 page = addr & TARGET_PAGE_MASK;
1828 l = (page + TARGET_PAGE_SIZE) - addr;
1829 if (l > len)
1830 l = len;
1831 flags = page_get_flags(page);
1832 if (!(flags & PAGE_VALID))
1833 return -1;
1834 if (is_write) {
1835 if (!(flags & PAGE_WRITE))
1836 return -1;
1837 /* XXX: this code should not depend on lock_user */
1838 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1839 return -1;
1840 memcpy(p, buf, l);
1841 unlock_user(p, addr, l);
1842 } else {
1843 if (!(flags & PAGE_READ))
1844 return -1;
1845 /* XXX: this code should not depend on lock_user */
1846 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1847 return -1;
1848 memcpy(buf, p, l);
1849 unlock_user(p, addr, 0);
1850 }
1851 len -= l;
1852 buf += l;
1853 addr += l;
1854 }
1855 return 0;
1856 }
1857
1858 #else
1859
1860 static void invalidate_and_set_dirty(hwaddr addr,
1861 hwaddr length)
1862 {
1863 if (!cpu_physical_memory_is_dirty(addr)) {
1864 /* invalidate code */
1865 tb_invalidate_phys_page_range(addr, addr + length, 0);
1866 /* set dirty bit */
1867 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1868 }
1869 xen_modified_memory(addr, length);
1870 }
1871
1872 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1873 {
1874 if (memory_region_is_ram(mr)) {
1875 return !(is_write && mr->readonly);
1876 }
1877 if (memory_region_is_romd(mr)) {
1878 return !is_write;
1879 }
1880
1881 return false;
1882 }
1883
1884 static inline int memory_access_size(MemoryRegion *mr, int l, hwaddr addr)
1885 {
1886 if (l >= 4 && (((addr & 3) == 0 || mr->ops->impl.unaligned))) {
1887 return 4;
1888 }
1889 if (l >= 2 && (((addr & 1) == 0) || mr->ops->impl.unaligned)) {
1890 return 2;
1891 }
1892 return 1;
1893 }
1894
1895 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1896 int len, bool is_write)
1897 {
1898 hwaddr l;
1899 uint8_t *ptr;
1900 uint64_t val;
1901 hwaddr addr1;
1902 MemoryRegion *mr;
1903 bool error = false;
1904
1905 while (len > 0) {
1906 l = len;
1907 mr = address_space_translate(as, addr, &addr1, &l, is_write);
1908
1909 if (is_write) {
1910 if (!memory_access_is_direct(mr, is_write)) {
1911 l = memory_access_size(mr, l, addr1);
1912 /* XXX: could force cpu_single_env to NULL to avoid
1913 potential bugs */
1914 if (l == 4) {
1915 /* 32 bit write access */
1916 val = ldl_p(buf);
1917 error |= io_mem_write(mr, addr1, val, 4);
1918 } else if (l == 2) {
1919 /* 16 bit write access */
1920 val = lduw_p(buf);
1921 error |= io_mem_write(mr, addr1, val, 2);
1922 } else {
1923 /* 8 bit write access */
1924 val = ldub_p(buf);
1925 error |= io_mem_write(mr, addr1, val, 1);
1926 }
1927 } else {
1928 addr1 += memory_region_get_ram_addr(mr);
1929 /* RAM case */
1930 ptr = qemu_get_ram_ptr(addr1);
1931 memcpy(ptr, buf, l);
1932 invalidate_and_set_dirty(addr1, l);
1933 }
1934 } else {
1935 if (!memory_access_is_direct(mr, is_write)) {
1936 /* I/O case */
1937 l = memory_access_size(mr, l, addr1);
1938 if (l == 4) {
1939 /* 32 bit read access */
1940 error |= io_mem_read(mr, addr1, &val, 4);
1941 stl_p(buf, val);
1942 } else if (l == 2) {
1943 /* 16 bit read access */
1944 error |= io_mem_read(mr, addr1, &val, 2);
1945 stw_p(buf, val);
1946 } else {
1947 /* 8 bit read access */
1948 error |= io_mem_read(mr, addr1, &val, 1);
1949 stb_p(buf, val);
1950 }
1951 } else {
1952 /* RAM case */
1953 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
1954 memcpy(buf, ptr, l);
1955 }
1956 }
1957 len -= l;
1958 buf += l;
1959 addr += l;
1960 }
1961
1962 return error;
1963 }
1964
1965 bool address_space_write(AddressSpace *as, hwaddr addr,
1966 const uint8_t *buf, int len)
1967 {
1968 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
1969 }
1970
1971 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1972 {
1973 return address_space_rw(as, addr, buf, len, false);
1974 }
1975
1976
1977 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1978 int len, int is_write)
1979 {
1980 address_space_rw(&address_space_memory, addr, buf, len, is_write);
1981 }
1982
1983 /* used for ROM loading : can write in RAM and ROM */
1984 void cpu_physical_memory_write_rom(hwaddr addr,
1985 const uint8_t *buf, int len)
1986 {
1987 hwaddr l;
1988 uint8_t *ptr;
1989 hwaddr addr1;
1990 MemoryRegion *mr;
1991
1992 while (len > 0) {
1993 l = len;
1994 mr = address_space_translate(&address_space_memory,
1995 addr, &addr1, &l, true);
1996
1997 if (!(memory_region_is_ram(mr) ||
1998 memory_region_is_romd(mr))) {
1999 /* do nothing */
2000 } else {
2001 addr1 += memory_region_get_ram_addr(mr);
2002 /* ROM/RAM case */
2003 ptr = qemu_get_ram_ptr(addr1);
2004 memcpy(ptr, buf, l);
2005 invalidate_and_set_dirty(addr1, l);
2006 }
2007 len -= l;
2008 buf += l;
2009 addr += l;
2010 }
2011 }
2012
2013 typedef struct {
2014 void *buffer;
2015 hwaddr addr;
2016 hwaddr len;
2017 } BounceBuffer;
2018
2019 static BounceBuffer bounce;
2020
2021 typedef struct MapClient {
2022 void *opaque;
2023 void (*callback)(void *opaque);
2024 QLIST_ENTRY(MapClient) link;
2025 } MapClient;
2026
2027 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2028 = QLIST_HEAD_INITIALIZER(map_client_list);
2029
2030 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2031 {
2032 MapClient *client = g_malloc(sizeof(*client));
2033
2034 client->opaque = opaque;
2035 client->callback = callback;
2036 QLIST_INSERT_HEAD(&map_client_list, client, link);
2037 return client;
2038 }
2039
2040 static void cpu_unregister_map_client(void *_client)
2041 {
2042 MapClient *client = (MapClient *)_client;
2043
2044 QLIST_REMOVE(client, link);
2045 g_free(client);
2046 }
2047
2048 static void cpu_notify_map_clients(void)
2049 {
2050 MapClient *client;
2051
2052 while (!QLIST_EMPTY(&map_client_list)) {
2053 client = QLIST_FIRST(&map_client_list);
2054 client->callback(client->opaque);
2055 cpu_unregister_map_client(client);
2056 }
2057 }
2058
2059 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2060 {
2061 MemoryRegion *mr;
2062 hwaddr l, xlat;
2063
2064 while (len > 0) {
2065 l = len;
2066 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2067 if (!memory_access_is_direct(mr, is_write)) {
2068 l = memory_access_size(mr, l, addr);
2069 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2070 return false;
2071 }
2072 }
2073
2074 len -= l;
2075 addr += l;
2076 }
2077 return true;
2078 }
2079
2080 /* Map a physical memory region into a host virtual address.
2081 * May map a subset of the requested range, given by and returned in *plen.
2082 * May return NULL if resources needed to perform the mapping are exhausted.
2083 * Use only for reads OR writes - not for read-modify-write operations.
2084 * Use cpu_register_map_client() to know when retrying the map operation is
2085 * likely to succeed.
2086 */
2087 void *address_space_map(AddressSpace *as,
2088 hwaddr addr,
2089 hwaddr *plen,
2090 bool is_write)
2091 {
2092 hwaddr len = *plen;
2093 hwaddr todo = 0;
2094 hwaddr l, xlat;
2095 MemoryRegion *mr;
2096 ram_addr_t raddr = RAM_ADDR_MAX;
2097 ram_addr_t rlen;
2098 void *ret;
2099
2100 while (len > 0) {
2101 l = len;
2102 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2103
2104 if (!memory_access_is_direct(mr, is_write)) {
2105 if (todo || bounce.buffer) {
2106 break;
2107 }
2108 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2109 bounce.addr = addr;
2110 bounce.len = l;
2111 if (!is_write) {
2112 address_space_read(as, addr, bounce.buffer, l);
2113 }
2114
2115 *plen = l;
2116 return bounce.buffer;
2117 }
2118 if (!todo) {
2119 raddr = memory_region_get_ram_addr(mr) + xlat;
2120 } else {
2121 if (memory_region_get_ram_addr(mr) + xlat != raddr + todo) {
2122 break;
2123 }
2124 }
2125
2126 len -= l;
2127 addr += l;
2128 todo += l;
2129 }
2130 rlen = todo;
2131 ret = qemu_ram_ptr_length(raddr, &rlen);
2132 *plen = rlen;
2133 return ret;
2134 }
2135
2136 /* Unmaps a memory region previously mapped by address_space_map().
2137 * Will also mark the memory as dirty if is_write == 1. access_len gives
2138 * the amount of memory that was actually read or written by the caller.
2139 */
2140 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2141 int is_write, hwaddr access_len)
2142 {
2143 if (buffer != bounce.buffer) {
2144 if (is_write) {
2145 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2146 while (access_len) {
2147 unsigned l;
2148 l = TARGET_PAGE_SIZE;
2149 if (l > access_len)
2150 l = access_len;
2151 invalidate_and_set_dirty(addr1, l);
2152 addr1 += l;
2153 access_len -= l;
2154 }
2155 }
2156 if (xen_enabled()) {
2157 xen_invalidate_map_cache_entry(buffer);
2158 }
2159 return;
2160 }
2161 if (is_write) {
2162 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2163 }
2164 qemu_vfree(bounce.buffer);
2165 bounce.buffer = NULL;
2166 cpu_notify_map_clients();
2167 }
2168
2169 void *cpu_physical_memory_map(hwaddr addr,
2170 hwaddr *plen,
2171 int is_write)
2172 {
2173 return address_space_map(&address_space_memory, addr, plen, is_write);
2174 }
2175
2176 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2177 int is_write, hwaddr access_len)
2178 {
2179 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2180 }
2181
2182 /* warning: addr must be aligned */
2183 static inline uint32_t ldl_phys_internal(hwaddr addr,
2184 enum device_endian endian)
2185 {
2186 uint8_t *ptr;
2187 uint64_t val;
2188 MemoryRegion *mr;
2189 hwaddr l = 4;
2190 hwaddr addr1;
2191
2192 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2193 false);
2194 if (l < 4 || !memory_access_is_direct(mr, false)) {
2195 /* I/O case */
2196 io_mem_read(mr, addr1, &val, 4);
2197 #if defined(TARGET_WORDS_BIGENDIAN)
2198 if (endian == DEVICE_LITTLE_ENDIAN) {
2199 val = bswap32(val);
2200 }
2201 #else
2202 if (endian == DEVICE_BIG_ENDIAN) {
2203 val = bswap32(val);
2204 }
2205 #endif
2206 } else {
2207 /* RAM case */
2208 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2209 & TARGET_PAGE_MASK)
2210 + addr1);
2211 switch (endian) {
2212 case DEVICE_LITTLE_ENDIAN:
2213 val = ldl_le_p(ptr);
2214 break;
2215 case DEVICE_BIG_ENDIAN:
2216 val = ldl_be_p(ptr);
2217 break;
2218 default:
2219 val = ldl_p(ptr);
2220 break;
2221 }
2222 }
2223 return val;
2224 }
2225
2226 uint32_t ldl_phys(hwaddr addr)
2227 {
2228 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2229 }
2230
2231 uint32_t ldl_le_phys(hwaddr addr)
2232 {
2233 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2234 }
2235
2236 uint32_t ldl_be_phys(hwaddr addr)
2237 {
2238 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2239 }
2240
2241 /* warning: addr must be aligned */
2242 static inline uint64_t ldq_phys_internal(hwaddr addr,
2243 enum device_endian endian)
2244 {
2245 uint8_t *ptr;
2246 uint64_t val;
2247 MemoryRegion *mr;
2248 hwaddr l = 8;
2249 hwaddr addr1;
2250
2251 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2252 false);
2253 if (l < 8 || !memory_access_is_direct(mr, false)) {
2254 /* I/O case */
2255 io_mem_read(mr, addr1, &val, 8);
2256 #if defined(TARGET_WORDS_BIGENDIAN)
2257 if (endian == DEVICE_LITTLE_ENDIAN) {
2258 val = bswap64(val);
2259 }
2260 #else
2261 if (endian == DEVICE_BIG_ENDIAN) {
2262 val = bswap64(val);
2263 }
2264 #endif
2265 } else {
2266 /* RAM case */
2267 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2268 & TARGET_PAGE_MASK)
2269 + addr1);
2270 switch (endian) {
2271 case DEVICE_LITTLE_ENDIAN:
2272 val = ldq_le_p(ptr);
2273 break;
2274 case DEVICE_BIG_ENDIAN:
2275 val = ldq_be_p(ptr);
2276 break;
2277 default:
2278 val = ldq_p(ptr);
2279 break;
2280 }
2281 }
2282 return val;
2283 }
2284
2285 uint64_t ldq_phys(hwaddr addr)
2286 {
2287 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2288 }
2289
2290 uint64_t ldq_le_phys(hwaddr addr)
2291 {
2292 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2293 }
2294
2295 uint64_t ldq_be_phys(hwaddr addr)
2296 {
2297 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2298 }
2299
2300 /* XXX: optimize */
2301 uint32_t ldub_phys(hwaddr addr)
2302 {
2303 uint8_t val;
2304 cpu_physical_memory_read(addr, &val, 1);
2305 return val;
2306 }
2307
2308 /* warning: addr must be aligned */
2309 static inline uint32_t lduw_phys_internal(hwaddr addr,
2310 enum device_endian endian)
2311 {
2312 uint8_t *ptr;
2313 uint64_t val;
2314 MemoryRegion *mr;
2315 hwaddr l = 2;
2316 hwaddr addr1;
2317
2318 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2319 false);
2320 if (l < 2 || !memory_access_is_direct(mr, false)) {
2321 /* I/O case */
2322 io_mem_read(mr, addr1, &val, 2);
2323 #if defined(TARGET_WORDS_BIGENDIAN)
2324 if (endian == DEVICE_LITTLE_ENDIAN) {
2325 val = bswap16(val);
2326 }
2327 #else
2328 if (endian == DEVICE_BIG_ENDIAN) {
2329 val = bswap16(val);
2330 }
2331 #endif
2332 } else {
2333 /* RAM case */
2334 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2335 & TARGET_PAGE_MASK)
2336 + addr1);
2337 switch (endian) {
2338 case DEVICE_LITTLE_ENDIAN:
2339 val = lduw_le_p(ptr);
2340 break;
2341 case DEVICE_BIG_ENDIAN:
2342 val = lduw_be_p(ptr);
2343 break;
2344 default:
2345 val = lduw_p(ptr);
2346 break;
2347 }
2348 }
2349 return val;
2350 }
2351
2352 uint32_t lduw_phys(hwaddr addr)
2353 {
2354 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2355 }
2356
2357 uint32_t lduw_le_phys(hwaddr addr)
2358 {
2359 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2360 }
2361
2362 uint32_t lduw_be_phys(hwaddr addr)
2363 {
2364 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2365 }
2366
2367 /* warning: addr must be aligned. The ram page is not masked as dirty
2368 and the code inside is not invalidated. It is useful if the dirty
2369 bits are used to track modified PTEs */
2370 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2371 {
2372 uint8_t *ptr;
2373 MemoryRegion *mr;
2374 hwaddr l = 4;
2375 hwaddr addr1;
2376
2377 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2378 true);
2379 if (l < 4 || !memory_access_is_direct(mr, true)) {
2380 io_mem_write(mr, addr1, val, 4);
2381 } else {
2382 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2383 ptr = qemu_get_ram_ptr(addr1);
2384 stl_p(ptr, val);
2385
2386 if (unlikely(in_migration)) {
2387 if (!cpu_physical_memory_is_dirty(addr1)) {
2388 /* invalidate code */
2389 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2390 /* set dirty bit */
2391 cpu_physical_memory_set_dirty_flags(
2392 addr1, (0xff & ~CODE_DIRTY_FLAG));
2393 }
2394 }
2395 }
2396 }
2397
2398 /* warning: addr must be aligned */
2399 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2400 enum device_endian endian)
2401 {
2402 uint8_t *ptr;
2403 MemoryRegion *mr;
2404 hwaddr l = 4;
2405 hwaddr addr1;
2406
2407 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2408 true);
2409 if (l < 4 || !memory_access_is_direct(mr, true)) {
2410 #if defined(TARGET_WORDS_BIGENDIAN)
2411 if (endian == DEVICE_LITTLE_ENDIAN) {
2412 val = bswap32(val);
2413 }
2414 #else
2415 if (endian == DEVICE_BIG_ENDIAN) {
2416 val = bswap32(val);
2417 }
2418 #endif
2419 io_mem_write(mr, addr1, val, 4);
2420 } else {
2421 /* RAM case */
2422 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2423 ptr = qemu_get_ram_ptr(addr1);
2424 switch (endian) {
2425 case DEVICE_LITTLE_ENDIAN:
2426 stl_le_p(ptr, val);
2427 break;
2428 case DEVICE_BIG_ENDIAN:
2429 stl_be_p(ptr, val);
2430 break;
2431 default:
2432 stl_p(ptr, val);
2433 break;
2434 }
2435 invalidate_and_set_dirty(addr1, 4);
2436 }
2437 }
2438
2439 void stl_phys(hwaddr addr, uint32_t val)
2440 {
2441 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2442 }
2443
2444 void stl_le_phys(hwaddr addr, uint32_t val)
2445 {
2446 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2447 }
2448
2449 void stl_be_phys(hwaddr addr, uint32_t val)
2450 {
2451 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2452 }
2453
2454 /* XXX: optimize */
2455 void stb_phys(hwaddr addr, uint32_t val)
2456 {
2457 uint8_t v = val;
2458 cpu_physical_memory_write(addr, &v, 1);
2459 }
2460
2461 /* warning: addr must be aligned */
2462 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2463 enum device_endian endian)
2464 {
2465 uint8_t *ptr;
2466 MemoryRegion *mr;
2467 hwaddr l = 2;
2468 hwaddr addr1;
2469
2470 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2471 true);
2472 if (l < 2 || !memory_access_is_direct(mr, true)) {
2473 #if defined(TARGET_WORDS_BIGENDIAN)
2474 if (endian == DEVICE_LITTLE_ENDIAN) {
2475 val = bswap16(val);
2476 }
2477 #else
2478 if (endian == DEVICE_BIG_ENDIAN) {
2479 val = bswap16(val);
2480 }
2481 #endif
2482 io_mem_write(mr, addr1, val, 2);
2483 } else {
2484 /* RAM case */
2485 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2486 ptr = qemu_get_ram_ptr(addr1);
2487 switch (endian) {
2488 case DEVICE_LITTLE_ENDIAN:
2489 stw_le_p(ptr, val);
2490 break;
2491 case DEVICE_BIG_ENDIAN:
2492 stw_be_p(ptr, val);
2493 break;
2494 default:
2495 stw_p(ptr, val);
2496 break;
2497 }
2498 invalidate_and_set_dirty(addr1, 2);
2499 }
2500 }
2501
2502 void stw_phys(hwaddr addr, uint32_t val)
2503 {
2504 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2505 }
2506
2507 void stw_le_phys(hwaddr addr, uint32_t val)
2508 {
2509 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2510 }
2511
2512 void stw_be_phys(hwaddr addr, uint32_t val)
2513 {
2514 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2515 }
2516
2517 /* XXX: optimize */
2518 void stq_phys(hwaddr addr, uint64_t val)
2519 {
2520 val = tswap64(val);
2521 cpu_physical_memory_write(addr, &val, 8);
2522 }
2523
2524 void stq_le_phys(hwaddr addr, uint64_t val)
2525 {
2526 val = cpu_to_le64(val);
2527 cpu_physical_memory_write(addr, &val, 8);
2528 }
2529
2530 void stq_be_phys(hwaddr addr, uint64_t val)
2531 {
2532 val = cpu_to_be64(val);
2533 cpu_physical_memory_write(addr, &val, 8);
2534 }
2535
2536 /* virtual memory access for debug (includes writing to ROM) */
2537 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2538 uint8_t *buf, int len, int is_write)
2539 {
2540 int l;
2541 hwaddr phys_addr;
2542 target_ulong page;
2543
2544 while (len > 0) {
2545 page = addr & TARGET_PAGE_MASK;
2546 phys_addr = cpu_get_phys_page_debug(env, page);
2547 /* if no physical page mapped, return an error */
2548 if (phys_addr == -1)
2549 return -1;
2550 l = (page + TARGET_PAGE_SIZE) - addr;
2551 if (l > len)
2552 l = len;
2553 phys_addr += (addr & ~TARGET_PAGE_MASK);
2554 if (is_write)
2555 cpu_physical_memory_write_rom(phys_addr, buf, l);
2556 else
2557 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2558 len -= l;
2559 buf += l;
2560 addr += l;
2561 }
2562 return 0;
2563 }
2564 #endif
2565
2566 #if !defined(CONFIG_USER_ONLY)
2567
/*
 * A helper function for the _utterly broken_ virtio device model to find out
 * if it's running on a big endian machine. Don't do this at home kids!
 *
 * The answer is fixed at build time: true when TARGET_WORDS_BIGENDIAN is
 * defined, false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
    bool big_endian = false;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#endif

    return big_endian;
}
2581
2582 #endif
2583
2584 #ifndef CONFIG_USER_ONLY
2585 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2586 {
2587 MemoryRegion*mr;
2588 hwaddr l = 1;
2589
2590 mr = address_space_translate(&address_space_memory,
2591 phys_addr, &phys_addr, &l, false);
2592
2593 return !(memory_region_is_ram(mr) ||
2594 memory_region_is_romd(mr));
2595 }
2596
2597 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2598 {
2599 RAMBlock *block;
2600
2601 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2602 func(block->host, block->offset, block->length, opaque);
2603 }
2604 }
2605 #endif