]> git.proxmox.com Git - mirror_qemu.git/blob - exec.c
Merge remote-tracking branch 'mjt/trivial-patches' into staging
[mirror_qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_SUBPAGE
54
55 #if !defined(CONFIG_USER_ONLY)
56 int phys_ram_fd;
57 static int in_migration;
58
59 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
60
61 static MemoryRegion *system_memory;
62 static MemoryRegion *system_io;
63
64 AddressSpace address_space_io;
65 AddressSpace address_space_memory;
66
67 MemoryRegion io_mem_rom, io_mem_notdirty;
68 static MemoryRegion io_mem_unassigned;
69
70 #endif
71
72 CPUArchState *first_cpu;
73 /* current CPU in the current thread. It is only valid inside
74 cpu_exec() */
75 DEFINE_TLS(CPUArchState *,cpu_single_env);
76 /* 0 = Do not count executed instructions.
77 1 = Precise instruction counting.
78 2 = Adaptive rate instruction counting. */
79 int use_icount;
80
81 #if !defined(CONFIG_USER_ONLY)
82
83 typedef struct PhysPageEntry PhysPageEntry;
84
/*
 * One 16-bit slot of the physical page map.  The is_leaf bit selects which
 * table the 15-bit ptr field indexes.
 */
85 struct PhysPageEntry {
86 uint16_t is_leaf : 1;
87 /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
88 uint16_t ptr : 15;
89 };
90
/*
 * Dispatch state attached to one AddressSpace: the root entry of its page
 * map, the memory listener embedded in it (mem_add() recovers the dispatch
 * from the listener via container_of), and a back pointer to the owner.
 */
91 struct AddressSpaceDispatch {
92 /* This is a multi-level map on the physical address space.
93 * The bottom level has pointers to MemoryRegionSections.
94 */
95 PhysPageEntry phys_map;
96 MemoryListener listener;
97 AddressSpace *as;
98 };
99
/* Offset of addr inside its target page (the bits below TARGET_PAGE_MASK). */
100 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/*
 * A page that is split between several sections.  sub_section[] holds one
 * phys_sections index per byte offset in the page; lookups index it with
 * SUBPAGE_IDX(addr).
 */
101 typedef struct subpage_t {
102 MemoryRegion iomem;
103 AddressSpace *as;
104 hwaddr base;
105 uint16_t sub_section[TARGET_PAGE_SIZE];
106 } subpage_t;
107
/*
 * Growable table of sections referenced by leaf PhysPageEntry.ptr values:
 * phys_sections_nb entries are in use out of phys_sections_nb_alloc.
 */
108 static MemoryRegionSection *phys_sections;
109 static unsigned phys_sections_nb, phys_sections_nb_alloc;
/* Indices into phys_sections of the special-purpose sections. */
110 static uint16_t phys_section_unassigned;
111 static uint16_t phys_section_notdirty;
112 static uint16_t phys_section_rom;
113 static uint16_t phys_section_watch;
114
115 /* Simple allocator for PhysPageEntry nodes */
116 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
117 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
118
/* "No node" marker: 0x7fff, the largest value the 15-bit ptr field can hold. */
119 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
120
121 static void io_mem_init(void);
122 static void memory_map_init(void);
123 static void *qemu_safe_ram_ptr(ram_addr_t addr);
124
125 static MemoryRegion io_mem_watch;
126 #endif
127
128 #if !defined(CONFIG_USER_ONLY)
129
130 static void phys_map_node_reserve(unsigned nodes)
131 {
132 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
133 typedef PhysPageEntry Node[L2_SIZE];
134 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
135 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
136 phys_map_nodes_nb + nodes);
137 phys_map_nodes = g_renew(Node, phys_map_nodes,
138 phys_map_nodes_nb_alloc);
139 }
140 }
141
142 static uint16_t phys_map_node_alloc(void)
143 {
144 unsigned i;
145 uint16_t ret;
146
147 ret = phys_map_nodes_nb++;
148 assert(ret != PHYS_MAP_NODE_NIL);
149 assert(ret != phys_map_nodes_nb_alloc);
150 for (i = 0; i < L2_SIZE; ++i) {
151 phys_map_nodes[ret][i].is_leaf = 0;
152 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
153 }
154 return ret;
155 }
156
157 static void phys_map_nodes_reset(void)
158 {
159 phys_map_nodes_nb = 0;
160 }
161
162
163 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
164 hwaddr *nb, uint16_t leaf,
165 int level)
166 {
167 PhysPageEntry *p;
168 int i;
169 hwaddr step = (hwaddr)1 << (level * L2_BITS);
170
171 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
172 lp->ptr = phys_map_node_alloc();
173 p = phys_map_nodes[lp->ptr];
174 if (level == 0) {
175 for (i = 0; i < L2_SIZE; i++) {
176 p[i].is_leaf = 1;
177 p[i].ptr = phys_section_unassigned;
178 }
179 }
180 } else {
181 p = phys_map_nodes[lp->ptr];
182 }
183 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
184
185 while (*nb && lp < &p[L2_SIZE]) {
186 if ((*index & (step - 1)) == 0 && *nb >= step) {
187 lp->is_leaf = true;
188 lp->ptr = leaf;
189 *index += step;
190 *nb -= step;
191 } else {
192 phys_page_set_level(lp, index, nb, leaf, level - 1);
193 }
194 ++lp;
195 }
196 }
197
198 static void phys_page_set(AddressSpaceDispatch *d,
199 hwaddr index, hwaddr nb,
200 uint16_t leaf)
201 {
202 /* Wildly overreserve - it doesn't matter much. */
203 phys_map_node_reserve(3 * P_L2_LEVELS);
204
205 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
206 }
207
208 static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
209 {
210 PhysPageEntry lp = d->phys_map;
211 PhysPageEntry *p;
212 int i;
213
214 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
215 if (lp.ptr == PHYS_MAP_NODE_NIL) {
216 return &phys_sections[phys_section_unassigned];
217 }
218 p = phys_map_nodes[lp.ptr];
219 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
220 }
221 return &phys_sections[lp.ptr];
222 }
223
224 bool memory_region_is_unassigned(MemoryRegion *mr)
225 {
226 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
227 && mr != &io_mem_watch;
228 }
229
230 static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
231 hwaddr addr,
232 bool resolve_subpage)
233 {
234 MemoryRegionSection *section;
235 subpage_t *subpage;
236
237 section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
238 if (resolve_subpage && section->mr->subpage) {
239 subpage = container_of(section->mr, subpage_t, iomem);
240 section = &phys_sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
241 }
242 return section;
243 }
244
245 static MemoryRegionSection *
246 address_space_translate_internal(AddressSpace *as, hwaddr addr, hwaddr *xlat,
247 hwaddr *plen, bool resolve_subpage)
248 {
249 MemoryRegionSection *section;
250 Int128 diff;
251
252 section = address_space_lookup_region(as, addr, resolve_subpage);
253 /* Compute offset within MemoryRegionSection */
254 addr -= section->offset_within_address_space;
255
256 /* Compute offset within MemoryRegion */
257 *xlat = addr + section->offset_within_region;
258
259 diff = int128_sub(section->mr->size, int128_make64(addr));
260 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
261 return section;
262 }
263
264 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
265 hwaddr *xlat, hwaddr *plen,
266 bool is_write)
267 {
268 IOMMUTLBEntry iotlb;
269 MemoryRegionSection *section;
270 MemoryRegion *mr;
271 hwaddr len = *plen;
272
273 for (;;) {
274 section = address_space_translate_internal(as, addr, &addr, plen, true);
275 mr = section->mr;
276
277 if (!mr->iommu_ops) {
278 break;
279 }
280
281 iotlb = mr->iommu_ops->translate(mr, addr);
282 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
283 | (addr & iotlb.addr_mask));
284 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
285 if (!(iotlb.perm & (1 << is_write))) {
286 mr = &io_mem_unassigned;
287 break;
288 }
289
290 as = iotlb.target_as;
291 }
292
293 *plen = len;
294 *xlat = addr;
295 return mr;
296 }
297
298 MemoryRegionSection *
299 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
300 hwaddr *plen)
301 {
302 MemoryRegionSection *section;
303 section = address_space_translate_internal(as, addr, xlat, plen, false);
304
305 assert(!section->mr->iommu_ops);
306 return section;
307 }
308 #endif
309
310 void cpu_exec_init_all(void)
311 {
312 #if !defined(CONFIG_USER_ONLY)
313 qemu_mutex_init(&ram_list.mutex);
314 memory_map_init();
315 io_mem_init();
316 #endif
317 }
318
319 #if !defined(CONFIG_USER_ONLY)
320
321 static int cpu_common_post_load(void *opaque, int version_id)
322 {
323 CPUState *cpu = opaque;
324
325 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
326 version_id is increased. */
327 cpu->interrupt_request &= ~0x01;
328 tlb_flush(cpu->env_ptr, 1);
329
330 return 0;
331 }
332
/*
 * Migration description for the CPUState fields saved for every CPU: the
 * halted flag and the pending interrupt_request mask.
 * cpu_common_post_load() runs after these fields have been loaded.
 */
333 const VMStateDescription vmstate_cpu_common = {
334 .name = "cpu_common",
335 .version_id = 1,
336 .minimum_version_id = 1,
337 .minimum_version_id_old = 1,
338 .post_load = cpu_common_post_load,
339 .fields = (VMStateField []) {
340 VMSTATE_UINT32(halted, CPUState),
341 VMSTATE_UINT32(interrupt_request, CPUState),
342 VMSTATE_END_OF_LIST()
343 }
344 };
345
346 #endif
347
348 CPUState *qemu_get_cpu(int index)
349 {
350 CPUArchState *env = first_cpu;
351 CPUState *cpu = NULL;
352
353 while (env) {
354 cpu = ENV_GET_CPU(env);
355 if (cpu->cpu_index == index) {
356 break;
357 }
358 env = env->next_cpu;
359 }
360
361 return env ? cpu : NULL;
362 }
363
364 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
365 {
366 CPUArchState *env = first_cpu;
367
368 while (env) {
369 func(ENV_GET_CPU(env), data);
370 env = env->next_cpu;
371 }
372 }
373
374 void cpu_exec_init(CPUArchState *env)
375 {
376 CPUState *cpu = ENV_GET_CPU(env);
377 CPUClass *cc = CPU_GET_CLASS(cpu);
378 CPUArchState **penv;
379 int cpu_index;
380
381 #if defined(CONFIG_USER_ONLY)
382 cpu_list_lock();
383 #endif
384 env->next_cpu = NULL;
385 penv = &first_cpu;
386 cpu_index = 0;
387 while (*penv != NULL) {
388 penv = &(*penv)->next_cpu;
389 cpu_index++;
390 }
391 cpu->cpu_index = cpu_index;
392 cpu->numa_node = 0;
393 QTAILQ_INIT(&env->breakpoints);
394 QTAILQ_INIT(&env->watchpoints);
395 #ifndef CONFIG_USER_ONLY
396 cpu->thread_id = qemu_get_thread_id();
397 #endif
398 *penv = env;
399 #if defined(CONFIG_USER_ONLY)
400 cpu_list_unlock();
401 #endif
402 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
403 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
404 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
405 cpu_save, cpu_load, env);
406 assert(cc->vmsd == NULL);
407 #endif
408 if (cc->vmsd != NULL) {
409 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
410 }
411 }
412
413 #if defined(TARGET_HAS_ICE)
414 #if defined(CONFIG_USER_ONLY)
415 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
416 {
417 tb_invalidate_phys_page_range(pc, pc + 1, 0);
418 }
419 #else
420 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
421 {
422 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
423 (pc & ~TARGET_PAGE_MASK));
424 }
425 #endif
426 #endif /* TARGET_HAS_ICE */
427
428 #if defined(CONFIG_USER_ONLY)
429 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
430
431 {
432 }
433
434 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
435 int flags, CPUWatchpoint **watchpoint)
436 {
437 return -ENOSYS;
438 }
439 #else
440 /* Add a watchpoint. */
441 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
442 int flags, CPUWatchpoint **watchpoint)
443 {
444 target_ulong len_mask = ~(len - 1);
445 CPUWatchpoint *wp;
446
447 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
448 if ((len & (len - 1)) || (addr & ~len_mask) ||
449 len == 0 || len > TARGET_PAGE_SIZE) {
450 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
451 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
452 return -EINVAL;
453 }
454 wp = g_malloc(sizeof(*wp));
455
456 wp->vaddr = addr;
457 wp->len_mask = len_mask;
458 wp->flags = flags;
459
460 /* keep all GDB-injected watchpoints in front */
461 if (flags & BP_GDB)
462 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
463 else
464 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
465
466 tlb_flush_page(env, addr);
467
468 if (watchpoint)
469 *watchpoint = wp;
470 return 0;
471 }
472
473 /* Remove a specific watchpoint. */
474 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
475 int flags)
476 {
477 target_ulong len_mask = ~(len - 1);
478 CPUWatchpoint *wp;
479
480 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
481 if (addr == wp->vaddr && len_mask == wp->len_mask
482 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
483 cpu_watchpoint_remove_by_ref(env, wp);
484 return 0;
485 }
486 }
487 return -ENOENT;
488 }
489
490 /* Remove a specific watchpoint by reference. */
491 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
492 {
493 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
494
495 tlb_flush_page(env, watchpoint->vaddr);
496
497 g_free(watchpoint);
498 }
499
500 /* Remove all matching watchpoints. */
501 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
502 {
503 CPUWatchpoint *wp, *next;
504
505 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
506 if (wp->flags & mask)
507 cpu_watchpoint_remove_by_ref(env, wp);
508 }
509 }
510 #endif
511
512 /* Add a breakpoint. */
513 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
514 CPUBreakpoint **breakpoint)
515 {
516 #if defined(TARGET_HAS_ICE)
517 CPUBreakpoint *bp;
518
519 bp = g_malloc(sizeof(*bp));
520
521 bp->pc = pc;
522 bp->flags = flags;
523
524 /* keep all GDB-injected breakpoints in front */
525 if (flags & BP_GDB)
526 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
527 else
528 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
529
530 breakpoint_invalidate(env, pc);
531
532 if (breakpoint)
533 *breakpoint = bp;
534 return 0;
535 #else
536 return -ENOSYS;
537 #endif
538 }
539
540 /* Remove a specific breakpoint. */
541 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
542 {
543 #if defined(TARGET_HAS_ICE)
544 CPUBreakpoint *bp;
545
546 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
547 if (bp->pc == pc && bp->flags == flags) {
548 cpu_breakpoint_remove_by_ref(env, bp);
549 return 0;
550 }
551 }
552 return -ENOENT;
553 #else
554 return -ENOSYS;
555 #endif
556 }
557
558 /* Remove a specific breakpoint by reference. */
559 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
560 {
561 #if defined(TARGET_HAS_ICE)
562 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
563
564 breakpoint_invalidate(env, breakpoint->pc);
565
566 g_free(breakpoint);
567 #endif
568 }
569
570 /* Remove all matching breakpoints. */
571 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
572 {
573 #if defined(TARGET_HAS_ICE)
574 CPUBreakpoint *bp, *next;
575
576 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
577 if (bp->flags & mask)
578 cpu_breakpoint_remove_by_ref(env, bp);
579 }
580 #endif
581 }
582
583 /* enable or disable single step mode. EXCP_DEBUG is returned by the
584 CPU loop after each instruction */
585 void cpu_single_step(CPUArchState *env, int enabled)
586 {
587 #if defined(TARGET_HAS_ICE)
588 if (env->singlestep_enabled != enabled) {
589 env->singlestep_enabled = enabled;
590 if (kvm_enabled())
591 kvm_update_guest_debug(env, 0);
592 else {
593 /* must flush all the translated code to avoid inconsistencies */
594 /* XXX: only flush what is necessary */
595 tb_flush(env);
596 }
597 }
598 #endif
599 }
600
601 void cpu_abort(CPUArchState *env, const char *fmt, ...)
602 {
603 CPUState *cpu = ENV_GET_CPU(env);
604 va_list ap;
605 va_list ap2;
606
607 va_start(ap, fmt);
608 va_copy(ap2, ap);
609 fprintf(stderr, "qemu: fatal: ");
610 vfprintf(stderr, fmt, ap);
611 fprintf(stderr, "\n");
612 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
613 if (qemu_log_enabled()) {
614 qemu_log("qemu: fatal: ");
615 qemu_log_vprintf(fmt, ap2);
616 qemu_log("\n");
617 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
618 qemu_log_flush();
619 qemu_log_close();
620 }
621 va_end(ap2);
622 va_end(ap);
623 #if defined(CONFIG_USER_ONLY)
624 {
625 struct sigaction act;
626 sigfillset(&act.sa_mask);
627 act.sa_handler = SIG_DFL;
628 sigaction(SIGABRT, &act, NULL);
629 }
630 #endif
631 abort();
632 }
633
634 CPUArchState *cpu_copy(CPUArchState *env)
635 {
636 CPUArchState *new_env = cpu_init(env->cpu_model_str);
637 CPUArchState *next_cpu = new_env->next_cpu;
638 #if defined(TARGET_HAS_ICE)
639 CPUBreakpoint *bp;
640 CPUWatchpoint *wp;
641 #endif
642
643 memcpy(new_env, env, sizeof(CPUArchState));
644
645 /* Preserve chaining. */
646 new_env->next_cpu = next_cpu;
647
648 /* Clone all break/watchpoints.
649 Note: Once we support ptrace with hw-debug register access, make sure
650 BP_CPU break/watchpoints are handled correctly on clone. */
651 QTAILQ_INIT(&env->breakpoints);
652 QTAILQ_INIT(&env->watchpoints);
653 #if defined(TARGET_HAS_ICE)
654 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
655 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
656 }
657 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
658 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
659 wp->flags, NULL);
660 }
661 #endif
662
663 return new_env;
664 }
665
666 #if !defined(CONFIG_USER_ONLY)
667 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
668 uintptr_t length)
669 {
670 uintptr_t start1;
671
672 /* we modify the TLB cache so that the dirty bit will be set again
673 when accessing the range */
674 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
675 /* Check that we don't span multiple blocks - this breaks the
676 address comparisons below. */
677 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
678 != (end - 1) - start) {
679 abort();
680 }
681 cpu_tlb_reset_dirty_all(start1, length);
682
683 }
684
685 /* Note: start and end must be within the same ram block. */
686 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
687 int dirty_flags)
688 {
689 uintptr_t length;
690
691 start &= TARGET_PAGE_MASK;
692 end = TARGET_PAGE_ALIGN(end);
693
694 length = end - start;
695 if (length == 0)
696 return;
697 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
698
699 if (tcg_enabled()) {
700 tlb_reset_dirty_range_all(start, end, length);
701 }
702 }
703
704 static int cpu_physical_memory_set_dirty_tracking(int enable)
705 {
706 int ret = 0;
707 in_migration = enable;
708 return ret;
709 }
710
711 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
712 MemoryRegionSection *section,
713 target_ulong vaddr,
714 hwaddr paddr, hwaddr xlat,
715 int prot,
716 target_ulong *address)
717 {
718 hwaddr iotlb;
719 CPUWatchpoint *wp;
720
721 if (memory_region_is_ram(section->mr)) {
722 /* Normal RAM. */
723 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
724 + xlat;
725 if (!section->readonly) {
726 iotlb |= phys_section_notdirty;
727 } else {
728 iotlb |= phys_section_rom;
729 }
730 } else {
731 iotlb = section - phys_sections;
732 iotlb += xlat;
733 }
734
735 /* Make accesses to pages with watchpoints go via the
736 watchpoint trap routines. */
737 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
738 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
739 /* Avoid trapping reads of pages with a write breakpoint. */
740 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
741 iotlb = phys_section_watch + paddr;
742 *address |= TLB_MMIO;
743 break;
744 }
745 }
746 }
747
748 return iotlb;
749 }
750 #endif /* !defined(CONFIG_USER_ONLY) */
751
752 #if !defined(CONFIG_USER_ONLY)
753
754 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
755 uint16_t section);
756 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
757 static void destroy_page_desc(uint16_t section_index)
758 {
759 MemoryRegionSection *section = &phys_sections[section_index];
760 MemoryRegion *mr = section->mr;
761
762 if (mr->subpage) {
763 subpage_t *subpage = container_of(mr, subpage_t, iomem);
764 memory_region_destroy(&subpage->iomem);
765 g_free(subpage);
766 }
767 }
768
769 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
770 {
771 unsigned i;
772 PhysPageEntry *p;
773
774 if (lp->ptr == PHYS_MAP_NODE_NIL) {
775 return;
776 }
777
778 p = phys_map_nodes[lp->ptr];
779 for (i = 0; i < L2_SIZE; ++i) {
780 if (!p[i].is_leaf) {
781 destroy_l2_mapping(&p[i], level - 1);
782 } else {
783 destroy_page_desc(p[i].ptr);
784 }
785 }
786 lp->is_leaf = 0;
787 lp->ptr = PHYS_MAP_NODE_NIL;
788 }
789
790 static void destroy_all_mappings(AddressSpaceDispatch *d)
791 {
792 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
793 phys_map_nodes_reset();
794 }
795
796 static uint16_t phys_section_add(MemoryRegionSection *section)
797 {
798 /* The physical section number is ORed with a page-aligned
799 * pointer to produce the iotlb entries. Thus it should
800 * never overflow into the page-aligned value.
801 */
802 assert(phys_sections_nb < TARGET_PAGE_SIZE);
803
804 if (phys_sections_nb == phys_sections_nb_alloc) {
805 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
806 phys_sections = g_renew(MemoryRegionSection, phys_sections,
807 phys_sections_nb_alloc);
808 }
809 phys_sections[phys_sections_nb] = *section;
810 return phys_sections_nb++;
811 }
812
813 static void phys_sections_clear(void)
814 {
815 phys_sections_nb = 0;
816 }
817
818 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
819 {
820 subpage_t *subpage;
821 hwaddr base = section->offset_within_address_space
822 & TARGET_PAGE_MASK;
823 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
824 MemoryRegionSection subsection = {
825 .offset_within_address_space = base,
826 .size = int128_make64(TARGET_PAGE_SIZE),
827 };
828 hwaddr start, end;
829
830 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
831
832 if (!(existing->mr->subpage)) {
833 subpage = subpage_init(d->as, base);
834 subsection.mr = &subpage->iomem;
835 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
836 phys_section_add(&subsection));
837 } else {
838 subpage = container_of(existing->mr, subpage_t, iomem);
839 }
840 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
841 end = start + int128_get64(section->size) - 1;
842 subpage_register(subpage, start, end, phys_section_add(section));
843 }
844
845
846 static void register_multipage(AddressSpaceDispatch *d,
847 MemoryRegionSection *section)
848 {
849 hwaddr start_addr = section->offset_within_address_space;
850 uint16_t section_index = phys_section_add(section);
851 uint64_t num_pages = int128_get64(int128_rshift(section->size,
852 TARGET_PAGE_BITS));
853
854 assert(num_pages);
855 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
856 }
857
858 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
859 {
860 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
861 MemoryRegionSection now = *section, remain = *section;
862 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
863
864 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
865 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
866 - now.offset_within_address_space;
867
868 now.size = int128_min(int128_make64(left), now.size);
869 register_subpage(d, &now);
870 } else {
871 now.size = int128_zero();
872 }
873 while (int128_ne(remain.size, now.size)) {
874 remain.size = int128_sub(remain.size, now.size);
875 remain.offset_within_address_space += int128_get64(now.size);
876 remain.offset_within_region += int128_get64(now.size);
877 now = remain;
878 if (int128_lt(remain.size, page_size)) {
879 register_subpage(d, &now);
880 } else if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
881 now.size = page_size;
882 register_subpage(d, &now);
883 } else {
884 now.size = int128_and(now.size, int128_neg(page_size));
885 register_multipage(d, &now);
886 }
887 }
888 }
889
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not in use. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
895
896 void qemu_mutex_lock_ramlist(void)
897 {
898 qemu_mutex_lock(&ram_list.mutex);
899 }
900
901 void qemu_mutex_unlock_ramlist(void)
902 {
903 qemu_mutex_unlock(&ram_list.mutex);
904 }
905
906 #if defined(__linux__) && !defined(TARGET_S390X)
907
908 #include <sys/vfs.h>
909
910 #define HUGETLBFS_MAGIC 0x958458f6
911
912 static long gethugepagesize(const char *path)
913 {
914 struct statfs fs;
915 int ret;
916
917 do {
918 ret = statfs(path, &fs);
919 } while (ret != 0 && errno == EINTR);
920
921 if (ret != 0) {
922 perror(path);
923 return 0;
924 }
925
926 if (fs.f_type != HUGETLBFS_MAGIC)
927 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
928
929 return fs.f_bsize;
930 }
931
932 static void *file_ram_alloc(RAMBlock *block,
933 ram_addr_t memory,
934 const char *path)
935 {
936 char *filename;
937 char *sanitized_name;
938 char *c;
939 void *area;
940 int fd;
941 #ifdef MAP_POPULATE
942 int flags;
943 #endif
944 unsigned long hpagesize;
945
946 hpagesize = gethugepagesize(path);
947 if (!hpagesize) {
948 return NULL;
949 }
950
951 if (memory < hpagesize) {
952 return NULL;
953 }
954
955 if (kvm_enabled() && !kvm_has_sync_mmu()) {
956 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
957 return NULL;
958 }
959
960 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
961 sanitized_name = g_strdup(block->mr->name);
962 for (c = sanitized_name; *c != '\0'; c++) {
963 if (*c == '/')
964 *c = '_';
965 }
966
967 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
968 sanitized_name);
969 g_free(sanitized_name);
970
971 fd = mkstemp(filename);
972 if (fd < 0) {
973 perror("unable to create backing store for hugepages");
974 g_free(filename);
975 return NULL;
976 }
977 unlink(filename);
978 g_free(filename);
979
980 memory = (memory+hpagesize-1) & ~(hpagesize-1);
981
982 /*
983 * ftruncate is not supported by hugetlbfs in older
984 * hosts, so don't bother bailing out on errors.
985 * If anything goes wrong with it under other filesystems,
986 * mmap will fail.
987 */
988 if (ftruncate(fd, memory))
989 perror("ftruncate");
990
991 #ifdef MAP_POPULATE
992 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
993 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
994 * to sidestep this quirk.
995 */
996 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
997 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
998 #else
999 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1000 #endif
1001 if (area == MAP_FAILED) {
1002 perror("file_ram_alloc: can't mmap RAM pages");
1003 close(fd);
1004 return (NULL);
1005 }
1006 block->fd = fd;
1007 return area;
1008 }
1009 #endif
1010
1011 static ram_addr_t find_ram_offset(ram_addr_t size)
1012 {
1013 RAMBlock *block, *next_block;
1014 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1015
1016 assert(size != 0); /* it would hand out same offset multiple times */
1017
1018 if (QTAILQ_EMPTY(&ram_list.blocks))
1019 return 0;
1020
1021 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1022 ram_addr_t end, next = RAM_ADDR_MAX;
1023
1024 end = block->offset + block->length;
1025
1026 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1027 if (next_block->offset >= end) {
1028 next = MIN(next, next_block->offset);
1029 }
1030 }
1031 if (next - end >= size && next - end < mingap) {
1032 offset = end;
1033 mingap = next - end;
1034 }
1035 }
1036
1037 if (offset == RAM_ADDR_MAX) {
1038 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1039 (uint64_t)size);
1040 abort();
1041 }
1042
1043 return offset;
1044 }
1045
1046 ram_addr_t last_ram_offset(void)
1047 {
1048 RAMBlock *block;
1049 ram_addr_t last = 0;
1050
1051 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1052 last = MAX(last, block->offset + block->length);
1053
1054 return last;
1055 }
1056
1057 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1058 {
1059 int ret;
1060 QemuOpts *machine_opts;
1061
1062 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1063 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1064 if (machine_opts &&
1065 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1066 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1067 if (ret) {
1068 perror("qemu_madvise");
1069 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1070 "but dump_guest_core=off specified\n");
1071 }
1072 }
1073 }
1074
1075 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1076 {
1077 RAMBlock *new_block, *block;
1078
1079 new_block = NULL;
1080 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1081 if (block->offset == addr) {
1082 new_block = block;
1083 break;
1084 }
1085 }
1086 assert(new_block);
1087 assert(!new_block->idstr[0]);
1088
1089 if (dev) {
1090 char *id = qdev_get_dev_path(dev);
1091 if (id) {
1092 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1093 g_free(id);
1094 }
1095 }
1096 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1097
1098 /* This assumes the iothread lock is taken here too. */
1099 qemu_mutex_lock_ramlist();
1100 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1101 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1102 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1103 new_block->idstr);
1104 abort();
1105 }
1106 }
1107 qemu_mutex_unlock_ramlist();
1108 }
1109
1110 static int memory_try_enable_merging(void *addr, size_t len)
1111 {
1112 QemuOpts *opts;
1113
1114 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1115 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1116 /* disabled by the user */
1117 return 0;
1118 }
1119
1120 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1121 }
1122
1123 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1124 MemoryRegion *mr)
1125 {
1126 RAMBlock *block, *new_block;
1127
1128 size = TARGET_PAGE_ALIGN(size);
1129 new_block = g_malloc0(sizeof(*new_block));
1130
1131 /* This assumes the iothread lock is taken here too. */
1132 qemu_mutex_lock_ramlist();
1133 new_block->mr = mr;
1134 new_block->offset = find_ram_offset(size);
1135 if (host) {
1136 new_block->host = host;
1137 new_block->flags |= RAM_PREALLOC_MASK;
1138 } else {
1139 if (mem_path) {
1140 #if defined (__linux__) && !defined(TARGET_S390X)
1141 new_block->host = file_ram_alloc(new_block, size, mem_path);
1142 if (!new_block->host) {
1143 new_block->host = qemu_anon_ram_alloc(size);
1144 memory_try_enable_merging(new_block->host, size);
1145 }
1146 #else
1147 fprintf(stderr, "-mem-path option unsupported\n");
1148 exit(1);
1149 #endif
1150 } else {
1151 if (xen_enabled()) {
1152 xen_ram_alloc(new_block->offset, size, mr);
1153 } else if (kvm_enabled()) {
1154 /* some s390/kvm configurations have special constraints */
1155 new_block->host = kvm_ram_alloc(size);
1156 } else {
1157 new_block->host = qemu_anon_ram_alloc(size);
1158 }
1159 memory_try_enable_merging(new_block->host, size);
1160 }
1161 }
1162 new_block->length = size;
1163
1164 /* Keep the list sorted from biggest to smallest block. */
1165 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1166 if (block->length < new_block->length) {
1167 break;
1168 }
1169 }
1170 if (block) {
1171 QTAILQ_INSERT_BEFORE(block, new_block, next);
1172 } else {
1173 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1174 }
1175 ram_list.mru_block = NULL;
1176
1177 ram_list.version++;
1178 qemu_mutex_unlock_ramlist();
1179
1180 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1181 last_ram_offset() >> TARGET_PAGE_BITS);
1182 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1183 0, size >> TARGET_PAGE_BITS);
1184 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1185
1186 qemu_ram_setup_dump(new_block->host, size);
1187 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1188
1189 if (kvm_enabled())
1190 kvm_setup_guest_memory(new_block->host, size);
1191
1192 return new_block->offset;
1193 }
1194
1195 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1196 {
1197 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1198 }
1199
1200 void qemu_ram_free_from_ptr(ram_addr_t addr)
1201 {
1202 RAMBlock *block;
1203
1204 /* This assumes the iothread lock is taken here too. */
1205 qemu_mutex_lock_ramlist();
1206 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1207 if (addr == block->offset) {
1208 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1209 ram_list.mru_block = NULL;
1210 ram_list.version++;
1211 g_free(block);
1212 break;
1213 }
1214 }
1215 qemu_mutex_unlock_ramlist();
1216 }
1217
1218 void qemu_ram_free(ram_addr_t addr)
1219 {
1220 RAMBlock *block;
1221
1222 /* This assumes the iothread lock is taken here too. */
1223 qemu_mutex_lock_ramlist();
1224 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1225 if (addr == block->offset) {
1226 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1227 ram_list.mru_block = NULL;
1228 ram_list.version++;
1229 if (block->flags & RAM_PREALLOC_MASK) {
1230 ;
1231 } else if (mem_path) {
1232 #if defined (__linux__) && !defined(TARGET_S390X)
1233 if (block->fd) {
1234 munmap(block->host, block->length);
1235 close(block->fd);
1236 } else {
1237 qemu_anon_ram_free(block->host, block->length);
1238 }
1239 #else
1240 abort();
1241 #endif
1242 } else {
1243 if (xen_enabled()) {
1244 xen_invalidate_map_cache_entry(block->host);
1245 } else {
1246 qemu_anon_ram_free(block->host, block->length);
1247 }
1248 }
1249 g_free(block);
1250 break;
1251 }
1252 }
1253 qemu_mutex_unlock_ramlist();
1254
1255 }
1256
1257 #ifndef _WIN32
1258 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1259 {
1260 RAMBlock *block;
1261 ram_addr_t offset;
1262 int flags;
1263 void *area, *vaddr;
1264
1265 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1266 offset = addr - block->offset;
1267 if (offset < block->length) {
1268 vaddr = block->host + offset;
1269 if (block->flags & RAM_PREALLOC_MASK) {
1270 ;
1271 } else {
1272 flags = MAP_FIXED;
1273 munmap(vaddr, length);
1274 if (mem_path) {
1275 #if defined(__linux__) && !defined(TARGET_S390X)
1276 if (block->fd) {
1277 #ifdef MAP_POPULATE
1278 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1279 MAP_PRIVATE;
1280 #else
1281 flags |= MAP_PRIVATE;
1282 #endif
1283 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1284 flags, block->fd, offset);
1285 } else {
1286 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1287 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1288 flags, -1, 0);
1289 }
1290 #else
1291 abort();
1292 #endif
1293 } else {
1294 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1295 flags |= MAP_SHARED | MAP_ANONYMOUS;
1296 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1297 flags, -1, 0);
1298 #else
1299 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1300 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1301 flags, -1, 0);
1302 #endif
1303 }
1304 if (area != vaddr) {
1305 fprintf(stderr, "Could not remap addr: "
1306 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1307 length, addr);
1308 exit(1);
1309 }
1310 memory_try_enable_merging(vaddr, length);
1311 qemu_ram_setup_dump(vaddr, length);
1312 }
1313 return;
1314 }
1315 }
1316 }
1317 #endif /* !_WIN32 */
1318
1319 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1320 With the exception of the softmmu code in this file, this should
1321 only be used for local memory (e.g. video ram) that the device owns,
1322 and knows it isn't going to access beyond the end of the block.
1323
1324 It should not be used for general purpose DMA.
1325 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1326 */
1327 void *qemu_get_ram_ptr(ram_addr_t addr)
1328 {
1329 RAMBlock *block;
1330
1331 /* The list is protected by the iothread lock here. */
1332 block = ram_list.mru_block;
1333 if (block && addr - block->offset < block->length) {
1334 goto found;
1335 }
1336 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1337 if (addr - block->offset < block->length) {
1338 goto found;
1339 }
1340 }
1341
1342 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1343 abort();
1344
1345 found:
1346 ram_list.mru_block = block;
1347 if (xen_enabled()) {
1348 /* We need to check if the requested address is in the RAM
1349 * because we don't want to map the entire memory in QEMU.
1350 * In that case just map until the end of the page.
1351 */
1352 if (block->offset == 0) {
1353 return xen_map_cache(addr, 0, 0);
1354 } else if (block->host == NULL) {
1355 block->host =
1356 xen_map_cache(block->offset, block->length, 1);
1357 }
1358 }
1359 return block->host + (addr - block->offset);
1360 }
1361
1362 /* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
1363 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1364 *
1365 * ??? Is this still necessary?
1366 */
1367 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1368 {
1369 RAMBlock *block;
1370
1371 /* The list is protected by the iothread lock here. */
1372 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1373 if (addr - block->offset < block->length) {
1374 if (xen_enabled()) {
1375 /* We need to check if the requested address is in the RAM
1376 * because we don't want to map the entire memory in QEMU.
1377 * In that case just map until the end of the page.
1378 */
1379 if (block->offset == 0) {
1380 return xen_map_cache(addr, 0, 0);
1381 } else if (block->host == NULL) {
1382 block->host =
1383 xen_map_cache(block->offset, block->length, 1);
1384 }
1385 }
1386 return block->host + (addr - block->offset);
1387 }
1388 }
1389
1390 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1391 abort();
1392
1393 return NULL;
1394 }
1395
1396 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1397 * but takes a size argument */
1398 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1399 {
1400 if (*size == 0) {
1401 return NULL;
1402 }
1403 if (xen_enabled()) {
1404 return xen_map_cache(addr, *size, 1);
1405 } else {
1406 RAMBlock *block;
1407
1408 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1409 if (addr - block->offset < block->length) {
1410 if (addr - block->offset + *size > block->length)
1411 *size = block->length - addr + block->offset;
1412 return block->host + (addr - block->offset);
1413 }
1414 }
1415
1416 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1417 abort();
1418 }
1419 }
1420
1421 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1422 {
1423 RAMBlock *block;
1424 uint8_t *host = ptr;
1425
1426 if (xen_enabled()) {
1427 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1428 return 0;
1429 }
1430
1431 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1432 /* This case append when the block is not mapped. */
1433 if (block->host == NULL) {
1434 continue;
1435 }
1436 if (host - block->host < block->length) {
1437 *ram_addr = block->offset + (host - block->host);
1438 return 0;
1439 }
1440 }
1441
1442 return -1;
1443 }
1444
1445 /* Some of the softmmu routines need to translate from a host pointer
1446 (typically a TLB entry) back to a ram offset. */
1447 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1448 {
1449 ram_addr_t ram_addr;
1450
1451 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1452 fprintf(stderr, "Bad ram pointer %p\n", ptr);
1453 abort();
1454 }
1455 return ram_addr;
1456 }
1457
1458 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1459 uint64_t val, unsigned size)
1460 {
1461 int dirty_flags;
1462 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1463 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1464 tb_invalidate_phys_page_fast(ram_addr, size);
1465 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1466 }
1467 switch (size) {
1468 case 1:
1469 stb_p(qemu_get_ram_ptr(ram_addr), val);
1470 break;
1471 case 2:
1472 stw_p(qemu_get_ram_ptr(ram_addr), val);
1473 break;
1474 case 4:
1475 stl_p(qemu_get_ram_ptr(ram_addr), val);
1476 break;
1477 default:
1478 abort();
1479 }
1480 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1481 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1482 /* we remove the notdirty callback only if the code has been
1483 flushed */
1484 if (dirty_flags == 0xff)
1485 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1486 }
1487
1488 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1489 unsigned size, bool is_write)
1490 {
1491 return is_write;
1492 }
1493
1494 static const MemoryRegionOps notdirty_mem_ops = {
1495 .write = notdirty_mem_write,
1496 .valid.accepts = notdirty_mem_accepts,
1497 .endianness = DEVICE_NATIVE_ENDIAN,
1498 };
1499
1500 /* Generate a debug exception if a watchpoint has been hit. */
1501 static void check_watchpoint(int offset, int len_mask, int flags)
1502 {
1503 CPUArchState *env = cpu_single_env;
1504 target_ulong pc, cs_base;
1505 target_ulong vaddr;
1506 CPUWatchpoint *wp;
1507 int cpu_flags;
1508
1509 if (env->watchpoint_hit) {
1510 /* We re-entered the check after replacing the TB. Now raise
1511 * the debug interrupt so that is will trigger after the
1512 * current instruction. */
1513 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1514 return;
1515 }
1516 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1517 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1518 if ((vaddr == (wp->vaddr & len_mask) ||
1519 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1520 wp->flags |= BP_WATCHPOINT_HIT;
1521 if (!env->watchpoint_hit) {
1522 env->watchpoint_hit = wp;
1523 tb_check_watchpoint(env);
1524 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1525 env->exception_index = EXCP_DEBUG;
1526 cpu_loop_exit(env);
1527 } else {
1528 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1529 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1530 cpu_resume_from_signal(env, NULL);
1531 }
1532 }
1533 } else {
1534 wp->flags &= ~BP_WATCHPOINT_HIT;
1535 }
1536 }
1537 }
1538
1539 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1540 so these check for a hit then pass through to the normal out-of-line
1541 phys routines. */
1542 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1543 unsigned size)
1544 {
1545 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1546 switch (size) {
1547 case 1: return ldub_phys(addr);
1548 case 2: return lduw_phys(addr);
1549 case 4: return ldl_phys(addr);
1550 default: abort();
1551 }
1552 }
1553
1554 static void watch_mem_write(void *opaque, hwaddr addr,
1555 uint64_t val, unsigned size)
1556 {
1557 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1558 switch (size) {
1559 case 1:
1560 stb_phys(addr, val);
1561 break;
1562 case 2:
1563 stw_phys(addr, val);
1564 break;
1565 case 4:
1566 stl_phys(addr, val);
1567 break;
1568 default: abort();
1569 }
1570 }
1571
1572 static const MemoryRegionOps watch_mem_ops = {
1573 .read = watch_mem_read,
1574 .write = watch_mem_write,
1575 .endianness = DEVICE_NATIVE_ENDIAN,
1576 };
1577
1578 static uint64_t subpage_read(void *opaque, hwaddr addr,
1579 unsigned len)
1580 {
1581 subpage_t *subpage = opaque;
1582 uint8_t buf[4];
1583
1584 #if defined(DEBUG_SUBPAGE)
1585 printf("%s: subpage %p len %d addr " TARGET_FMT_plx "\n", __func__,
1586 subpage, len, addr);
1587 #endif
1588 address_space_read(subpage->as, addr + subpage->base, buf, len);
1589 switch (len) {
1590 case 1:
1591 return ldub_p(buf);
1592 case 2:
1593 return lduw_p(buf);
1594 case 4:
1595 return ldl_p(buf);
1596 default:
1597 abort();
1598 }
1599 }
1600
1601 static void subpage_write(void *opaque, hwaddr addr,
1602 uint64_t value, unsigned len)
1603 {
1604 subpage_t *subpage = opaque;
1605 uint8_t buf[4];
1606
1607 #if defined(DEBUG_SUBPAGE)
1608 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1609 " value %"PRIx64"\n",
1610 __func__, subpage, len, addr, value);
1611 #endif
1612 switch (len) {
1613 case 1:
1614 stb_p(buf, value);
1615 break;
1616 case 2:
1617 stw_p(buf, value);
1618 break;
1619 case 4:
1620 stl_p(buf, value);
1621 break;
1622 default:
1623 abort();
1624 }
1625 address_space_write(subpage->as, addr + subpage->base, buf, len);
1626 }
1627
1628 static bool subpage_accepts(void *opaque, hwaddr addr,
1629 unsigned size, bool is_write)
1630 {
1631 subpage_t *subpage = opaque;
1632 #if defined(DEBUG_SUBPAGE)
1633 printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx "\n",
1634 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1635 #endif
1636
1637 return address_space_access_valid(subpage->as, addr + subpage->base,
1638 size, is_write);
1639 }
1640
1641 static const MemoryRegionOps subpage_ops = {
1642 .read = subpage_read,
1643 .write = subpage_write,
1644 .valid.accepts = subpage_accepts,
1645 .endianness = DEVICE_NATIVE_ENDIAN,
1646 };
1647
1648 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1649 uint16_t section)
1650 {
1651 int idx, eidx;
1652
1653 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1654 return -1;
1655 idx = SUBPAGE_IDX(start);
1656 eidx = SUBPAGE_IDX(end);
1657 #if defined(DEBUG_SUBPAGE)
1658 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
1659 mmio, start, end, idx, eidx, memory);
1660 #endif
1661 for (; idx <= eidx; idx++) {
1662 mmio->sub_section[idx] = section;
1663 }
1664
1665 return 0;
1666 }
1667
1668 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1669 {
1670 subpage_t *mmio;
1671
1672 mmio = g_malloc0(sizeof(subpage_t));
1673
1674 mmio->as = as;
1675 mmio->base = base;
1676 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1677 "subpage", TARGET_PAGE_SIZE);
1678 mmio->iomem.subpage = true;
1679 #if defined(DEBUG_SUBPAGE)
1680 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
1681 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
1682 #endif
1683 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1684
1685 return mmio;
1686 }
1687
1688 static uint16_t dummy_section(MemoryRegion *mr)
1689 {
1690 MemoryRegionSection section = {
1691 .mr = mr,
1692 .offset_within_address_space = 0,
1693 .offset_within_region = 0,
1694 .size = int128_2_64(),
1695 };
1696
1697 return phys_section_add(&section);
1698 }
1699
1700 MemoryRegion *iotlb_to_region(hwaddr index)
1701 {
1702 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1703 }
1704
1705 static void io_mem_init(void)
1706 {
1707 memory_region_init_io(&io_mem_rom, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1708 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1709 "unassigned", UINT64_MAX);
1710 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1711 "notdirty", UINT64_MAX);
1712 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1713 "watch", UINT64_MAX);
1714 }
1715
1716 static void mem_begin(MemoryListener *listener)
1717 {
1718 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1719
1720 destroy_all_mappings(d);
1721 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1722 }
1723
1724 static void core_begin(MemoryListener *listener)
1725 {
1726 phys_sections_clear();
1727 phys_section_unassigned = dummy_section(&io_mem_unassigned);
1728 phys_section_notdirty = dummy_section(&io_mem_notdirty);
1729 phys_section_rom = dummy_section(&io_mem_rom);
1730 phys_section_watch = dummy_section(&io_mem_watch);
1731 }
1732
1733 static void tcg_commit(MemoryListener *listener)
1734 {
1735 CPUArchState *env;
1736
1737 /* since each CPU stores ram addresses in its TLB cache, we must
1738 reset the modified entries */
1739 /* XXX: slow ! */
1740 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1741 tlb_flush(env, 1);
1742 }
1743 }
1744
1745 static void core_log_global_start(MemoryListener *listener)
1746 {
1747 cpu_physical_memory_set_dirty_tracking(1);
1748 }
1749
1750 static void core_log_global_stop(MemoryListener *listener)
1751 {
1752 cpu_physical_memory_set_dirty_tracking(0);
1753 }
1754
1755 static void io_region_add(MemoryListener *listener,
1756 MemoryRegionSection *section)
1757 {
1758 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1759
1760 mrio->mr = section->mr;
1761 mrio->offset = section->offset_within_region;
1762 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1763 section->offset_within_address_space,
1764 int128_get64(section->size));
1765 ioport_register(&mrio->iorange);
1766 }
1767
1768 static void io_region_del(MemoryListener *listener,
1769 MemoryRegionSection *section)
1770 {
1771 isa_unassign_ioport(section->offset_within_address_space,
1772 int128_get64(section->size));
1773 }
1774
1775 static MemoryListener core_memory_listener = {
1776 .begin = core_begin,
1777 .log_global_start = core_log_global_start,
1778 .log_global_stop = core_log_global_stop,
1779 .priority = 1,
1780 };
1781
1782 static MemoryListener io_memory_listener = {
1783 .region_add = io_region_add,
1784 .region_del = io_region_del,
1785 .priority = 0,
1786 };
1787
/* Listener whose only hook, .commit, flushes every CPU's TLB. */
static MemoryListener tcg_memory_listener = {
    .commit = tcg_commit,
};
1791
1792 void address_space_init_dispatch(AddressSpace *as)
1793 {
1794 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1795
1796 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1797 d->listener = (MemoryListener) {
1798 .begin = mem_begin,
1799 .region_add = mem_add,
1800 .region_nop = mem_add,
1801 .priority = 0,
1802 };
1803 d->as = as;
1804 as->dispatch = d;
1805 memory_listener_register(&d->listener, as);
1806 }
1807
1808 void address_space_destroy_dispatch(AddressSpace *as)
1809 {
1810 AddressSpaceDispatch *d = as->dispatch;
1811
1812 memory_listener_unregister(&d->listener);
1813 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1814 g_free(d);
1815 as->dispatch = NULL;
1816 }
1817
1818 static void memory_map_init(void)
1819 {
1820 system_memory = g_malloc(sizeof(*system_memory));
1821 memory_region_init(system_memory, "system", INT64_MAX);
1822 address_space_init(&address_space_memory, system_memory, "memory");
1823
1824 system_io = g_malloc(sizeof(*system_io));
1825 memory_region_init(system_io, "io", 65536);
1826 address_space_init(&address_space_io, system_io, "I/O");
1827
1828 memory_listener_register(&core_memory_listener, &address_space_memory);
1829 memory_listener_register(&io_memory_listener, &address_space_io);
1830 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1831 }
1832
1833 MemoryRegion *get_system_memory(void)
1834 {
1835 return system_memory;
1836 }
1837
1838 MemoryRegion *get_system_io(void)
1839 {
1840 return system_io;
1841 }
1842
1843 #endif /* !defined(CONFIG_USER_ONLY) */
1844
1845 /* physical memory access (slow version, mainly for debug) */
1846 #if defined(CONFIG_USER_ONLY)
1847 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1848 uint8_t *buf, int len, int is_write)
1849 {
1850 int l, flags;
1851 target_ulong page;
1852 void * p;
1853
1854 while (len > 0) {
1855 page = addr & TARGET_PAGE_MASK;
1856 l = (page + TARGET_PAGE_SIZE) - addr;
1857 if (l > len)
1858 l = len;
1859 flags = page_get_flags(page);
1860 if (!(flags & PAGE_VALID))
1861 return -1;
1862 if (is_write) {
1863 if (!(flags & PAGE_WRITE))
1864 return -1;
1865 /* XXX: this code should not depend on lock_user */
1866 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1867 return -1;
1868 memcpy(p, buf, l);
1869 unlock_user(p, addr, l);
1870 } else {
1871 if (!(flags & PAGE_READ))
1872 return -1;
1873 /* XXX: this code should not depend on lock_user */
1874 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1875 return -1;
1876 memcpy(buf, p, l);
1877 unlock_user(p, addr, 0);
1878 }
1879 len -= l;
1880 buf += l;
1881 addr += l;
1882 }
1883 return 0;
1884 }
1885
1886 #else
1887
1888 static void invalidate_and_set_dirty(hwaddr addr,
1889 hwaddr length)
1890 {
1891 if (!cpu_physical_memory_is_dirty(addr)) {
1892 /* invalidate code */
1893 tb_invalidate_phys_page_range(addr, addr + length, 0);
1894 /* set dirty bit */
1895 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1896 }
1897 xen_modified_memory(addr, length);
1898 }
1899
1900 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1901 {
1902 if (memory_region_is_ram(mr)) {
1903 return !(is_write && mr->readonly);
1904 }
1905 if (memory_region_is_romd(mr)) {
1906 return !is_write;
1907 }
1908
1909 return false;
1910 }
1911
1912 static inline int memory_access_size(MemoryRegion *mr, int l, hwaddr addr)
1913 {
1914 if (l >= 4 && (((addr & 3) == 0 || mr->ops->impl.unaligned))) {
1915 return 4;
1916 }
1917 if (l >= 2 && (((addr & 1) == 0) || mr->ops->impl.unaligned)) {
1918 return 2;
1919 }
1920 return 1;
1921 }
1922
1923 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1924 int len, bool is_write)
1925 {
1926 hwaddr l;
1927 uint8_t *ptr;
1928 uint64_t val;
1929 hwaddr addr1;
1930 MemoryRegion *mr;
1931 bool error = false;
1932
1933 while (len > 0) {
1934 l = len;
1935 mr = address_space_translate(as, addr, &addr1, &l, is_write);
1936
1937 if (is_write) {
1938 if (!memory_access_is_direct(mr, is_write)) {
1939 l = memory_access_size(mr, l, addr1);
1940 /* XXX: could force cpu_single_env to NULL to avoid
1941 potential bugs */
1942 if (l == 4) {
1943 /* 32 bit write access */
1944 val = ldl_p(buf);
1945 error |= io_mem_write(mr, addr1, val, 4);
1946 } else if (l == 2) {
1947 /* 16 bit write access */
1948 val = lduw_p(buf);
1949 error |= io_mem_write(mr, addr1, val, 2);
1950 } else {
1951 /* 8 bit write access */
1952 val = ldub_p(buf);
1953 error |= io_mem_write(mr, addr1, val, 1);
1954 }
1955 } else {
1956 addr1 += memory_region_get_ram_addr(mr);
1957 /* RAM case */
1958 ptr = qemu_get_ram_ptr(addr1);
1959 memcpy(ptr, buf, l);
1960 invalidate_and_set_dirty(addr1, l);
1961 }
1962 } else {
1963 if (!memory_access_is_direct(mr, is_write)) {
1964 /* I/O case */
1965 l = memory_access_size(mr, l, addr1);
1966 if (l == 4) {
1967 /* 32 bit read access */
1968 error |= io_mem_read(mr, addr1, &val, 4);
1969 stl_p(buf, val);
1970 } else if (l == 2) {
1971 /* 16 bit read access */
1972 error |= io_mem_read(mr, addr1, &val, 2);
1973 stw_p(buf, val);
1974 } else {
1975 /* 8 bit read access */
1976 error |= io_mem_read(mr, addr1, &val, 1);
1977 stb_p(buf, val);
1978 }
1979 } else {
1980 /* RAM case */
1981 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
1982 memcpy(buf, ptr, l);
1983 }
1984 }
1985 len -= l;
1986 buf += l;
1987 addr += l;
1988 }
1989
1990 return error;
1991 }
1992
1993 bool address_space_write(AddressSpace *as, hwaddr addr,
1994 const uint8_t *buf, int len)
1995 {
1996 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
1997 }
1998
1999 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2000 {
2001 return address_space_rw(as, addr, buf, len, false);
2002 }
2003
2004
2005 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2006 int len, int is_write)
2007 {
2008 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2009 }
2010
2011 /* used for ROM loading : can write in RAM and ROM */
2012 void cpu_physical_memory_write_rom(hwaddr addr,
2013 const uint8_t *buf, int len)
2014 {
2015 hwaddr l;
2016 uint8_t *ptr;
2017 hwaddr addr1;
2018 MemoryRegion *mr;
2019
2020 while (len > 0) {
2021 l = len;
2022 mr = address_space_translate(&address_space_memory,
2023 addr, &addr1, &l, true);
2024
2025 if (!(memory_region_is_ram(mr) ||
2026 memory_region_is_romd(mr))) {
2027 /* do nothing */
2028 } else {
2029 addr1 += memory_region_get_ram_addr(mr);
2030 /* ROM/RAM case */
2031 ptr = qemu_get_ram_ptr(addr1);
2032 memcpy(ptr, buf, l);
2033 invalidate_and_set_dirty(addr1, l);
2034 }
2035 len -= l;
2036 buf += l;
2037 addr += l;
2038 }
2039 }
2040
/* Temporary buffer handed out by address_space_map() when the target
 * is not directly accessible memory.  There is a single instance, so
 * only one such mapping can be outstanding at a time. */
typedef struct {
    void *buffer;   /* host buffer, NULL while the bounce buffer is free */
    hwaddr addr;    /* guest address the buffer stands in for */
    hwaddr len;     /* number of bytes covered */
} BounceBuffer;

static BounceBuffer bounce;
2048
/* A callback registered with cpu_register_map_client(); it is invoked
 * from cpu_notify_map_clients(). */
typedef struct MapClient {
    void *opaque;                   /* argument passed to callback */
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

/* All currently registered map clients. */
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);
2057
2058 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2059 {
2060 MapClient *client = g_malloc(sizeof(*client));
2061
2062 client->opaque = opaque;
2063 client->callback = callback;
2064 QLIST_INSERT_HEAD(&map_client_list, client, link);
2065 return client;
2066 }
2067
2068 static void cpu_unregister_map_client(void *_client)
2069 {
2070 MapClient *client = (MapClient *)_client;
2071
2072 QLIST_REMOVE(client, link);
2073 g_free(client);
2074 }
2075
2076 static void cpu_notify_map_clients(void)
2077 {
2078 MapClient *client;
2079
2080 while (!QLIST_EMPTY(&map_client_list)) {
2081 client = QLIST_FIRST(&map_client_list);
2082 client->callback(client->opaque);
2083 cpu_unregister_map_client(client);
2084 }
2085 }
2086
2087 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2088 {
2089 MemoryRegion *mr;
2090 hwaddr l, xlat;
2091
2092 while (len > 0) {
2093 l = len;
2094 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2095 if (!memory_access_is_direct(mr, is_write)) {
2096 l = memory_access_size(mr, l, addr);
2097 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2098 return false;
2099 }
2100 }
2101
2102 len -= l;
2103 addr += l;
2104 }
2105 return true;
2106 }
2107
2108 /* Map a physical memory region into a host virtual address.
2109 * May map a subset of the requested range, given by and returned in *plen.
2110 * May return NULL if resources needed to perform the mapping are exhausted.
2111 * Use only for reads OR writes - not for read-modify-write operations.
2112 * Use cpu_register_map_client() to know when retrying the map operation is
2113 * likely to succeed.
2114 */
2115 void *address_space_map(AddressSpace *as,
2116 hwaddr addr,
2117 hwaddr *plen,
2118 bool is_write)
2119 {
2120 hwaddr len = *plen;
2121 hwaddr todo = 0;
2122 hwaddr l, xlat;
2123 MemoryRegion *mr;
2124 ram_addr_t raddr = RAM_ADDR_MAX;
2125 ram_addr_t rlen;
2126 void *ret;
2127
2128 while (len > 0) {
2129 l = len;
2130 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2131
2132 if (!memory_access_is_direct(mr, is_write)) {
2133 if (todo || bounce.buffer) {
2134 break;
2135 }
2136 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2137 bounce.addr = addr;
2138 bounce.len = l;
2139 if (!is_write) {
2140 address_space_read(as, addr, bounce.buffer, l);
2141 }
2142
2143 *plen = l;
2144 return bounce.buffer;
2145 }
2146 if (!todo) {
2147 raddr = memory_region_get_ram_addr(mr) + xlat;
2148 } else {
2149 if (memory_region_get_ram_addr(mr) + xlat != raddr + todo) {
2150 break;
2151 }
2152 }
2153
2154 len -= l;
2155 addr += l;
2156 todo += l;
2157 }
2158 rlen = todo;
2159 ret = qemu_ram_ptr_length(raddr, &rlen);
2160 *plen = rlen;
2161 return ret;
2162 }
2163
2164 /* Unmaps a memory region previously mapped by address_space_map().
2165 * Will also mark the memory as dirty if is_write == 1. access_len gives
2166 * the amount of memory that was actually read or written by the caller.
2167 */
2168 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2169 int is_write, hwaddr access_len)
2170 {
2171 if (buffer != bounce.buffer) {
2172 if (is_write) {
2173 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2174 while (access_len) {
2175 unsigned l;
2176 l = TARGET_PAGE_SIZE;
2177 if (l > access_len)
2178 l = access_len;
2179 invalidate_and_set_dirty(addr1, l);
2180 addr1 += l;
2181 access_len -= l;
2182 }
2183 }
2184 if (xen_enabled()) {
2185 xen_invalidate_map_cache_entry(buffer);
2186 }
2187 return;
2188 }
2189 if (is_write) {
2190 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2191 }
2192 qemu_vfree(bounce.buffer);
2193 bounce.buffer = NULL;
2194 cpu_notify_map_clients();
2195 }
2196
2197 void *cpu_physical_memory_map(hwaddr addr,
2198 hwaddr *plen,
2199 int is_write)
2200 {
2201 return address_space_map(&address_space_memory, addr, plen, is_write);
2202 }
2203
2204 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2205 int is_write, hwaddr access_len)
2206 {
2207 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2208 }
2209
2210 /* warning: addr must be aligned */
2211 static inline uint32_t ldl_phys_internal(hwaddr addr,
2212 enum device_endian endian)
2213 {
2214 uint8_t *ptr;
2215 uint64_t val;
2216 MemoryRegion *mr;
2217 hwaddr l = 4;
2218 hwaddr addr1;
2219
2220 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2221 false);
2222 if (l < 4 || !memory_access_is_direct(mr, false)) {
2223 /* I/O case */
2224 io_mem_read(mr, addr1, &val, 4);
2225 #if defined(TARGET_WORDS_BIGENDIAN)
2226 if (endian == DEVICE_LITTLE_ENDIAN) {
2227 val = bswap32(val);
2228 }
2229 #else
2230 if (endian == DEVICE_BIG_ENDIAN) {
2231 val = bswap32(val);
2232 }
2233 #endif
2234 } else {
2235 /* RAM case */
2236 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2237 & TARGET_PAGE_MASK)
2238 + addr1);
2239 switch (endian) {
2240 case DEVICE_LITTLE_ENDIAN:
2241 val = ldl_le_p(ptr);
2242 break;
2243 case DEVICE_BIG_ENDIAN:
2244 val = ldl_be_p(ptr);
2245 break;
2246 default:
2247 val = ldl_p(ptr);
2248 break;
2249 }
2250 }
2251 return val;
2252 }
2253
2254 uint32_t ldl_phys(hwaddr addr)
2255 {
2256 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2257 }
2258
2259 uint32_t ldl_le_phys(hwaddr addr)
2260 {
2261 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2262 }
2263
2264 uint32_t ldl_be_phys(hwaddr addr)
2265 {
2266 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2267 }
2268
2269 /* warning: addr must be aligned */
2270 static inline uint64_t ldq_phys_internal(hwaddr addr,
2271 enum device_endian endian)
2272 {
2273 uint8_t *ptr;
2274 uint64_t val;
2275 MemoryRegion *mr;
2276 hwaddr l = 8;
2277 hwaddr addr1;
2278
2279 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2280 false);
2281 if (l < 8 || !memory_access_is_direct(mr, false)) {
2282 /* I/O case */
2283 io_mem_read(mr, addr1, &val, 8);
2284 #if defined(TARGET_WORDS_BIGENDIAN)
2285 if (endian == DEVICE_LITTLE_ENDIAN) {
2286 val = bswap64(val);
2287 }
2288 #else
2289 if (endian == DEVICE_BIG_ENDIAN) {
2290 val = bswap64(val);
2291 }
2292 #endif
2293 } else {
2294 /* RAM case */
2295 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2296 & TARGET_PAGE_MASK)
2297 + addr1);
2298 switch (endian) {
2299 case DEVICE_LITTLE_ENDIAN:
2300 val = ldq_le_p(ptr);
2301 break;
2302 case DEVICE_BIG_ENDIAN:
2303 val = ldq_be_p(ptr);
2304 break;
2305 default:
2306 val = ldq_p(ptr);
2307 break;
2308 }
2309 }
2310 return val;
2311 }
2312
2313 uint64_t ldq_phys(hwaddr addr)
2314 {
2315 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2316 }
2317
2318 uint64_t ldq_le_phys(hwaddr addr)
2319 {
2320 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2321 }
2322
2323 uint64_t ldq_be_phys(hwaddr addr)
2324 {
2325 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2326 }
2327
2328 /* XXX: optimize */
2329 uint32_t ldub_phys(hwaddr addr)
2330 {
2331 uint8_t val;
2332 cpu_physical_memory_read(addr, &val, 1);
2333 return val;
2334 }
2335
2336 /* warning: addr must be aligned */
2337 static inline uint32_t lduw_phys_internal(hwaddr addr,
2338 enum device_endian endian)
2339 {
2340 uint8_t *ptr;
2341 uint64_t val;
2342 MemoryRegion *mr;
2343 hwaddr l = 2;
2344 hwaddr addr1;
2345
2346 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2347 false);
2348 if (l < 2 || !memory_access_is_direct(mr, false)) {
2349 /* I/O case */
2350 io_mem_read(mr, addr1, &val, 2);
2351 #if defined(TARGET_WORDS_BIGENDIAN)
2352 if (endian == DEVICE_LITTLE_ENDIAN) {
2353 val = bswap16(val);
2354 }
2355 #else
2356 if (endian == DEVICE_BIG_ENDIAN) {
2357 val = bswap16(val);
2358 }
2359 #endif
2360 } else {
2361 /* RAM case */
2362 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2363 & TARGET_PAGE_MASK)
2364 + addr1);
2365 switch (endian) {
2366 case DEVICE_LITTLE_ENDIAN:
2367 val = lduw_le_p(ptr);
2368 break;
2369 case DEVICE_BIG_ENDIAN:
2370 val = lduw_be_p(ptr);
2371 break;
2372 default:
2373 val = lduw_p(ptr);
2374 break;
2375 }
2376 }
2377 return val;
2378 }
2379
2380 uint32_t lduw_phys(hwaddr addr)
2381 {
2382 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2383 }
2384
2385 uint32_t lduw_le_phys(hwaddr addr)
2386 {
2387 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2388 }
2389
2390 uint32_t lduw_be_phys(hwaddr addr)
2391 {
2392 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2393 }
2394
2395 /* warning: addr must be aligned. The ram page is not masked as dirty
2396 and the code inside is not invalidated. It is useful if the dirty
2397 bits are used to track modified PTEs */
2398 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2399 {
2400 uint8_t *ptr;
2401 MemoryRegion *mr;
2402 hwaddr l = 4;
2403 hwaddr addr1;
2404
2405 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2406 true);
2407 if (l < 4 || !memory_access_is_direct(mr, true)) {
2408 io_mem_write(mr, addr1, val, 4);
2409 } else {
2410 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2411 ptr = qemu_get_ram_ptr(addr1);
2412 stl_p(ptr, val);
2413
2414 if (unlikely(in_migration)) {
2415 if (!cpu_physical_memory_is_dirty(addr1)) {
2416 /* invalidate code */
2417 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2418 /* set dirty bit */
2419 cpu_physical_memory_set_dirty_flags(
2420 addr1, (0xff & ~CODE_DIRTY_FLAG));
2421 }
2422 }
2423 }
2424 }
2425
2426 /* warning: addr must be aligned */
2427 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2428 enum device_endian endian)
2429 {
2430 uint8_t *ptr;
2431 MemoryRegion *mr;
2432 hwaddr l = 4;
2433 hwaddr addr1;
2434
2435 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2436 true);
2437 if (l < 4 || !memory_access_is_direct(mr, true)) {
2438 #if defined(TARGET_WORDS_BIGENDIAN)
2439 if (endian == DEVICE_LITTLE_ENDIAN) {
2440 val = bswap32(val);
2441 }
2442 #else
2443 if (endian == DEVICE_BIG_ENDIAN) {
2444 val = bswap32(val);
2445 }
2446 #endif
2447 io_mem_write(mr, addr1, val, 4);
2448 } else {
2449 /* RAM case */
2450 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2451 ptr = qemu_get_ram_ptr(addr1);
2452 switch (endian) {
2453 case DEVICE_LITTLE_ENDIAN:
2454 stl_le_p(ptr, val);
2455 break;
2456 case DEVICE_BIG_ENDIAN:
2457 stl_be_p(ptr, val);
2458 break;
2459 default:
2460 stl_p(ptr, val);
2461 break;
2462 }
2463 invalidate_and_set_dirty(addr1, 4);
2464 }
2465 }
2466
2467 void stl_phys(hwaddr addr, uint32_t val)
2468 {
2469 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2470 }
2471
2472 void stl_le_phys(hwaddr addr, uint32_t val)
2473 {
2474 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2475 }
2476
2477 void stl_be_phys(hwaddr addr, uint32_t val)
2478 {
2479 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2480 }
2481
2482 /* XXX: optimize */
2483 void stb_phys(hwaddr addr, uint32_t val)
2484 {
2485 uint8_t v = val;
2486 cpu_physical_memory_write(addr, &v, 1);
2487 }
2488
2489 /* warning: addr must be aligned */
2490 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2491 enum device_endian endian)
2492 {
2493 uint8_t *ptr;
2494 MemoryRegion *mr;
2495 hwaddr l = 2;
2496 hwaddr addr1;
2497
2498 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2499 true);
2500 if (l < 2 || !memory_access_is_direct(mr, true)) {
2501 #if defined(TARGET_WORDS_BIGENDIAN)
2502 if (endian == DEVICE_LITTLE_ENDIAN) {
2503 val = bswap16(val);
2504 }
2505 #else
2506 if (endian == DEVICE_BIG_ENDIAN) {
2507 val = bswap16(val);
2508 }
2509 #endif
2510 io_mem_write(mr, addr1, val, 2);
2511 } else {
2512 /* RAM case */
2513 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2514 ptr = qemu_get_ram_ptr(addr1);
2515 switch (endian) {
2516 case DEVICE_LITTLE_ENDIAN:
2517 stw_le_p(ptr, val);
2518 break;
2519 case DEVICE_BIG_ENDIAN:
2520 stw_be_p(ptr, val);
2521 break;
2522 default:
2523 stw_p(ptr, val);
2524 break;
2525 }
2526 invalidate_and_set_dirty(addr1, 2);
2527 }
2528 }
2529
2530 void stw_phys(hwaddr addr, uint32_t val)
2531 {
2532 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2533 }
2534
2535 void stw_le_phys(hwaddr addr, uint32_t val)
2536 {
2537 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2538 }
2539
2540 void stw_be_phys(hwaddr addr, uint32_t val)
2541 {
2542 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2543 }
2544
2545 /* XXX: optimize */
2546 void stq_phys(hwaddr addr, uint64_t val)
2547 {
2548 val = tswap64(val);
2549 cpu_physical_memory_write(addr, &val, 8);
2550 }
2551
2552 void stq_le_phys(hwaddr addr, uint64_t val)
2553 {
2554 val = cpu_to_le64(val);
2555 cpu_physical_memory_write(addr, &val, 8);
2556 }
2557
2558 void stq_be_phys(hwaddr addr, uint64_t val)
2559 {
2560 val = cpu_to_be64(val);
2561 cpu_physical_memory_write(addr, &val, 8);
2562 }
2563
2564 /* virtual memory access for debug (includes writing to ROM) */
2565 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2566 uint8_t *buf, int len, int is_write)
2567 {
2568 int l;
2569 hwaddr phys_addr;
2570 target_ulong page;
2571
2572 while (len > 0) {
2573 page = addr & TARGET_PAGE_MASK;
2574 phys_addr = cpu_get_phys_page_debug(env, page);
2575 /* if no physical page mapped, return an error */
2576 if (phys_addr == -1)
2577 return -1;
2578 l = (page + TARGET_PAGE_SIZE) - addr;
2579 if (l > len)
2580 l = len;
2581 phys_addr += (addr & ~TARGET_PAGE_MASK);
2582 if (is_write)
2583 cpu_physical_memory_write_rom(phys_addr, buf, l);
2584 else
2585 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2586 len -= l;
2587 buf += l;
2588 addr += l;
2589 }
2590 return 0;
2591 }
2592 #endif
2593
2594 #if !defined(CONFIG_USER_ONLY)
2595
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * The answer is fixed at build time: true when TARGET_WORDS_BIGENDIAN is
 * defined, false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool target_is_big_endian = true;
#else
    const bool target_is_big_endian = false;
#endif

    return target_is_big_endian;
}
2609
2610 #endif
2611
2612 #ifndef CONFIG_USER_ONLY
2613 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2614 {
2615 MemoryRegion*mr;
2616 hwaddr l = 1;
2617
2618 mr = address_space_translate(&address_space_memory,
2619 phys_addr, &phys_addr, &l, false);
2620
2621 return !(memory_region_is_ram(mr) ||
2622 memory_region_is_romd(mr));
2623 }
2624
2625 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2626 {
2627 RAMBlock *block;
2628
2629 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2630 func(block->host, block->offset, block->length, opaque);
2631 }
2632 }
2633 #endif