exec.c (QEMU, mirror_qemu.git) at commit "exec: Resolve subpages in one step except for IOTLB fills"
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_SUBPAGE
54
55 #if !defined(CONFIG_USER_ONLY)
56 int phys_ram_fd;
57 static int in_migration;
58
59 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
60
61 static MemoryRegion *system_memory;
62 static MemoryRegion *system_io;
63
64 AddressSpace address_space_io;
65 AddressSpace address_space_memory;
66 DMAContext dma_context_memory;
67
68 MemoryRegion io_mem_rom, io_mem_notdirty;
69 static MemoryRegion io_mem_unassigned, io_mem_subpage_ram;
70
71 #endif
72
73 CPUArchState *first_cpu;
74 /* current CPU in the current thread. It is only valid inside
75 cpu_exec() */
76 DEFINE_TLS(CPUArchState *,cpu_single_env);
77 /* 0 = Do not count executed instructions.
78 1 = Precise instruction counting.
79 2 = Adaptive rate instruction counting. */
80 int use_icount;
81
82 #if !defined(CONFIG_USER_ONLY)
83
84 typedef struct PhysPageEntry PhysPageEntry;
85
86 struct PhysPageEntry {
87 uint16_t is_leaf : 1;
88 /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
89 uint16_t ptr : 15;
90 };
91
92 struct AddressSpaceDispatch {
93 /* This is a multi-level map on the physical address space.
94 * The bottom level has pointers to MemoryRegionSections.
95 */
96 PhysPageEntry phys_map;
97 MemoryListener listener;
98 };
99
100 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
101 typedef struct subpage_t {
102 MemoryRegion iomem;
103 hwaddr base;
104 uint16_t sub_section[TARGET_PAGE_SIZE];
105 } subpage_t;
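/* A subpage_t covers a single target page whose contents are not uniform
 * (for example RAM and MMIO regions sharing one page).  Accesses that land
 * on such a page are routed to the subpage's iomem region, which forwards
 * them to the section recorded in sub_section[SUBPAGE_IDX(addr)], i.e. one
 * section index per byte offset within the page.
 */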
106
107 static MemoryRegionSection *phys_sections;
108 static unsigned phys_sections_nb, phys_sections_nb_alloc;
109 static uint16_t phys_section_unassigned;
110 static uint16_t phys_section_notdirty;
111 static uint16_t phys_section_rom;
112 static uint16_t phys_section_watch;
113
114 /* Simple allocator for PhysPageEntry nodes */
115 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
116 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
117
118 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
119
120 static void io_mem_init(void);
121 static void memory_map_init(void);
122 static void *qemu_safe_ram_ptr(ram_addr_t addr);
123
124 static MemoryRegion io_mem_watch;
125 #endif
126
127 #if !defined(CONFIG_USER_ONLY)
128
129 static void phys_map_node_reserve(unsigned nodes)
130 {
131 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
132 typedef PhysPageEntry Node[L2_SIZE];
133 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
134 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
135 phys_map_nodes_nb + nodes);
136 phys_map_nodes = g_renew(Node, phys_map_nodes,
137 phys_map_nodes_nb_alloc);
138 }
139 }
140
141 static uint16_t phys_map_node_alloc(void)
142 {
143 unsigned i;
144 uint16_t ret;
145
146 ret = phys_map_nodes_nb++;
147 assert(ret != PHYS_MAP_NODE_NIL);
148 assert(ret != phys_map_nodes_nb_alloc);
149 for (i = 0; i < L2_SIZE; ++i) {
150 phys_map_nodes[ret][i].is_leaf = 0;
151 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
152 }
153 return ret;
154 }
155
156 static void phys_map_nodes_reset(void)
157 {
158 phys_map_nodes_nb = 0;
159 }
160
161
162 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
163 hwaddr *nb, uint16_t leaf,
164 int level)
165 {
166 PhysPageEntry *p;
167 int i;
168 hwaddr step = (hwaddr)1 << (level * L2_BITS);
169
170 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
171 lp->ptr = phys_map_node_alloc();
172 p = phys_map_nodes[lp->ptr];
173 if (level == 0) {
174 for (i = 0; i < L2_SIZE; i++) {
175 p[i].is_leaf = 1;
176 p[i].ptr = phys_section_unassigned;
177 }
178 }
179 } else {
180 p = phys_map_nodes[lp->ptr];
181 }
182 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
183
184 while (*nb && lp < &p[L2_SIZE]) {
185 if ((*index & (step - 1)) == 0 && *nb >= step) {
186 lp->is_leaf = true;
187 lp->ptr = leaf;
188 *index += step;
189 *nb -= step;
190 } else {
191 phys_page_set_level(lp, index, nb, leaf, level - 1);
192 }
193 ++lp;
194 }
195 }
196
197 static void phys_page_set(AddressSpaceDispatch *d,
198 hwaddr index, hwaddr nb,
199 uint16_t leaf)
200 {
201 /* Wildly overreserve - it doesn't matter much. */
202 phys_map_node_reserve(3 * P_L2_LEVELS);
203
204 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
205 }
206
207 static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
208 {
209 PhysPageEntry lp = d->phys_map;
210 PhysPageEntry *p;
211 int i;
212
213 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
214 if (lp.ptr == PHYS_MAP_NODE_NIL) {
215 return &phys_sections[phys_section_unassigned];
216 }
217 p = phys_map_nodes[lp.ptr];
218 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
219 }
220 return &phys_sections[lp.ptr];
221 }
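/* phys_page_find() walks the radix tree top-down: P_L2_LEVELS levels, each
 * consuming L2_BITS of the page index.  A missing intermediate node means
 * the range was never populated, so the unassigned section is returned
 * instead of descending further.
 */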
222
223 bool memory_region_is_unassigned(MemoryRegion *mr)
224 {
225 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
226 && mr != &io_mem_watch;
227 }
228
229 static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
230 hwaddr addr,
231 bool resolve_subpage)
232 {
233 MemoryRegionSection *section;
234 subpage_t *subpage;
235
236 section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
237 if (resolve_subpage && section->mr->subpage) {
238 subpage = container_of(section->mr, subpage_t, iomem);
239 section = &phys_sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
240 }
241 return section;
242 }
243
244 static MemoryRegionSection *
245 address_space_translate_internal(AddressSpace *as, hwaddr addr, hwaddr *xlat,
246 hwaddr *plen, bool resolve_subpage)
247 {
248 MemoryRegionSection *section;
249 Int128 diff;
250
251 section = address_space_lookup_region(as, addr, resolve_subpage);
252 /* Compute offset within MemoryRegionSection */
253 addr -= section->offset_within_address_space;
254
255 /* Compute offset within MemoryRegion */
256 *xlat = addr + section->offset_within_region;
257
258 diff = int128_sub(section->mr->size, int128_make64(addr));
259 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
260 return section;
261 }
262
263 MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
264 hwaddr *xlat, hwaddr *plen,
265 bool is_write)
266 {
267 return address_space_translate_internal(as, addr, xlat, plen, true);
268 }
269
270 MemoryRegionSection *
271 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
272 hwaddr *plen)
273 {
274 return address_space_translate_internal(as, addr, xlat, plen, false);
275 }
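/* Both helpers share address_space_translate_internal(); the difference is
 * subpage handling.  address_space_translate() resolves subpages immediately,
 * so callers get the final MemoryRegionSection in one step, while the IOTLB
 * variant keeps the subpage container: the TLB entry then points at the
 * subpage's iomem region and the per-access subpage_read()/subpage_write()
 * handlers below perform the final dispatch.
 */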
276 #endif
277
278 void cpu_exec_init_all(void)
279 {
280 #if !defined(CONFIG_USER_ONLY)
281 qemu_mutex_init(&ram_list.mutex);
282 memory_map_init();
283 io_mem_init();
284 #endif
285 }
286
287 #if !defined(CONFIG_USER_ONLY)
288
289 static int cpu_common_post_load(void *opaque, int version_id)
290 {
291 CPUState *cpu = opaque;
292
293 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
294 version_id is increased. */
295 cpu->interrupt_request &= ~0x01;
296 tlb_flush(cpu->env_ptr, 1);
297
298 return 0;
299 }
300
301 static const VMStateDescription vmstate_cpu_common = {
302 .name = "cpu_common",
303 .version_id = 1,
304 .minimum_version_id = 1,
305 .minimum_version_id_old = 1,
306 .post_load = cpu_common_post_load,
307 .fields = (VMStateField []) {
308 VMSTATE_UINT32(halted, CPUState),
309 VMSTATE_UINT32(interrupt_request, CPUState),
310 VMSTATE_END_OF_LIST()
311 }
312 };
313 #else
314 #define vmstate_cpu_common vmstate_dummy
315 #endif
316
317 CPUState *qemu_get_cpu(int index)
318 {
319 CPUArchState *env = first_cpu;
320 CPUState *cpu = NULL;
321
322 while (env) {
323 cpu = ENV_GET_CPU(env);
324 if (cpu->cpu_index == index) {
325 break;
326 }
327 env = env->next_cpu;
328 }
329
330 return env ? cpu : NULL;
331 }
332
333 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
334 {
335 CPUArchState *env = first_cpu;
336
337 while (env) {
338 func(ENV_GET_CPU(env), data);
339 env = env->next_cpu;
340 }
341 }
342
343 void cpu_exec_init(CPUArchState *env)
344 {
345 CPUState *cpu = ENV_GET_CPU(env);
346 CPUClass *cc = CPU_GET_CLASS(cpu);
347 CPUArchState **penv;
348 int cpu_index;
349
350 #if defined(CONFIG_USER_ONLY)
351 cpu_list_lock();
352 #endif
353 env->next_cpu = NULL;
354 penv = &first_cpu;
355 cpu_index = 0;
356 while (*penv != NULL) {
357 penv = &(*penv)->next_cpu;
358 cpu_index++;
359 }
360 cpu->cpu_index = cpu_index;
361 cpu->numa_node = 0;
362 QTAILQ_INIT(&env->breakpoints);
363 QTAILQ_INIT(&env->watchpoints);
364 #ifndef CONFIG_USER_ONLY
365 cpu->thread_id = qemu_get_thread_id();
366 #endif
367 *penv = env;
368 #if defined(CONFIG_USER_ONLY)
369 cpu_list_unlock();
370 #endif
371 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
372 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
373 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
374 cpu_save, cpu_load, env);
375 assert(cc->vmsd == NULL);
376 #endif
377 if (cc->vmsd != NULL) {
378 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
379 }
380 }
381
382 #if defined(TARGET_HAS_ICE)
383 #if defined(CONFIG_USER_ONLY)
384 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
385 {
386 tb_invalidate_phys_page_range(pc, pc + 1, 0);
387 }
388 #else
389 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
390 {
391 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
392 (pc & ~TARGET_PAGE_MASK));
393 }
394 #endif
395 #endif /* TARGET_HAS_ICE */
396
397 #if defined(CONFIG_USER_ONLY)
398 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
399 {
401 }
402
403 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
404 int flags, CPUWatchpoint **watchpoint)
405 {
406 return -ENOSYS;
407 }
408 #else
409 /* Add a watchpoint. */
410 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
411 int flags, CPUWatchpoint **watchpoint)
412 {
413 target_ulong len_mask = ~(len - 1);
414 CPUWatchpoint *wp;
415
416 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
417 if ((len & (len - 1)) || (addr & ~len_mask) ||
418 len == 0 || len > TARGET_PAGE_SIZE) {
419 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
420 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
421 return -EINVAL;
422 }
423 wp = g_malloc(sizeof(*wp));
424
425 wp->vaddr = addr;
426 wp->len_mask = len_mask;
427 wp->flags = flags;
428
429 /* keep all GDB-injected watchpoints in front */
430 if (flags & BP_GDB)
431 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
432 else
433 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
434
435 tlb_flush_page(env, addr);
436
437 if (watchpoint)
438 *watchpoint = wp;
439 return 0;
440 }
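/* Illustrative only: a debugger front end that wants to trap 4-byte-aligned
 * writes to addr could do something like
 *
 *     CPUWatchpoint *wp;
 *     cpu_watchpoint_insert(env, addr, 4, BP_MEM_WRITE | BP_GDB, &wp);
 *
 * len must be a power of two no larger than TARGET_PAGE_SIZE and addr must
 * be aligned to len, otherwise -EINVAL is returned.
 */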
441
442 /* Remove a specific watchpoint. */
443 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
444 int flags)
445 {
446 target_ulong len_mask = ~(len - 1);
447 CPUWatchpoint *wp;
448
449 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
450 if (addr == wp->vaddr && len_mask == wp->len_mask
451 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
452 cpu_watchpoint_remove_by_ref(env, wp);
453 return 0;
454 }
455 }
456 return -ENOENT;
457 }
458
459 /* Remove a specific watchpoint by reference. */
460 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
461 {
462 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
463
464 tlb_flush_page(env, watchpoint->vaddr);
465
466 g_free(watchpoint);
467 }
468
469 /* Remove all matching watchpoints. */
470 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
471 {
472 CPUWatchpoint *wp, *next;
473
474 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
475 if (wp->flags & mask)
476 cpu_watchpoint_remove_by_ref(env, wp);
477 }
478 }
479 #endif
480
481 /* Add a breakpoint. */
482 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
483 CPUBreakpoint **breakpoint)
484 {
485 #if defined(TARGET_HAS_ICE)
486 CPUBreakpoint *bp;
487
488 bp = g_malloc(sizeof(*bp));
489
490 bp->pc = pc;
491 bp->flags = flags;
492
493 /* keep all GDB-injected breakpoints in front */
494 if (flags & BP_GDB)
495 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
496 else
497 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
498
499 breakpoint_invalidate(env, pc);
500
501 if (breakpoint)
502 *breakpoint = bp;
503 return 0;
504 #else
505 return -ENOSYS;
506 #endif
507 }
508
509 /* Remove a specific breakpoint. */
510 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
511 {
512 #if defined(TARGET_HAS_ICE)
513 CPUBreakpoint *bp;
514
515 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
516 if (bp->pc == pc && bp->flags == flags) {
517 cpu_breakpoint_remove_by_ref(env, bp);
518 return 0;
519 }
520 }
521 return -ENOENT;
522 #else
523 return -ENOSYS;
524 #endif
525 }
526
527 /* Remove a specific breakpoint by reference. */
528 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
529 {
530 #if defined(TARGET_HAS_ICE)
531 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
532
533 breakpoint_invalidate(env, breakpoint->pc);
534
535 g_free(breakpoint);
536 #endif
537 }
538
539 /* Remove all matching breakpoints. */
540 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
541 {
542 #if defined(TARGET_HAS_ICE)
543 CPUBreakpoint *bp, *next;
544
545 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
546 if (bp->flags & mask)
547 cpu_breakpoint_remove_by_ref(env, bp);
548 }
549 #endif
550 }
551
552 /* enable or disable single step mode. EXCP_DEBUG is returned by the
553 CPU loop after each instruction */
554 void cpu_single_step(CPUArchState *env, int enabled)
555 {
556 #if defined(TARGET_HAS_ICE)
557 if (env->singlestep_enabled != enabled) {
558 env->singlestep_enabled = enabled;
559 if (kvm_enabled())
560 kvm_update_guest_debug(env, 0);
561 else {
562 /* must flush all the translated code to avoid inconsistencies */
563 /* XXX: only flush what is necessary */
564 tb_flush(env);
565 }
566 }
567 #endif
568 }
569
570 void cpu_exit(CPUArchState *env)
571 {
572 CPUState *cpu = ENV_GET_CPU(env);
573
574 cpu->exit_request = 1;
575 cpu->tcg_exit_req = 1;
576 }
577
578 void cpu_abort(CPUArchState *env, const char *fmt, ...)
579 {
580 va_list ap;
581 va_list ap2;
582
583 va_start(ap, fmt);
584 va_copy(ap2, ap);
585 fprintf(stderr, "qemu: fatal: ");
586 vfprintf(stderr, fmt, ap);
587 fprintf(stderr, "\n");
588 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
589 if (qemu_log_enabled()) {
590 qemu_log("qemu: fatal: ");
591 qemu_log_vprintf(fmt, ap2);
592 qemu_log("\n");
593 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
594 qemu_log_flush();
595 qemu_log_close();
596 }
597 va_end(ap2);
598 va_end(ap);
599 #if defined(CONFIG_USER_ONLY)
600 {
601 struct sigaction act;
602 sigfillset(&act.sa_mask);
603 act.sa_handler = SIG_DFL;
604 sigaction(SIGABRT, &act, NULL);
605 }
606 #endif
607 abort();
608 }
609
610 CPUArchState *cpu_copy(CPUArchState *env)
611 {
612 CPUArchState *new_env = cpu_init(env->cpu_model_str);
613 CPUArchState *next_cpu = new_env->next_cpu;
614 #if defined(TARGET_HAS_ICE)
615 CPUBreakpoint *bp;
616 CPUWatchpoint *wp;
617 #endif
618
619 memcpy(new_env, env, sizeof(CPUArchState));
620
621 /* Preserve chaining. */
622 new_env->next_cpu = next_cpu;
623
624 /* Clone all break/watchpoints.
625 Note: Once we support ptrace with hw-debug register access, make sure
626 BP_CPU break/watchpoints are handled correctly on clone. */
627 QTAILQ_INIT(&env->breakpoints);
628 QTAILQ_INIT(&env->watchpoints);
629 #if defined(TARGET_HAS_ICE)
630 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
631 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
632 }
633 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
634 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
635 wp->flags, NULL);
636 }
637 #endif
638
639 return new_env;
640 }
641
642 #if !defined(CONFIG_USER_ONLY)
643 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
644 uintptr_t length)
645 {
646 uintptr_t start1;
647
648 /* we modify the TLB cache so that the dirty bit will be set again
649 when accessing the range */
650 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
651 /* Check that we don't span multiple blocks - this breaks the
652 address comparisons below. */
653 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
654 != (end - 1) - start) {
655 abort();
656 }
657 cpu_tlb_reset_dirty_all(start1, length);
658
659 }
660
661 /* Note: start and end must be within the same ram block. */
662 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
663 int dirty_flags)
664 {
665 uintptr_t length;
666
667 start &= TARGET_PAGE_MASK;
668 end = TARGET_PAGE_ALIGN(end);
669
670 length = end - start;
671 if (length == 0)
672 return;
673 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
674
675 if (tcg_enabled()) {
676 tlb_reset_dirty_range_all(start, end, length);
677 }
678 }
679
680 static int cpu_physical_memory_set_dirty_tracking(int enable)
681 {
682 int ret = 0;
683 in_migration = enable;
684 return ret;
685 }
686
687 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
688 MemoryRegionSection *section,
689 target_ulong vaddr,
690 hwaddr paddr, hwaddr xlat,
691 int prot,
692 target_ulong *address)
693 {
694 hwaddr iotlb;
695 CPUWatchpoint *wp;
696
697 if (memory_region_is_ram(section->mr)) {
698 /* Normal RAM. */
699 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
700 + xlat;
701 if (!section->readonly) {
702 iotlb |= phys_section_notdirty;
703 } else {
704 iotlb |= phys_section_rom;
705 }
706 } else {
707 iotlb = section - phys_sections;
708 iotlb += xlat;
709 }
710
711 /* Make accesses to pages with watchpoints go via the
712 watchpoint trap routines. */
713 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
714 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
715 /* Avoid trapping reads of pages with a write breakpoint. */
716 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
717 iotlb = phys_section_watch + paddr;
718 *address |= TLB_MMIO;
719 break;
720 }
721 }
722 }
723
724 return iotlb;
725 }
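/* The returned value is what gets stored in the CPU's IOTLB.  For RAM the
 * ram_addr of the page is combined with the notdirty or rom section index
 * in the low bits; for MMIO it is the index of the section itself plus the
 * offset into it.  iotlb_to_region() below recovers the MemoryRegion by
 * masking the value with ~TARGET_PAGE_MASK, which is why phys_section_add()
 * asserts that section indices stay below TARGET_PAGE_SIZE.
 */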
726 #endif /* !defined(CONFIG_USER_ONLY) */
727
728 #if !defined(CONFIG_USER_ONLY)
729
730 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
731 uint16_t section);
732 static subpage_t *subpage_init(hwaddr base);
733 static void destroy_page_desc(uint16_t section_index)
734 {
735 MemoryRegionSection *section = &phys_sections[section_index];
736 MemoryRegion *mr = section->mr;
737
738 if (mr->subpage) {
739 subpage_t *subpage = container_of(mr, subpage_t, iomem);
740 memory_region_destroy(&subpage->iomem);
741 g_free(subpage);
742 }
743 }
744
745 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
746 {
747 unsigned i;
748 PhysPageEntry *p;
749
750 if (lp->ptr == PHYS_MAP_NODE_NIL) {
751 return;
752 }
753
754 p = phys_map_nodes[lp->ptr];
755 for (i = 0; i < L2_SIZE; ++i) {
756 if (!p[i].is_leaf) {
757 destroy_l2_mapping(&p[i], level - 1);
758 } else {
759 destroy_page_desc(p[i].ptr);
760 }
761 }
762 lp->is_leaf = 0;
763 lp->ptr = PHYS_MAP_NODE_NIL;
764 }
765
766 static void destroy_all_mappings(AddressSpaceDispatch *d)
767 {
768 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
769 phys_map_nodes_reset();
770 }
771
772 static uint16_t phys_section_add(MemoryRegionSection *section)
773 {
774 /* The physical section number is ORed with a page-aligned
775 * pointer to produce the iotlb entries. Thus it should
776 * never overflow into the page-aligned value.
777 */
778 assert(phys_sections_nb < TARGET_PAGE_SIZE);
779
780 if (phys_sections_nb == phys_sections_nb_alloc) {
781 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
782 phys_sections = g_renew(MemoryRegionSection, phys_sections,
783 phys_sections_nb_alloc);
784 }
785 phys_sections[phys_sections_nb] = *section;
786 return phys_sections_nb++;
787 }
788
789 static void phys_sections_clear(void)
790 {
791 phys_sections_nb = 0;
792 }
793
794 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
795 {
796 subpage_t *subpage;
797 hwaddr base = section->offset_within_address_space
798 & TARGET_PAGE_MASK;
799 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
800 MemoryRegionSection subsection = {
801 .offset_within_address_space = base,
802 .size = TARGET_PAGE_SIZE,
803 };
804 hwaddr start, end;
805
806 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
807
808 if (!(existing->mr->subpage)) {
809 subpage = subpage_init(base);
810 subsection.mr = &subpage->iomem;
811 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
812 phys_section_add(&subsection));
813 } else {
814 subpage = container_of(existing->mr, subpage_t, iomem);
815 }
816 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
817 end = start + section->size - 1;
818 subpage_register(subpage, start, end, phys_section_add(section));
819 }
820
821
822 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
823 {
824 hwaddr start_addr = section->offset_within_address_space;
825 ram_addr_t size = section->size;
826 hwaddr addr;
827 uint16_t section_index = phys_section_add(section);
828
829 assert(size);
830
831 addr = start_addr;
832 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
833 section_index);
834 }
835
836 QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > MAX_PHYS_ADDR_SPACE_BITS)
837
838 static MemoryRegionSection limit(MemoryRegionSection section)
839 {
840 section.size = MIN(section.offset_within_address_space + section.size,
841 MAX_PHYS_ADDR + 1)
842 - section.offset_within_address_space;
843
844 return section;
845 }
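/* limit() clamps a section so that it does not extend beyond MAX_PHYS_ADDR;
 * only the size is adjusted, the offsets are left untouched.
 */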
846
847 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
848 {
849 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
850 MemoryRegionSection now = limit(*section), remain = limit(*section);
851
852 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
853 || (now.size < TARGET_PAGE_SIZE)) {
854 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
855 - now.offset_within_address_space,
856 now.size);
857 register_subpage(d, &now);
858 remain.size -= now.size;
859 remain.offset_within_address_space += now.size;
860 remain.offset_within_region += now.size;
861 }
862 while (remain.size >= TARGET_PAGE_SIZE) {
863 now = remain;
864 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
865 now.size = TARGET_PAGE_SIZE;
866 register_subpage(d, &now);
867 } else {
868 now.size &= TARGET_PAGE_MASK;
869 register_multipage(d, &now);
870 }
871 remain.size -= now.size;
872 remain.offset_within_address_space += now.size;
873 remain.offset_within_region += now.size;
874 }
875 now = remain;
876 if (now.size) {
877 register_subpage(d, &now);
878 }
879 }
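/* mem_add() splits an incoming section into up to three pieces: an unaligned
 * head and a sub-page tail that go through the subpage machinery, and a
 * page-aligned middle.  The middle is registered as full pages when the
 * offset within the region is also page aligned; otherwise it is fed to
 * register_subpage() one page at a time.
 */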
880
881 void qemu_flush_coalesced_mmio_buffer(void)
882 {
883 if (kvm_enabled())
884 kvm_flush_coalesced_mmio_buffer();
885 }
886
887 void qemu_mutex_lock_ramlist(void)
888 {
889 qemu_mutex_lock(&ram_list.mutex);
890 }
891
892 void qemu_mutex_unlock_ramlist(void)
893 {
894 qemu_mutex_unlock(&ram_list.mutex);
895 }
896
897 #if defined(__linux__) && !defined(TARGET_S390X)
898
899 #include <sys/vfs.h>
900
901 #define HUGETLBFS_MAGIC 0x958458f6
902
903 static long gethugepagesize(const char *path)
904 {
905 struct statfs fs;
906 int ret;
907
908 do {
909 ret = statfs(path, &fs);
910 } while (ret != 0 && errno == EINTR);
911
912 if (ret != 0) {
913 perror(path);
914 return 0;
915 }
916
917 if (fs.f_type != HUGETLBFS_MAGIC)
918 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
919
920 return fs.f_bsize;
921 }
922
923 static void *file_ram_alloc(RAMBlock *block,
924 ram_addr_t memory,
925 const char *path)
926 {
927 char *filename;
928 char *sanitized_name;
929 char *c;
930 void *area;
931 int fd;
932 #ifdef MAP_POPULATE
933 int flags;
934 #endif
935 unsigned long hpagesize;
936
937 hpagesize = gethugepagesize(path);
938 if (!hpagesize) {
939 return NULL;
940 }
941
942 if (memory < hpagesize) {
943 return NULL;
944 }
945
946 if (kvm_enabled() && !kvm_has_sync_mmu()) {
947 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
948 return NULL;
949 }
950
951 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
952 sanitized_name = g_strdup(block->mr->name);
953 for (c = sanitized_name; *c != '\0'; c++) {
954 if (*c == '/')
955 *c = '_';
956 }
957
958 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
959 sanitized_name);
960 g_free(sanitized_name);
961
962 fd = mkstemp(filename);
963 if (fd < 0) {
964 perror("unable to create backing store for hugepages");
965 g_free(filename);
966 return NULL;
967 }
968 unlink(filename);
969 g_free(filename);
970
971 memory = (memory+hpagesize-1) & ~(hpagesize-1);
972
973 /*
974 * ftruncate is not supported by hugetlbfs in older
975 * hosts, so don't bother bailing out on errors.
976 * If anything goes wrong with it under other filesystems,
977 * mmap will fail.
978 */
979 if (ftruncate(fd, memory))
980 perror("ftruncate");
981
982 #ifdef MAP_POPULATE
983 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
984 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
985 * to sidestep this quirk.
986 */
987 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
988 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
989 #else
990 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
991 #endif
992 if (area == MAP_FAILED) {
993 perror("file_ram_alloc: can't mmap RAM pages");
994 close(fd);
995 return (NULL);
996 }
997 block->fd = fd;
998 return area;
999 }
1000 #endif
1001
1002 static ram_addr_t find_ram_offset(ram_addr_t size)
1003 {
1004 RAMBlock *block, *next_block;
1005 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1006
1007 assert(size != 0); /* it would hand out the same offset multiple times */
1008
1009 if (QTAILQ_EMPTY(&ram_list.blocks))
1010 return 0;
1011
1012 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1013 ram_addr_t end, next = RAM_ADDR_MAX;
1014
1015 end = block->offset + block->length;
1016
1017 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1018 if (next_block->offset >= end) {
1019 next = MIN(next, next_block->offset);
1020 }
1021 }
1022 if (next - end >= size && next - end < mingap) {
1023 offset = end;
1024 mingap = next - end;
1025 }
1026 }
1027
1028 if (offset == RAM_ADDR_MAX) {
1029 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1030 (uint64_t)size);
1031 abort();
1032 }
1033
1034 return offset;
1035 }
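/* find_ram_offset() does a best-fit search: among all gaps between existing
 * blocks that are large enough, the smallest one is chosen, which keeps the
 * ram_addr_t space reasonably compact as blocks come and go.
 */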
1036
1037 ram_addr_t last_ram_offset(void)
1038 {
1039 RAMBlock *block;
1040 ram_addr_t last = 0;
1041
1042 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1043 last = MAX(last, block->offset + block->length);
1044
1045 return last;
1046 }
1047
1048 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1049 {
1050 int ret;
1051 QemuOpts *machine_opts;
1052
1053 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1054 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1055 if (machine_opts &&
1056 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1057 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1058 if (ret) {
1059 perror("qemu_madvise");
1060 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1061 "but dump_guest_core=off specified\n");
1062 }
1063 }
1064 }
1065
1066 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1067 {
1068 RAMBlock *new_block, *block;
1069
1070 new_block = NULL;
1071 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1072 if (block->offset == addr) {
1073 new_block = block;
1074 break;
1075 }
1076 }
1077 assert(new_block);
1078 assert(!new_block->idstr[0]);
1079
1080 if (dev) {
1081 char *id = qdev_get_dev_path(dev);
1082 if (id) {
1083 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1084 g_free(id);
1085 }
1086 }
1087 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1088
1089 /* This assumes the iothread lock is taken here too. */
1090 qemu_mutex_lock_ramlist();
1091 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1092 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1093 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1094 new_block->idstr);
1095 abort();
1096 }
1097 }
1098 qemu_mutex_unlock_ramlist();
1099 }
1100
1101 static int memory_try_enable_merging(void *addr, size_t len)
1102 {
1103 QemuOpts *opts;
1104
1105 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1106 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1107 /* disabled by the user */
1108 return 0;
1109 }
1110
1111 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1112 }
1113
1114 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1115 MemoryRegion *mr)
1116 {
1117 RAMBlock *block, *new_block;
1118
1119 size = TARGET_PAGE_ALIGN(size);
1120 new_block = g_malloc0(sizeof(*new_block));
1121
1122 /* This assumes the iothread lock is taken here too. */
1123 qemu_mutex_lock_ramlist();
1124 new_block->mr = mr;
1125 new_block->offset = find_ram_offset(size);
1126 if (host) {
1127 new_block->host = host;
1128 new_block->flags |= RAM_PREALLOC_MASK;
1129 } else {
1130 if (mem_path) {
1131 #if defined (__linux__) && !defined(TARGET_S390X)
1132 new_block->host = file_ram_alloc(new_block, size, mem_path);
1133 if (!new_block->host) {
1134 new_block->host = qemu_anon_ram_alloc(size);
1135 memory_try_enable_merging(new_block->host, size);
1136 }
1137 #else
1138 fprintf(stderr, "-mem-path option unsupported\n");
1139 exit(1);
1140 #endif
1141 } else {
1142 if (xen_enabled()) {
1143 xen_ram_alloc(new_block->offset, size, mr);
1144 } else if (kvm_enabled()) {
1145 /* some s390/kvm configurations have special constraints */
1146 new_block->host = kvm_ram_alloc(size);
1147 } else {
1148 new_block->host = qemu_anon_ram_alloc(size);
1149 }
1150 memory_try_enable_merging(new_block->host, size);
1151 }
1152 }
1153 new_block->length = size;
1154
1155 /* Keep the list sorted from biggest to smallest block. */
1156 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1157 if (block->length < new_block->length) {
1158 break;
1159 }
1160 }
1161 if (block) {
1162 QTAILQ_INSERT_BEFORE(block, new_block, next);
1163 } else {
1164 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1165 }
1166 ram_list.mru_block = NULL;
1167
1168 ram_list.version++;
1169 qemu_mutex_unlock_ramlist();
1170
1171 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1172 last_ram_offset() >> TARGET_PAGE_BITS);
1173 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1174 0, size >> TARGET_PAGE_BITS);
1175 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1176
1177 qemu_ram_setup_dump(new_block->host, size);
1178 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1179
1180 if (kvm_enabled())
1181 kvm_setup_guest_memory(new_block->host, size);
1182
1183 return new_block->offset;
1184 }
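/* Allocation strategy, in order of preference: a caller-provided host
 * pointer (RAM_PREALLOC), a hugetlbfs-backed file when -mem-path is given,
 * Xen or KVM specific allocators when those accelerators are active, and
 * plain anonymous memory otherwise.  The returned value is the block's
 * offset in the ram_addr_t space, not a host pointer; use
 * qemu_get_ram_ptr() to get at the memory itself.
 */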
1185
1186 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1187 {
1188 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1189 }
1190
1191 void qemu_ram_free_from_ptr(ram_addr_t addr)
1192 {
1193 RAMBlock *block;
1194
1195 /* This assumes the iothread lock is taken here too. */
1196 qemu_mutex_lock_ramlist();
1197 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1198 if (addr == block->offset) {
1199 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1200 ram_list.mru_block = NULL;
1201 ram_list.version++;
1202 g_free(block);
1203 break;
1204 }
1205 }
1206 qemu_mutex_unlock_ramlist();
1207 }
1208
1209 void qemu_ram_free(ram_addr_t addr)
1210 {
1211 RAMBlock *block;
1212
1213 /* This assumes the iothread lock is taken here too. */
1214 qemu_mutex_lock_ramlist();
1215 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1216 if (addr == block->offset) {
1217 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1218 ram_list.mru_block = NULL;
1219 ram_list.version++;
1220 if (block->flags & RAM_PREALLOC_MASK) {
1221 ;
1222 } else if (mem_path) {
1223 #if defined (__linux__) && !defined(TARGET_S390X)
1224 if (block->fd) {
1225 munmap(block->host, block->length);
1226 close(block->fd);
1227 } else {
1228 qemu_anon_ram_free(block->host, block->length);
1229 }
1230 #else
1231 abort();
1232 #endif
1233 } else {
1234 if (xen_enabled()) {
1235 xen_invalidate_map_cache_entry(block->host);
1236 } else {
1237 qemu_anon_ram_free(block->host, block->length);
1238 }
1239 }
1240 g_free(block);
1241 break;
1242 }
1243 }
1244 qemu_mutex_unlock_ramlist();
1245
1246 }
1247
1248 #ifndef _WIN32
1249 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1250 {
1251 RAMBlock *block;
1252 ram_addr_t offset;
1253 int flags;
1254 void *area, *vaddr;
1255
1256 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1257 offset = addr - block->offset;
1258 if (offset < block->length) {
1259 vaddr = block->host + offset;
1260 if (block->flags & RAM_PREALLOC_MASK) {
1261 ;
1262 } else {
1263 flags = MAP_FIXED;
1264 munmap(vaddr, length);
1265 if (mem_path) {
1266 #if defined(__linux__) && !defined(TARGET_S390X)
1267 if (block->fd) {
1268 #ifdef MAP_POPULATE
1269 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1270 MAP_PRIVATE;
1271 #else
1272 flags |= MAP_PRIVATE;
1273 #endif
1274 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1275 flags, block->fd, offset);
1276 } else {
1277 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1278 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1279 flags, -1, 0);
1280 }
1281 #else
1282 abort();
1283 #endif
1284 } else {
1285 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1286 flags |= MAP_SHARED | MAP_ANONYMOUS;
1287 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1288 flags, -1, 0);
1289 #else
1290 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1291 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1292 flags, -1, 0);
1293 #endif
1294 }
1295 if (area != vaddr) {
1296 fprintf(stderr, "Could not remap addr: "
1297 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1298 length, addr);
1299 exit(1);
1300 }
1301 memory_try_enable_merging(vaddr, length);
1302 qemu_ram_setup_dump(vaddr, length);
1303 }
1304 return;
1305 }
1306 }
1307 }
1308 #endif /* !_WIN32 */
1309
1310 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1311 With the exception of the softmmu code in this file, this should
1312 only be used for local memory (e.g. video ram) that the device owns,
1313 and knows it isn't going to access beyond the end of the block.
1314
1315 It should not be used for general purpose DMA.
1316 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1317 */
1318 void *qemu_get_ram_ptr(ram_addr_t addr)
1319 {
1320 RAMBlock *block;
1321
1322 /* The list is protected by the iothread lock here. */
1323 block = ram_list.mru_block;
1324 if (block && addr - block->offset < block->length) {
1325 goto found;
1326 }
1327 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1328 if (addr - block->offset < block->length) {
1329 goto found;
1330 }
1331 }
1332
1333 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1334 abort();
1335
1336 found:
1337 ram_list.mru_block = block;
1338 if (xen_enabled()) {
1339 /* We need to check if the requested address is in the RAM
1340 * because we don't want to map the entire memory in QEMU.
1341 * In that case just map until the end of the page.
1342 */
1343 if (block->offset == 0) {
1344 return xen_map_cache(addr, 0, 0);
1345 } else if (block->host == NULL) {
1346 block->host =
1347 xen_map_cache(block->offset, block->length, 1);
1348 }
1349 }
1350 return block->host + (addr - block->offset);
1351 }
1352
1353 /* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
1354 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1355 *
1356 * ??? Is this still necessary?
1357 */
1358 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1359 {
1360 RAMBlock *block;
1361
1362 /* The list is protected by the iothread lock here. */
1363 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1364 if (addr - block->offset < block->length) {
1365 if (xen_enabled()) {
1366 /* We need to check if the requested address is in the RAM
1367 * because we don't want to map the entire memory in QEMU.
1368 * In that case just map until the end of the page.
1369 */
1370 if (block->offset == 0) {
1371 return xen_map_cache(addr, 0, 0);
1372 } else if (block->host == NULL) {
1373 block->host =
1374 xen_map_cache(block->offset, block->length, 1);
1375 }
1376 }
1377 return block->host + (addr - block->offset);
1378 }
1379 }
1380
1381 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1382 abort();
1383
1384 return NULL;
1385 }
1386
1387 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1388 * but takes a size argument */
1389 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1390 {
1391 if (*size == 0) {
1392 return NULL;
1393 }
1394 if (xen_enabled()) {
1395 return xen_map_cache(addr, *size, 1);
1396 } else {
1397 RAMBlock *block;
1398
1399 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1400 if (addr - block->offset < block->length) {
1401 if (addr - block->offset + *size > block->length)
1402 *size = block->length - addr + block->offset;
1403 return block->host + (addr - block->offset);
1404 }
1405 }
1406
1407 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1408 abort();
1409 }
1410 }
1411
1412 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1413 {
1414 RAMBlock *block;
1415 uint8_t *host = ptr;
1416
1417 if (xen_enabled()) {
1418 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1419 return 0;
1420 }
1421
1422 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1423 /* This case appears when the block is not mapped. */
1424 if (block->host == NULL) {
1425 continue;
1426 }
1427 if (host - block->host < block->length) {
1428 *ram_addr = block->offset + (host - block->host);
1429 return 0;
1430 }
1431 }
1432
1433 return -1;
1434 }
1435
1436 /* Some of the softmmu routines need to translate from a host pointer
1437 (typically a TLB entry) back to a ram offset. */
1438 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1439 {
1440 ram_addr_t ram_addr;
1441
1442 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1443 fprintf(stderr, "Bad ram pointer %p\n", ptr);
1444 abort();
1445 }
1446 return ram_addr;
1447 }
1448
1449 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1450 uint64_t val, unsigned size)
1451 {
1452 int dirty_flags;
1453 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1454 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1455 tb_invalidate_phys_page_fast(ram_addr, size);
1456 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1457 }
1458 switch (size) {
1459 case 1:
1460 stb_p(qemu_get_ram_ptr(ram_addr), val);
1461 break;
1462 case 2:
1463 stw_p(qemu_get_ram_ptr(ram_addr), val);
1464 break;
1465 case 4:
1466 stl_p(qemu_get_ram_ptr(ram_addr), val);
1467 break;
1468 default:
1469 abort();
1470 }
1471 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1472 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1473 /* we remove the notdirty callback only if the code has been
1474 flushed */
1475 if (dirty_flags == 0xff)
1476 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1477 }
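/* Pages that contain translated code have their TLB entries redirected here
 * instead of to plain RAM.  On a write we first invalidate any TBs derived
 * from the page, perform the store, and then set the dirty flags; once every
 * flag (including CODE_DIRTY_FLAG) is set there is nothing left to track and
 * the mapping is switched back to a normal, fast RAM mapping via
 * tlb_set_dirty().
 */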
1478
1479 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1480 unsigned size, bool is_write)
1481 {
1482 return is_write;
1483 }
1484
1485 static const MemoryRegionOps notdirty_mem_ops = {
1486 .write = notdirty_mem_write,
1487 .valid.accepts = notdirty_mem_accepts,
1488 .endianness = DEVICE_NATIVE_ENDIAN,
1489 };
1490
1491 /* Generate a debug exception if a watchpoint has been hit. */
1492 static void check_watchpoint(int offset, int len_mask, int flags)
1493 {
1494 CPUArchState *env = cpu_single_env;
1495 target_ulong pc, cs_base;
1496 target_ulong vaddr;
1497 CPUWatchpoint *wp;
1498 int cpu_flags;
1499
1500 if (env->watchpoint_hit) {
1501 /* We re-entered the check after replacing the TB. Now raise
1502 * the debug interrupt so that it will trigger after the
1503 * current instruction. */
1504 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1505 return;
1506 }
1507 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1508 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1509 if ((vaddr == (wp->vaddr & len_mask) ||
1510 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1511 wp->flags |= BP_WATCHPOINT_HIT;
1512 if (!env->watchpoint_hit) {
1513 env->watchpoint_hit = wp;
1514 tb_check_watchpoint(env);
1515 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1516 env->exception_index = EXCP_DEBUG;
1517 cpu_loop_exit(env);
1518 } else {
1519 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1520 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1521 cpu_resume_from_signal(env, NULL);
1522 }
1523 }
1524 } else {
1525 wp->flags &= ~BP_WATCHPOINT_HIT;
1526 }
1527 }
1528 }
1529
1530 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1531 so these check for a hit then pass through to the normal out-of-line
1532 phys routines. */
1533 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1534 unsigned size)
1535 {
1536 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1537 switch (size) {
1538 case 1: return ldub_phys(addr);
1539 case 2: return lduw_phys(addr);
1540 case 4: return ldl_phys(addr);
1541 default: abort();
1542 }
1543 }
1544
1545 static void watch_mem_write(void *opaque, hwaddr addr,
1546 uint64_t val, unsigned size)
1547 {
1548 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1549 switch (size) {
1550 case 1:
1551 stb_phys(addr, val);
1552 break;
1553 case 2:
1554 stw_phys(addr, val);
1555 break;
1556 case 4:
1557 stl_phys(addr, val);
1558 break;
1559 default: abort();
1560 }
1561 }
1562
1563 static const MemoryRegionOps watch_mem_ops = {
1564 .read = watch_mem_read,
1565 .write = watch_mem_write,
1566 .endianness = DEVICE_NATIVE_ENDIAN,
1567 };
1568
1569 static uint64_t subpage_read(void *opaque, hwaddr addr,
1570 unsigned len)
1571 {
1572 subpage_t *mmio = opaque;
1573 unsigned int idx = SUBPAGE_IDX(addr);
1574 uint64_t val;
1575
1576 MemoryRegionSection *section;
1577 #if defined(DEBUG_SUBPAGE)
1578 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1579 mmio, len, addr, idx);
1580 #endif
1581
1582 section = &phys_sections[mmio->sub_section[idx]];
1583 addr += mmio->base;
1584 addr -= section->offset_within_address_space;
1585 addr += section->offset_within_region;
1586 io_mem_read(section->mr, addr, &val, len);
1587 return val;
1588 }
1589
1590 static void subpage_write(void *opaque, hwaddr addr,
1591 uint64_t value, unsigned len)
1592 {
1593 subpage_t *mmio = opaque;
1594 unsigned int idx = SUBPAGE_IDX(addr);
1595 MemoryRegionSection *section;
1596 #if defined(DEBUG_SUBPAGE)
1597 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1598 " idx %d value %"PRIx64"\n",
1599 __func__, mmio, len, addr, idx, value);
1600 #endif
1601
1602 section = &phys_sections[mmio->sub_section[idx]];
1603 addr += mmio->base;
1604 addr -= section->offset_within_address_space;
1605 addr += section->offset_within_region;
1606 io_mem_write(section->mr, addr, value, len);
1607 }
1608
1609 static bool subpage_accepts(void *opaque, hwaddr addr,
1610 unsigned size, bool is_write)
1611 {
1612 subpage_t *mmio = opaque;
1613 unsigned int idx = SUBPAGE_IDX(addr);
1614 MemoryRegionSection *section;
1615 #if defined(DEBUG_SUBPAGE)
1616 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx
1617 " idx %d\n", __func__, mmio,
1618 is_write ? 'w' : 'r', size, addr, idx);
1619 #endif
1620
1621 section = &phys_sections[mmio->sub_section[idx]];
1622 addr += mmio->base;
1623 addr -= section->offset_within_address_space;
1624 addr += section->offset_within_region;
1625 return memory_region_access_valid(section->mr, addr, size, is_write);
1626 }
1627
1628 static const MemoryRegionOps subpage_ops = {
1629 .read = subpage_read,
1630 .write = subpage_write,
1631 .valid.accepts = subpage_accepts,
1632 .endianness = DEVICE_NATIVE_ENDIAN,
1633 };
1634
1635 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1636 unsigned size)
1637 {
1638 ram_addr_t raddr = addr;
1639 void *ptr = qemu_get_ram_ptr(raddr);
1640 switch (size) {
1641 case 1: return ldub_p(ptr);
1642 case 2: return lduw_p(ptr);
1643 case 4: return ldl_p(ptr);
1644 default: abort();
1645 }
1646 }
1647
1648 static void subpage_ram_write(void *opaque, hwaddr addr,
1649 uint64_t value, unsigned size)
1650 {
1651 ram_addr_t raddr = addr;
1652 void *ptr = qemu_get_ram_ptr(raddr);
1653 switch (size) {
1654 case 1: return stb_p(ptr, value);
1655 case 2: return stw_p(ptr, value);
1656 case 4: return stl_p(ptr, value);
1657 default: abort();
1658 }
1659 }
1660
1661 static const MemoryRegionOps subpage_ram_ops = {
1662 .read = subpage_ram_read,
1663 .write = subpage_ram_write,
1664 .endianness = DEVICE_NATIVE_ENDIAN,
1665 };
1666
1667 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1668 uint16_t section)
1669 {
1670 int idx, eidx;
1671
1672 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1673 return -1;
1674 idx = SUBPAGE_IDX(start);
1675 eidx = SUBPAGE_IDX(end);
1676 #if defined(DEBUG_SUBPAGE)
1677 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", __func__,
1678 mmio, start, end, idx, eidx, section);
1679 #endif
1680 if (memory_region_is_ram(phys_sections[section].mr)) {
1681 MemoryRegionSection new_section = phys_sections[section];
1682 new_section.mr = &io_mem_subpage_ram;
1683 section = phys_section_add(&new_section);
1684 }
1685 for (; idx <= eidx; idx++) {
1686 mmio->sub_section[idx] = section;
1687 }
1688
1689 return 0;
1690 }
1691
1692 static subpage_t *subpage_init(hwaddr base)
1693 {
1694 subpage_t *mmio;
1695
1696 mmio = g_malloc0(sizeof(subpage_t));
1697
1698 mmio->base = base;
1699 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1700 "subpage", TARGET_PAGE_SIZE);
1701 mmio->iomem.subpage = true;
1702 #if defined(DEBUG_SUBPAGE)
1703 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1704 mmio, base, TARGET_PAGE_SIZE);
1705 #endif
1706 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1707
1708 return mmio;
1709 }
1710
1711 static uint16_t dummy_section(MemoryRegion *mr)
1712 {
1713 MemoryRegionSection section = {
1714 .mr = mr,
1715 .offset_within_address_space = 0,
1716 .offset_within_region = 0,
1717 .size = UINT64_MAX,
1718 };
1719
1720 return phys_section_add(&section);
1721 }
1722
1723 MemoryRegion *iotlb_to_region(hwaddr index)
1724 {
1725 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1726 }
1727
1728 static void io_mem_init(void)
1729 {
1730 memory_region_init_io(&io_mem_rom, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1731 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1732 "unassigned", UINT64_MAX);
1733 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1734 "notdirty", UINT64_MAX);
1735 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1736 "subpage-ram", UINT64_MAX);
1737 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1738 "watch", UINT64_MAX);
1739 }
1740
1741 static void mem_begin(MemoryListener *listener)
1742 {
1743 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1744
1745 destroy_all_mappings(d);
1746 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1747 }
1748
1749 static void core_begin(MemoryListener *listener)
1750 {
1751 phys_sections_clear();
1752 phys_section_unassigned = dummy_section(&io_mem_unassigned);
1753 phys_section_notdirty = dummy_section(&io_mem_notdirty);
1754 phys_section_rom = dummy_section(&io_mem_rom);
1755 phys_section_watch = dummy_section(&io_mem_watch);
1756 }
1757
1758 static void tcg_commit(MemoryListener *listener)
1759 {
1760 CPUArchState *env;
1761
1762 /* since each CPU stores ram addresses in its TLB cache, we must
1763 reset the modified entries */
1764 /* XXX: slow ! */
1765 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1766 tlb_flush(env, 1);
1767 }
1768 }
1769
1770 static void core_log_global_start(MemoryListener *listener)
1771 {
1772 cpu_physical_memory_set_dirty_tracking(1);
1773 }
1774
1775 static void core_log_global_stop(MemoryListener *listener)
1776 {
1777 cpu_physical_memory_set_dirty_tracking(0);
1778 }
1779
1780 static void io_region_add(MemoryListener *listener,
1781 MemoryRegionSection *section)
1782 {
1783 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1784
1785 mrio->mr = section->mr;
1786 mrio->offset = section->offset_within_region;
1787 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1788 section->offset_within_address_space, section->size);
1789 ioport_register(&mrio->iorange);
1790 }
1791
1792 static void io_region_del(MemoryListener *listener,
1793 MemoryRegionSection *section)
1794 {
1795 isa_unassign_ioport(section->offset_within_address_space, section->size);
1796 }
1797
1798 static MemoryListener core_memory_listener = {
1799 .begin = core_begin,
1800 .log_global_start = core_log_global_start,
1801 .log_global_stop = core_log_global_stop,
1802 .priority = 1,
1803 };
1804
1805 static MemoryListener io_memory_listener = {
1806 .region_add = io_region_add,
1807 .region_del = io_region_del,
1808 .priority = 0,
1809 };
1810
1811 static MemoryListener tcg_memory_listener = {
1812 .commit = tcg_commit,
1813 };
1814
1815 void address_space_init_dispatch(AddressSpace *as)
1816 {
1817 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1818
1819 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1820 d->listener = (MemoryListener) {
1821 .begin = mem_begin,
1822 .region_add = mem_add,
1823 .region_nop = mem_add,
1824 .priority = 0,
1825 };
1826 as->dispatch = d;
1827 memory_listener_register(&d->listener, as);
1828 }
1829
1830 void address_space_destroy_dispatch(AddressSpace *as)
1831 {
1832 AddressSpaceDispatch *d = as->dispatch;
1833
1834 memory_listener_unregister(&d->listener);
1835 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1836 g_free(d);
1837 as->dispatch = NULL;
1838 }
1839
1840 static void memory_map_init(void)
1841 {
1842 system_memory = g_malloc(sizeof(*system_memory));
1843 memory_region_init(system_memory, "system", INT64_MAX);
1844 address_space_init(&address_space_memory, system_memory);
1845 address_space_memory.name = "memory";
1846
1847 system_io = g_malloc(sizeof(*system_io));
1848 memory_region_init(system_io, "io", 65536);
1849 address_space_init(&address_space_io, system_io);
1850 address_space_io.name = "I/O";
1851
1852 memory_listener_register(&core_memory_listener, &address_space_memory);
1853 memory_listener_register(&io_memory_listener, &address_space_io);
1854 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1855
1856 dma_context_init(&dma_context_memory, &address_space_memory,
1857 NULL, NULL, NULL);
1858 }
1859
1860 MemoryRegion *get_system_memory(void)
1861 {
1862 return system_memory;
1863 }
1864
1865 MemoryRegion *get_system_io(void)
1866 {
1867 return system_io;
1868 }
1869
1870 #endif /* !defined(CONFIG_USER_ONLY) */
1871
1872 /* physical memory access (slow version, mainly for debug) */
1873 #if defined(CONFIG_USER_ONLY)
1874 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1875 uint8_t *buf, int len, int is_write)
1876 {
1877 int l, flags;
1878 target_ulong page;
1879 void * p;
1880
1881 while (len > 0) {
1882 page = addr & TARGET_PAGE_MASK;
1883 l = (page + TARGET_PAGE_SIZE) - addr;
1884 if (l > len)
1885 l = len;
1886 flags = page_get_flags(page);
1887 if (!(flags & PAGE_VALID))
1888 return -1;
1889 if (is_write) {
1890 if (!(flags & PAGE_WRITE))
1891 return -1;
1892 /* XXX: this code should not depend on lock_user */
1893 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1894 return -1;
1895 memcpy(p, buf, l);
1896 unlock_user(p, addr, l);
1897 } else {
1898 if (!(flags & PAGE_READ))
1899 return -1;
1900 /* XXX: this code should not depend on lock_user */
1901 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1902 return -1;
1903 memcpy(buf, p, l);
1904 unlock_user(p, addr, 0);
1905 }
1906 len -= l;
1907 buf += l;
1908 addr += l;
1909 }
1910 return 0;
1911 }
1912
1913 #else
1914
1915 static void invalidate_and_set_dirty(hwaddr addr,
1916 hwaddr length)
1917 {
1918 if (!cpu_physical_memory_is_dirty(addr)) {
1919 /* invalidate code */
1920 tb_invalidate_phys_page_range(addr, addr + length, 0);
1921 /* set dirty bit */
1922 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1923 }
1924 xen_modified_memory(addr, length);
1925 }
1926
1927 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1928 {
1929 if (memory_region_is_ram(mr)) {
1930 return !(is_write && mr->readonly);
1931 }
1932 if (memory_region_is_romd(mr)) {
1933 return !is_write;
1934 }
1935
1936 return false;
1937 }
1938
1939 static inline int memory_access_size(MemoryRegion *mr, int l, hwaddr addr)
1940 {
1941 if (l >= 4 && (((addr & 3) == 0) || mr->ops->impl.unaligned)) {
1942 return 4;
1943 }
1944 if (l >= 2 && (((addr & 1) == 0) || mr->ops->impl.unaligned)) {
1945 return 2;
1946 }
1947 return 1;
1948 }
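/* Accesses to MMIO regions are broken into at most 4-byte chunks, dropping
 * to 2 or 1 bytes when the address is not sufficiently aligned, unless the
 * region's implementation accepts unaligned accesses (impl.unaligned).
 */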
1949
1950 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1951 int len, bool is_write)
1952 {
1953 hwaddr l;
1954 uint8_t *ptr;
1955 uint64_t val;
1956 hwaddr addr1;
1957 MemoryRegionSection *section;
1958 bool error = false;
1959
1960 while (len > 0) {
1961 l = len;
1962 section = address_space_translate(as, addr, &addr1, &l, is_write);
1963
1964 if (is_write) {
1965 if (!memory_access_is_direct(section->mr, is_write)) {
1966 l = memory_access_size(section->mr, l, addr1);
1967 /* XXX: could force cpu_single_env to NULL to avoid
1968 potential bugs */
1969 if (l == 4) {
1970 /* 32 bit write access */
1971 val = ldl_p(buf);
1972 error |= io_mem_write(section->mr, addr1, val, 4);
1973 } else if (l == 2) {
1974 /* 16 bit write access */
1975 val = lduw_p(buf);
1976 error |= io_mem_write(section->mr, addr1, val, 2);
1977 } else {
1978 /* 8 bit write access */
1979 val = ldub_p(buf);
1980 error |= io_mem_write(section->mr, addr1, val, 1);
1981 }
1982 } else {
1983 addr1 += memory_region_get_ram_addr(section->mr);
1984 /* RAM case */
1985 ptr = qemu_get_ram_ptr(addr1);
1986 memcpy(ptr, buf, l);
1987 invalidate_and_set_dirty(addr1, l);
1988 }
1989 } else {
1990 if (!memory_access_is_direct(section->mr, is_write)) {
1991 /* I/O case */
1992 l = memory_access_size(section->mr, l, addr1);
1993 if (l == 4) {
1994 /* 32 bit read access */
1995 error |= io_mem_read(section->mr, addr1, &val, 4);
1996 stl_p(buf, val);
1997 } else if (l == 2) {
1998 /* 16 bit read access */
1999 error |= io_mem_read(section->mr, addr1, &val, 2);
2000 stw_p(buf, val);
2001 } else {
2002 /* 8 bit read access */
2003 error |= io_mem_read(section->mr, addr1, &val, 1);
2004 stb_p(buf, val);
2005 }
2006 } else {
2007 /* RAM case */
2008 ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
2009 memcpy(buf, ptr, l);
2010 }
2011 }
2012 len -= l;
2013 buf += l;
2014 addr += l;
2015 }
2016
2017 return error;
2018 }
2019
2020 bool address_space_write(AddressSpace *as, hwaddr addr,
2021 const uint8_t *buf, int len)
2022 {
2023 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2024 }
2025
2026 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2027 {
2028 return address_space_rw(as, addr, buf, len, false);
2029 }
2030
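/*
 * Usage sketch (illustrative; "guest_pa", "resp" and "resp_len" are
 * hypothetical): a device model can push a buffer into guest memory and
 * check whether any access in the range failed:
 *
 *     if (address_space_write(&address_space_memory, guest_pa,
 *                             resp, resp_len)) {
 *         // at least one MMIO access in the range reported an error
 *     }
 */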
2031
2032 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2033 int len, int is_write)
2034 {
2035 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2036 }
2037
2038 /* used for ROM loading: can write in RAM and ROM */
2039 void cpu_physical_memory_write_rom(hwaddr addr,
2040 const uint8_t *buf, int len)
2041 {
2042 hwaddr l;
2043 uint8_t *ptr;
2044 hwaddr addr1;
2045 MemoryRegionSection *section;
2046
2047 while (len > 0) {
2048 l = len;
2049 section = address_space_translate(&address_space_memory,
2050 addr, &addr1, &l, true);
2051
2052 if (!(memory_region_is_ram(section->mr) ||
2053 memory_region_is_romd(section->mr))) {
2054 /* do nothing */
2055 } else {
2056 addr1 += memory_region_get_ram_addr(section->mr);
2057 /* ROM/RAM case */
2058 ptr = qemu_get_ram_ptr(addr1);
2059 memcpy(ptr, buf, l);
2060 invalidate_and_set_dirty(addr1, l);
2061 }
2062 len -= l;
2063 buf += l;
2064 addr += l;
2065 }
2066 }
2067
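/*
 * Usage sketch (illustrative; FIRMWARE_BASE, "blob" and "blob_size" are
 * hypothetical): board code can use this to copy a firmware image into a
 * ROM region that a normal cpu_physical_memory_rw() write would not modify:
 *
 *     cpu_physical_memory_write_rom(FIRMWARE_BASE, blob, blob_size);
 */
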
2068 typedef struct {
2069 void *buffer;
2070 hwaddr addr;
2071 hwaddr len;
2072 } BounceBuffer;
2073
2074 static BounceBuffer bounce;
2075
2076 typedef struct MapClient {
2077 void *opaque;
2078 void (*callback)(void *opaque);
2079 QLIST_ENTRY(MapClient) link;
2080 } MapClient;
2081
2082 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2083 = QLIST_HEAD_INITIALIZER(map_client_list);
2084
2085 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2086 {
2087 MapClient *client = g_malloc(sizeof(*client));
2088
2089 client->opaque = opaque;
2090 client->callback = callback;
2091 QLIST_INSERT_HEAD(&map_client_list, client, link);
2092 return client;
2093 }
2094
2095 static void cpu_unregister_map_client(void *_client)
2096 {
2097 MapClient *client = (MapClient *)_client;
2098
2099 QLIST_REMOVE(client, link);
2100 g_free(client);
2101 }
2102
2103 static void cpu_notify_map_clients(void)
2104 {
2105 MapClient *client;
2106
2107 while (!QLIST_EMPTY(&map_client_list)) {
2108 client = QLIST_FIRST(&map_client_list);
2109 client->callback(client->opaque);
2110 cpu_unregister_map_client(client);
2111 }
2112 }
2113
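/*
 * Usage sketch (illustrative; "opaque" and "retry_fn" are hypothetical):
 * when address_space_map() cannot hand out the single bounce buffer, the
 * caller can ask to be called back once it is released:
 *
 *     if (!address_space_map(&address_space_memory, addr, &plen, is_write)) {
 *         cpu_register_map_client(opaque, retry_fn);
 *     }
 *
 * retry_fn() then runs from cpu_notify_map_clients(), i.e. when
 * address_space_unmap() frees the bounce buffer.
 */
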
2114 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2115 {
2116 MemoryRegionSection *section;
2117 hwaddr l, xlat;
2118
2119 while (len > 0) {
2120 l = len;
2121 section = address_space_translate(as, addr, &xlat, &l, is_write);
2122 if (!memory_access_is_direct(section->mr, is_write)) {
2123 l = memory_access_size(section->mr, l, addr);
2124 if (!memory_region_access_valid(section->mr, xlat, l, is_write)) {
2125 return false;
2126 }
2127 }
2128
2129 len -= l;
2130 addr += l;
2131 }
2132 return true;
2133 }
2134
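/*
 * Usage sketch (illustrative; "desc_pa" and "desc_len" are hypothetical):
 * validate a guest-supplied window before committing to a transfer:
 *
 *     if (!address_space_access_valid(&address_space_memory,
 *                                     desc_pa, desc_len, false)) {
 *         // report an error to the guest instead of reading
 *     }
 */
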
2135 /* Map a physical memory region into a host virtual address.
2136 * May map a subset of the requested range, given by and returned in *plen.
2137 * May return NULL if resources needed to perform the mapping are exhausted.
2138 * Use only for reads OR writes - not for read-modify-write operations.
2139 * Use cpu_register_map_client() to know when retrying the map operation is
2140 * likely to succeed.
2141 */
2142 void *address_space_map(AddressSpace *as,
2143 hwaddr addr,
2144 hwaddr *plen,
2145 bool is_write)
2146 {
2147 hwaddr len = *plen;
2148 hwaddr todo = 0;
2149 hwaddr l, xlat;
2150 MemoryRegionSection *section;
2151 ram_addr_t raddr = RAM_ADDR_MAX;
2152 ram_addr_t rlen;
2153 void *ret;
2154
2155 while (len > 0) {
2156 l = len;
2157 section = address_space_translate(as, addr, &xlat, &l, is_write);
2158
2159 if (!memory_access_is_direct(section->mr, is_write)) {
2160 if (todo || bounce.buffer) {
2161 break;
2162 }
2163 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2164 bounce.addr = addr;
2165 bounce.len = l;
2166 if (!is_write) {
2167 address_space_read(as, addr, bounce.buffer, l);
2168 }
2169
2170 *plen = l;
2171 return bounce.buffer;
2172 }
2173 if (!todo) {
2174 raddr = memory_region_get_ram_addr(section->mr) + xlat;
2175 } else {
2176 if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
2177 break;
2178 }
2179 }
2180
2181 len -= l;
2182 addr += l;
2183 todo += l;
2184 }
2185 rlen = todo;
2186 ret = qemu_ram_ptr_length(raddr, &rlen);
2187 *plen = rlen;
2188 return ret;
2189 }
2190
2191 /* Unmaps a memory region previously mapped by address_space_map().
2192 * Will also mark the memory as dirty if is_write == 1. access_len gives
2193 * the amount of memory that was actually read or written by the caller.
2194 */
2195 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2196 int is_write, hwaddr access_len)
2197 {
2198 if (buffer != bounce.buffer) {
2199 if (is_write) {
2200 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2201 while (access_len) {
2202 unsigned l;
2203 l = TARGET_PAGE_SIZE;
2204 if (l > access_len)
2205 l = access_len;
2206 invalidate_and_set_dirty(addr1, l);
2207 addr1 += l;
2208 access_len -= l;
2209 }
2210 }
2211 if (xen_enabled()) {
2212 xen_invalidate_map_cache_entry(buffer);
2213 }
2214 return;
2215 }
2216 if (is_write) {
2217 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2218 }
2219 qemu_vfree(bounce.buffer);
2220 bounce.buffer = NULL;
2221 cpu_notify_map_clients();
2222 }
2223
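/*
 * Usage sketch (illustrative; "pa" and "size" are hypothetical): zero-copy
 * access to guest memory, honouring the possibly shortened length returned
 * in *plen:
 *
 *     hwaddr plen = size;
 *     uint8_t *host = address_space_map(&address_space_memory, pa, &plen, true);
 *     if (host) {
 *         memset(host, 0, plen);    // touch at most plen bytes
 *         address_space_unmap(&address_space_memory, host, plen, true, plen);
 *     }
 */
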
2224 void *cpu_physical_memory_map(hwaddr addr,
2225 hwaddr *plen,
2226 int is_write)
2227 {
2228 return address_space_map(&address_space_memory, addr, plen, is_write);
2229 }
2230
2231 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2232 int is_write, hwaddr access_len)
2233 {
2234 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2235 }
2236
2237 /* warning: addr must be aligned */
2238 static inline uint32_t ldl_phys_internal(hwaddr addr,
2239 enum device_endian endian)
2240 {
2241 uint8_t *ptr;
2242 uint64_t val;
2243 MemoryRegionSection *section;
2244 hwaddr l = 4;
2245 hwaddr addr1;
2246
2247 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2248 false);
2249 if (l < 4 || !memory_access_is_direct(section->mr, false)) {
2250 /* I/O case */
2251 io_mem_read(section->mr, addr1, &val, 4);
2252 #if defined(TARGET_WORDS_BIGENDIAN)
2253 if (endian == DEVICE_LITTLE_ENDIAN) {
2254 val = bswap32(val);
2255 }
2256 #else
2257 if (endian == DEVICE_BIG_ENDIAN) {
2258 val = bswap32(val);
2259 }
2260 #endif
2261 } else {
2262 /* RAM case */
2263 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2264 & TARGET_PAGE_MASK)
2265 + addr1);
2266 switch (endian) {
2267 case DEVICE_LITTLE_ENDIAN:
2268 val = ldl_le_p(ptr);
2269 break;
2270 case DEVICE_BIG_ENDIAN:
2271 val = ldl_be_p(ptr);
2272 break;
2273 default:
2274 val = ldl_p(ptr);
2275 break;
2276 }
2277 }
2278 return val;
2279 }
2280
2281 uint32_t ldl_phys(hwaddr addr)
2282 {
2283 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2284 }
2285
2286 uint32_t ldl_le_phys(hwaddr addr)
2287 {
2288 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2289 }
2290
2291 uint32_t ldl_be_phys(hwaddr addr)
2292 {
2293 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2294 }
2295
2296 /* warning: addr must be aligned */
2297 static inline uint64_t ldq_phys_internal(hwaddr addr,
2298 enum device_endian endian)
2299 {
2300 uint8_t *ptr;
2301 uint64_t val;
2302 MemoryRegionSection *section;
2303 hwaddr l = 8;
2304 hwaddr addr1;
2305
2306 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2307 false);
2308 if (l < 8 || !memory_access_is_direct(section->mr, false)) {
2309 /* I/O case */
2310 io_mem_read(section->mr, addr1, &val, 8);
2311 #if defined(TARGET_WORDS_BIGENDIAN)
2312 if (endian == DEVICE_LITTLE_ENDIAN) {
2313 val = bswap64(val);
2314 }
2315 #else
2316 if (endian == DEVICE_BIG_ENDIAN) {
2317 val = bswap64(val);
2318 }
2319 #endif
2320 } else {
2321 /* RAM case */
2322 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2323 & TARGET_PAGE_MASK)
2324 + addr1);
2325 switch (endian) {
2326 case DEVICE_LITTLE_ENDIAN:
2327 val = ldq_le_p(ptr);
2328 break;
2329 case DEVICE_BIG_ENDIAN:
2330 val = ldq_be_p(ptr);
2331 break;
2332 default:
2333 val = ldq_p(ptr);
2334 break;
2335 }
2336 }
2337 return val;
2338 }
2339
2340 uint64_t ldq_phys(hwaddr addr)
2341 {
2342 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2343 }
2344
2345 uint64_t ldq_le_phys(hwaddr addr)
2346 {
2347 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2348 }
2349
2350 uint64_t ldq_be_phys(hwaddr addr)
2351 {
2352 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2353 }
2354
2355 /* XXX: optimize */
2356 uint32_t ldub_phys(hwaddr addr)
2357 {
2358 uint8_t val;
2359 cpu_physical_memory_read(addr, &val, 1);
2360 return val;
2361 }
2362
2363 /* warning: addr must be aligned */
2364 static inline uint32_t lduw_phys_internal(hwaddr addr,
2365 enum device_endian endian)
2366 {
2367 uint8_t *ptr;
2368 uint64_t val;
2369 MemoryRegionSection *section;
2370 hwaddr l = 2;
2371 hwaddr addr1;
2372
2373 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2374 false);
2375 if (l < 2 || !memory_access_is_direct(section->mr, false)) {
2376 /* I/O case */
2377 io_mem_read(section->mr, addr1, &val, 2);
2378 #if defined(TARGET_WORDS_BIGENDIAN)
2379 if (endian == DEVICE_LITTLE_ENDIAN) {
2380 val = bswap16(val);
2381 }
2382 #else
2383 if (endian == DEVICE_BIG_ENDIAN) {
2384 val = bswap16(val);
2385 }
2386 #endif
2387 } else {
2388 /* RAM case */
2389 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2390 & TARGET_PAGE_MASK)
2391 + addr1);
2392 switch (endian) {
2393 case DEVICE_LITTLE_ENDIAN:
2394 val = lduw_le_p(ptr);
2395 break;
2396 case DEVICE_BIG_ENDIAN:
2397 val = lduw_be_p(ptr);
2398 break;
2399 default:
2400 val = lduw_p(ptr);
2401 break;
2402 }
2403 }
2404 return val;
2405 }
2406
2407 uint32_t lduw_phys(hwaddr addr)
2408 {
2409 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2410 }
2411
2412 uint32_t lduw_le_phys(hwaddr addr)
2413 {
2414 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2415 }
2416
2417 uint32_t lduw_be_phys(hwaddr addr)
2418 {
2419 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2420 }
2421
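/*
 * Usage sketch (illustrative; "desc_pa" is hypothetical): the _le/_be
 * variants give a fixed byte order regardless of the target's endianness,
 * e.g. reading a little-endian descriptor field:
 *
 *     uint32_t flags = ldl_le_phys(desc_pa);
 *
 * while ldl_phys()/lduw_phys()/ldq_phys() return the value in the target's
 * native byte order.
 */
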
2422 /* warning: addr must be aligned. The RAM page is not marked as dirty
2423 and the code inside is not invalidated. This is useful when the dirty
2424 bits are used to track modified PTEs. */
2425 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2426 {
2427 uint8_t *ptr;
2428 MemoryRegionSection *section;
2429 hwaddr l = 4;
2430 hwaddr addr1;
2431
2432 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2433 true);
2434 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2435 io_mem_write(section->mr, addr1, val, 4);
2436 } else {
2437 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2438 ptr = qemu_get_ram_ptr(addr1);
2439 stl_p(ptr, val);
2440
2441 if (unlikely(in_migration)) {
2442 if (!cpu_physical_memory_is_dirty(addr1)) {
2443 /* invalidate code */
2444 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2445 /* set dirty bit */
2446 cpu_physical_memory_set_dirty_flags(
2447 addr1, (0xff & ~CODE_DIRTY_FLAG));
2448 }
2449 }
2450 }
2451 }
2452
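/*
 * Usage sketch (illustrative; "pte_pa" and PTE_ACCESSED are hypothetical):
 * MMU emulation can update a page table entry in guest RAM without marking
 * the page dirty or invalidating translated code:
 *
 *     uint32_t pte = ldl_phys(pte_pa);
 *     stl_phys_notdirty(pte_pa, pte | PTE_ACCESSED);
 */
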
2453 /* warning: addr must be aligned */
2454 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2455 enum device_endian endian)
2456 {
2457 uint8_t *ptr;
2458 MemoryRegionSection *section;
2459 hwaddr l = 4;
2460 hwaddr addr1;
2461
2462 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2463 true);
2464 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2465 #if defined(TARGET_WORDS_BIGENDIAN)
2466 if (endian == DEVICE_LITTLE_ENDIAN) {
2467 val = bswap32(val);
2468 }
2469 #else
2470 if (endian == DEVICE_BIG_ENDIAN) {
2471 val = bswap32(val);
2472 }
2473 #endif
2474 io_mem_write(section->mr, addr1, val, 4);
2475 } else {
2476 /* RAM case */
2477 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2478 ptr = qemu_get_ram_ptr(addr1);
2479 switch (endian) {
2480 case DEVICE_LITTLE_ENDIAN:
2481 stl_le_p(ptr, val);
2482 break;
2483 case DEVICE_BIG_ENDIAN:
2484 stl_be_p(ptr, val);
2485 break;
2486 default:
2487 stl_p(ptr, val);
2488 break;
2489 }
2490 invalidate_and_set_dirty(addr1, 4);
2491 }
2492 }
2493
2494 void stl_phys(hwaddr addr, uint32_t val)
2495 {
2496 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2497 }
2498
2499 void stl_le_phys(hwaddr addr, uint32_t val)
2500 {
2501 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2502 }
2503
2504 void stl_be_phys(hwaddr addr, uint32_t val)
2505 {
2506 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2507 }
2508
2509 /* XXX: optimize */
2510 void stb_phys(hwaddr addr, uint32_t val)
2511 {
2512 uint8_t v = val;
2513 cpu_physical_memory_write(addr, &v, 1);
2514 }
2515
2516 /* warning: addr must be aligned */
2517 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2518 enum device_endian endian)
2519 {
2520 uint8_t *ptr;
2521 MemoryRegionSection *section;
2522 hwaddr l = 2;
2523 hwaddr addr1;
2524
2525 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2526 true);
2527 if (l < 2 || !memory_access_is_direct(section->mr, true)) {
2528 #if defined(TARGET_WORDS_BIGENDIAN)
2529 if (endian == DEVICE_LITTLE_ENDIAN) {
2530 val = bswap16(val);
2531 }
2532 #else
2533 if (endian == DEVICE_BIG_ENDIAN) {
2534 val = bswap16(val);
2535 }
2536 #endif
2537 io_mem_write(section->mr, addr1, val, 2);
2538 } else {
2539 /* RAM case */
2540 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2541 ptr = qemu_get_ram_ptr(addr1);
2542 switch (endian) {
2543 case DEVICE_LITTLE_ENDIAN:
2544 stw_le_p(ptr, val);
2545 break;
2546 case DEVICE_BIG_ENDIAN:
2547 stw_be_p(ptr, val);
2548 break;
2549 default:
2550 stw_p(ptr, val);
2551 break;
2552 }
2553 invalidate_and_set_dirty(addr1, 2);
2554 }
2555 }
2556
2557 void stw_phys(hwaddr addr, uint32_t val)
2558 {
2559 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2560 }
2561
2562 void stw_le_phys(hwaddr addr, uint32_t val)
2563 {
2564 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2565 }
2566
2567 void stw_be_phys(hwaddr addr, uint32_t val)
2568 {
2569 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2570 }
2571
2572 /* XXX: optimize */
2573 void stq_phys(hwaddr addr, uint64_t val)
2574 {
2575 val = tswap64(val);
2576 cpu_physical_memory_write(addr, &val, 8);
2577 }
2578
2579 void stq_le_phys(hwaddr addr, uint64_t val)
2580 {
2581 val = cpu_to_le64(val);
2582 cpu_physical_memory_write(addr, &val, 8);
2583 }
2584
2585 void stq_be_phys(hwaddr addr, uint64_t val)
2586 {
2587 val = cpu_to_be64(val);
2588 cpu_physical_memory_write(addr, &val, 8);
2589 }
2590
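/*
 * Usage sketch (illustrative; "ring_pa" and "status" are hypothetical):
 * the st*_phys helpers mirror the loads above, e.g. writing a 64-bit
 * big-endian completion record into guest memory:
 *
 *     stq_be_phys(ring_pa, status);
 */
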
2591 /* virtual memory access for debug (includes writing to ROM) */
2592 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2593 uint8_t *buf, int len, int is_write)
2594 {
2595 int l;
2596 hwaddr phys_addr;
2597 target_ulong page;
2598
2599 while (len > 0) {
2600 page = addr & TARGET_PAGE_MASK;
2601 phys_addr = cpu_get_phys_page_debug(env, page);
2602 /* if no physical page mapped, return an error */
2603 if (phys_addr == -1)
2604 return -1;
2605 l = (page + TARGET_PAGE_SIZE) - addr;
2606 if (l > len)
2607 l = len;
2608 phys_addr += (addr & ~TARGET_PAGE_MASK);
2609 if (is_write)
2610 cpu_physical_memory_write_rom(phys_addr, buf, l);
2611 else
2612 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2613 len -= l;
2614 buf += l;
2615 addr += l;
2616 }
2617 return 0;
2618 }
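
/*
 * Usage sketch (illustrative; "env", "vaddr" and "buf" are hypothetical):
 * this is the kind of call a debugger stub makes to read guest virtual
 * memory through the CPU's current MMU state:
 *
 *     if (cpu_memory_rw_debug(env, vaddr, buf, sizeof(buf), 0) < 0) {
 *         // no physical page mapped at vaddr
 *     }
 */
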
2619 #endif
2620
2621 #if !defined(CONFIG_USER_ONLY)
2622
2623 /*
2624 * A helper function for the _utterly broken_ virtio device model to find out if
2625 * it's running on a big endian machine. Don't do this at home kids!
2626 */
2627 bool virtio_is_big_endian(void);
2628 bool virtio_is_big_endian(void)
2629 {
2630 #if defined(TARGET_WORDS_BIGENDIAN)
2631 return true;
2632 #else
2633 return false;
2634 #endif
2635 }
2636
2637 #endif
2638
2639 #ifndef CONFIG_USER_ONLY
2640 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2641 {
2642 MemoryRegionSection *section;
2643 hwaddr l = 1;
2644
2645 section = address_space_translate(&address_space_memory,
2646 phys_addr, &phys_addr, &l, false);
2647
2648 return !(memory_region_is_ram(section->mr) ||
2649 memory_region_is_romd(section->mr));
2650 }
2651 #endif