[qemu.git] / exec.c (blob at commit "memory: make section size a 128-bit integer")
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_SUBPAGE
54
55 #if !defined(CONFIG_USER_ONLY)
56 int phys_ram_fd;
57 static int in_migration;
58
59 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
60
61 static MemoryRegion *system_memory;
62 static MemoryRegion *system_io;
63
64 AddressSpace address_space_io;
65 AddressSpace address_space_memory;
66 DMAContext dma_context_memory;
67
68 MemoryRegion io_mem_rom, io_mem_notdirty;
69 static MemoryRegion io_mem_unassigned;
70
71 #endif
72
73 CPUArchState *first_cpu;
74 /* current CPU in the current thread. It is only valid inside
75 cpu_exec() */
76 DEFINE_TLS(CPUArchState *,cpu_single_env);
77 /* 0 = Do not count executed instructions.
78 1 = Precise instruction counting.
79 2 = Adaptive rate instruction counting. */
80 int use_icount;
81
82 #if !defined(CONFIG_USER_ONLY)
83
84 typedef struct PhysPageEntry PhysPageEntry;
85
86 struct PhysPageEntry {
87 uint16_t is_leaf : 1;
88 /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
89 uint16_t ptr : 15;
90 };
91
92 struct AddressSpaceDispatch {
93 /* This is a multi-level map on the physical address space.
94 * The bottom level has pointers to MemoryRegionSections.
95 */
96 PhysPageEntry phys_map;
97 MemoryListener listener;
98 AddressSpace *as;
99 };
100
101 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
102 typedef struct subpage_t {
103 MemoryRegion iomem;
104 AddressSpace *as;
105 hwaddr base;
106 uint16_t sub_section[TARGET_PAGE_SIZE];
107 } subpage_t;
108
109 static MemoryRegionSection *phys_sections;
110 static unsigned phys_sections_nb, phys_sections_nb_alloc;
111 static uint16_t phys_section_unassigned;
112 static uint16_t phys_section_notdirty;
113 static uint16_t phys_section_rom;
114 static uint16_t phys_section_watch;
115
116 /* Simple allocator for PhysPageEntry nodes */
117 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
118 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
119
120 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
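/*
 * A PhysPageEntry packs one radix-tree node reference into 16 bits:
 * 'is_leaf' selects whether 'ptr' indexes phys_sections (leaf) or
 * phys_map_nodes (interior node).  Since 'ptr' is only 15 bits wide,
 * the all-ones value PHYS_MAP_NODE_NIL is reserved as the "nothing
 * allocated here yet" sentinel, which also caps the node array at
 * 2^15 - 1 entries.
 */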
121
122 static void io_mem_init(void);
123 static void memory_map_init(void);
124 static void *qemu_safe_ram_ptr(ram_addr_t addr);
125
126 static MemoryRegion io_mem_watch;
127 #endif
128
129 #if !defined(CONFIG_USER_ONLY)
130
131 static void phys_map_node_reserve(unsigned nodes)
132 {
133 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
134 typedef PhysPageEntry Node[L2_SIZE];
135 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
136 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
137 phys_map_nodes_nb + nodes);
138 phys_map_nodes = g_renew(Node, phys_map_nodes,
139 phys_map_nodes_nb_alloc);
140 }
141 }
142
143 static uint16_t phys_map_node_alloc(void)
144 {
145 unsigned i;
146 uint16_t ret;
147
148 ret = phys_map_nodes_nb++;
149 assert(ret != PHYS_MAP_NODE_NIL);
150 assert(ret != phys_map_nodes_nb_alloc);
151 for (i = 0; i < L2_SIZE; ++i) {
152 phys_map_nodes[ret][i].is_leaf = 0;
153 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
154 }
155 return ret;
156 }
157
158 static void phys_map_nodes_reset(void)
159 {
160 phys_map_nodes_nb = 0;
161 }
162
163
164 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
165 hwaddr *nb, uint16_t leaf,
166 int level)
167 {
168 PhysPageEntry *p;
169 int i;
170 hwaddr step = (hwaddr)1 << (level * L2_BITS);
171
172 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
173 lp->ptr = phys_map_node_alloc();
174 p = phys_map_nodes[lp->ptr];
175 if (level == 0) {
176 for (i = 0; i < L2_SIZE; i++) {
177 p[i].is_leaf = 1;
178 p[i].ptr = phys_section_unassigned;
179 }
180 }
181 } else {
182 p = phys_map_nodes[lp->ptr];
183 }
184 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
185
186 while (*nb && lp < &p[L2_SIZE]) {
187 if ((*index & (step - 1)) == 0 && *nb >= step) {
188 lp->is_leaf = true;
189 lp->ptr = leaf;
190 *index += step;
191 *nb -= step;
192 } else {
193 phys_page_set_level(lp, index, nb, leaf, level - 1);
194 }
195 ++lp;
196 }
197 }
198
199 static void phys_page_set(AddressSpaceDispatch *d,
200 hwaddr index, hwaddr nb,
201 uint16_t leaf)
202 {
203 /* Wildly overreserve - it doesn't matter much. */
204 phys_map_node_reserve(3 * P_L2_LEVELS);
205
206 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
207 }
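/*
 * phys_page_set_level() walks the dispatch tree top-down.  Whenever the
 * remaining [index, index + nb) run is aligned to and at least as long
 * as the span covered by one entry at the current level ('step' pages),
 * that entry becomes a leaf pointing at 'leaf'; otherwise the walk
 * recurses one level down.  For example, assuming L2_BITS == 10,
 * registering 1024 pages starting at a 1024-page-aligned index needs a
 * single level-1 leaf rather than 1024 level-0 entries.
 */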
208
209 static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
210 {
211 PhysPageEntry lp = d->phys_map;
212 PhysPageEntry *p;
213 int i;
214
215 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
216 if (lp.ptr == PHYS_MAP_NODE_NIL) {
217 return &phys_sections[phys_section_unassigned];
218 }
219 p = phys_map_nodes[lp.ptr];
220 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
221 }
222 return &phys_sections[lp.ptr];
223 }
224
225 bool memory_region_is_unassigned(MemoryRegion *mr)
226 {
227 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
228 && mr != &io_mem_watch;
229 }
230
231 static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
232 hwaddr addr,
233 bool resolve_subpage)
234 {
235 MemoryRegionSection *section;
236 subpage_t *subpage;
237
238 section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
239 if (resolve_subpage && section->mr->subpage) {
240 subpage = container_of(section->mr, subpage_t, iomem);
241 section = &phys_sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
242 }
243 return section;
244 }
245
246 static MemoryRegionSection *
247 address_space_translate_internal(AddressSpace *as, hwaddr addr, hwaddr *xlat,
248 hwaddr *plen, bool resolve_subpage)
249 {
250 MemoryRegionSection *section;
251 Int128 diff;
252
253 section = address_space_lookup_region(as, addr, resolve_subpage);
254 /* Compute offset within MemoryRegionSection */
255 addr -= section->offset_within_address_space;
256
257 /* Compute offset within MemoryRegion */
258 *xlat = addr + section->offset_within_region;
259
260 diff = int128_sub(section->mr->size, int128_make64(addr));
261 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
262 return section;
263 }
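/*
 * The clamping of *plen above is done in 128-bit arithmetic because a
 * MemoryRegionSection's size is an Int128 in this version and may cover
 * the entire 2^64 address space.  int128_get64() is safe on the result
 * since it is bounded by the caller's original 64-bit *plen.
 */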
264
265 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
266 hwaddr *xlat, hwaddr *plen,
267 bool is_write)
268 {
269 return address_space_translate_internal(as, addr, xlat, plen, true)->mr;
270 }
271
272 MemoryRegionSection *
273 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
274 hwaddr *plen)
275 {
276 return address_space_translate_internal(as, addr, xlat, plen, false);
277 }
278 #endif
279
280 void cpu_exec_init_all(void)
281 {
282 #if !defined(CONFIG_USER_ONLY)
283 qemu_mutex_init(&ram_list.mutex);
284 memory_map_init();
285 io_mem_init();
286 #endif
287 }
288
289 #if !defined(CONFIG_USER_ONLY)
290
291 static int cpu_common_post_load(void *opaque, int version_id)
292 {
293 CPUState *cpu = opaque;
294
295 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
296 version_id is increased. */
297 cpu->interrupt_request &= ~0x01;
298 tlb_flush(cpu->env_ptr, 1);
299
300 return 0;
301 }
302
303 static const VMStateDescription vmstate_cpu_common = {
304 .name = "cpu_common",
305 .version_id = 1,
306 .minimum_version_id = 1,
307 .minimum_version_id_old = 1,
308 .post_load = cpu_common_post_load,
309 .fields = (VMStateField []) {
310 VMSTATE_UINT32(halted, CPUState),
311 VMSTATE_UINT32(interrupt_request, CPUState),
312 VMSTATE_END_OF_LIST()
313 }
314 };
315 #else
316 #define vmstate_cpu_common vmstate_dummy
317 #endif
318
319 CPUState *qemu_get_cpu(int index)
320 {
321 CPUArchState *env = first_cpu;
322 CPUState *cpu = NULL;
323
324 while (env) {
325 cpu = ENV_GET_CPU(env);
326 if (cpu->cpu_index == index) {
327 break;
328 }
329 env = env->next_cpu;
330 }
331
332 return env ? cpu : NULL;
333 }
334
335 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
336 {
337 CPUArchState *env = first_cpu;
338
339 while (env) {
340 func(ENV_GET_CPU(env), data);
341 env = env->next_cpu;
342 }
343 }
344
345 void cpu_exec_init(CPUArchState *env)
346 {
347 CPUState *cpu = ENV_GET_CPU(env);
348 CPUClass *cc = CPU_GET_CLASS(cpu);
349 CPUArchState **penv;
350 int cpu_index;
351
352 #if defined(CONFIG_USER_ONLY)
353 cpu_list_lock();
354 #endif
355 env->next_cpu = NULL;
356 penv = &first_cpu;
357 cpu_index = 0;
358 while (*penv != NULL) {
359 penv = &(*penv)->next_cpu;
360 cpu_index++;
361 }
362 cpu->cpu_index = cpu_index;
363 cpu->numa_node = 0;
364 QTAILQ_INIT(&env->breakpoints);
365 QTAILQ_INIT(&env->watchpoints);
366 #ifndef CONFIG_USER_ONLY
367 cpu->thread_id = qemu_get_thread_id();
368 #endif
369 *penv = env;
370 #if defined(CONFIG_USER_ONLY)
371 cpu_list_unlock();
372 #endif
373 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
374 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
375 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
376 cpu_save, cpu_load, env);
377 assert(cc->vmsd == NULL);
378 #endif
379 if (cc->vmsd != NULL) {
380 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
381 }
382 }
383
384 #if defined(TARGET_HAS_ICE)
385 #if defined(CONFIG_USER_ONLY)
386 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
387 {
388 tb_invalidate_phys_page_range(pc, pc + 1, 0);
389 }
390 #else
391 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
392 {
393 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
394 (pc & ~TARGET_PAGE_MASK));
395 }
396 #endif
397 #endif /* TARGET_HAS_ICE */
398
399 #if defined(CONFIG_USER_ONLY)
400 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
401
402 {
403 }
404
405 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
406 int flags, CPUWatchpoint **watchpoint)
407 {
408 return -ENOSYS;
409 }
410 #else
411 /* Add a watchpoint. */
412 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
413 int flags, CPUWatchpoint **watchpoint)
414 {
415 target_ulong len_mask = ~(len - 1);
416 CPUWatchpoint *wp;
417
418 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
419 if ((len & (len - 1)) || (addr & ~len_mask) ||
420 len == 0 || len > TARGET_PAGE_SIZE) {
421 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
422 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
423 return -EINVAL;
424 }
425 wp = g_malloc(sizeof(*wp));
426
427 wp->vaddr = addr;
428 wp->len_mask = len_mask;
429 wp->flags = flags;
430
431 /* keep all GDB-injected watchpoints in front */
432 if (flags & BP_GDB)
433 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
434 else
435 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
436
437 tlb_flush_page(env, addr);
438
439 if (watchpoint)
440 *watchpoint = wp;
441 return 0;
442 }
443
444 /* Remove a specific watchpoint. */
445 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
446 int flags)
447 {
448 target_ulong len_mask = ~(len - 1);
449 CPUWatchpoint *wp;
450
451 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
452 if (addr == wp->vaddr && len_mask == wp->len_mask
453 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
454 cpu_watchpoint_remove_by_ref(env, wp);
455 return 0;
456 }
457 }
458 return -ENOENT;
459 }
460
461 /* Remove a specific watchpoint by reference. */
462 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
463 {
464 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
465
466 tlb_flush_page(env, watchpoint->vaddr);
467
468 g_free(watchpoint);
469 }
470
471 /* Remove all matching watchpoints. */
472 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
473 {
474 CPUWatchpoint *wp, *next;
475
476 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
477 if (wp->flags & mask)
478 cpu_watchpoint_remove_by_ref(env, wp);
479 }
480 }
481 #endif
482
483 /* Add a breakpoint. */
484 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
485 CPUBreakpoint **breakpoint)
486 {
487 #if defined(TARGET_HAS_ICE)
488 CPUBreakpoint *bp;
489
490 bp = g_malloc(sizeof(*bp));
491
492 bp->pc = pc;
493 bp->flags = flags;
494
495 /* keep all GDB-injected breakpoints in front */
496 if (flags & BP_GDB)
497 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
498 else
499 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
500
501 breakpoint_invalidate(env, pc);
502
503 if (breakpoint)
504 *breakpoint = bp;
505 return 0;
506 #else
507 return -ENOSYS;
508 #endif
509 }
510
511 /* Remove a specific breakpoint. */
512 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
513 {
514 #if defined(TARGET_HAS_ICE)
515 CPUBreakpoint *bp;
516
517 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
518 if (bp->pc == pc && bp->flags == flags) {
519 cpu_breakpoint_remove_by_ref(env, bp);
520 return 0;
521 }
522 }
523 return -ENOENT;
524 #else
525 return -ENOSYS;
526 #endif
527 }
528
529 /* Remove a specific breakpoint by reference. */
530 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
531 {
532 #if defined(TARGET_HAS_ICE)
533 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
534
535 breakpoint_invalidate(env, breakpoint->pc);
536
537 g_free(breakpoint);
538 #endif
539 }
540
541 /* Remove all matching breakpoints. */
542 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
543 {
544 #if defined(TARGET_HAS_ICE)
545 CPUBreakpoint *bp, *next;
546
547 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
548 if (bp->flags & mask)
549 cpu_breakpoint_remove_by_ref(env, bp);
550 }
551 #endif
552 }
553
554 /* enable or disable single step mode. EXCP_DEBUG is returned by the
555 CPU loop after each instruction */
556 void cpu_single_step(CPUArchState *env, int enabled)
557 {
558 #if defined(TARGET_HAS_ICE)
559 if (env->singlestep_enabled != enabled) {
560 env->singlestep_enabled = enabled;
561 if (kvm_enabled())
562 kvm_update_guest_debug(env, 0);
563 else {
564 /* must flush all the translated code to avoid inconsistencies */
565 /* XXX: only flush what is necessary */
566 tb_flush(env);
567 }
568 }
569 #endif
570 }
571
572 void cpu_exit(CPUArchState *env)
573 {
574 CPUState *cpu = ENV_GET_CPU(env);
575
576 cpu->exit_request = 1;
577 cpu->tcg_exit_req = 1;
578 }
579
580 void cpu_abort(CPUArchState *env, const char *fmt, ...)
581 {
582 va_list ap;
583 va_list ap2;
584
585 va_start(ap, fmt);
586 va_copy(ap2, ap);
587 fprintf(stderr, "qemu: fatal: ");
588 vfprintf(stderr, fmt, ap);
589 fprintf(stderr, "\n");
590 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
591 if (qemu_log_enabled()) {
592 qemu_log("qemu: fatal: ");
593 qemu_log_vprintf(fmt, ap2);
594 qemu_log("\n");
595 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
596 qemu_log_flush();
597 qemu_log_close();
598 }
599 va_end(ap2);
600 va_end(ap);
601 #if defined(CONFIG_USER_ONLY)
602 {
603 struct sigaction act;
604 sigfillset(&act.sa_mask);
605 act.sa_handler = SIG_DFL;
606 sigaction(SIGABRT, &act, NULL);
607 }
608 #endif
609 abort();
610 }
611
612 CPUArchState *cpu_copy(CPUArchState *env)
613 {
614 CPUArchState *new_env = cpu_init(env->cpu_model_str);
615 CPUArchState *next_cpu = new_env->next_cpu;
616 #if defined(TARGET_HAS_ICE)
617 CPUBreakpoint *bp;
618 CPUWatchpoint *wp;
619 #endif
620
621 memcpy(new_env, env, sizeof(CPUArchState));
622
623 /* Preserve chaining. */
624 new_env->next_cpu = next_cpu;
625
626 /* Clone all break/watchpoints.
627 Note: Once we support ptrace with hw-debug register access, make sure
628 BP_CPU break/watchpoints are handled correctly on clone. */
629 QTAILQ_INIT(&env->breakpoints);
630 QTAILQ_INIT(&env->watchpoints);
631 #if defined(TARGET_HAS_ICE)
632 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
633 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
634 }
635 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
636 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
637 wp->flags, NULL);
638 }
639 #endif
640
641 return new_env;
642 }
643
644 #if !defined(CONFIG_USER_ONLY)
645 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
646 uintptr_t length)
647 {
648 uintptr_t start1;
649
650 /* we modify the TLB cache so that the dirty bit will be set again
651 when accessing the range */
652 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
653 /* Check that we don't span multiple blocks - this breaks the
654 address comparisons below. */
655 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
656 != (end - 1) - start) {
657 abort();
658 }
659 cpu_tlb_reset_dirty_all(start1, length);
660
661 }
662
663 /* Note: start and end must be within the same ram block. */
664 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
665 int dirty_flags)
666 {
667 uintptr_t length;
668
669 start &= TARGET_PAGE_MASK;
670 end = TARGET_PAGE_ALIGN(end);
671
672 length = end - start;
673 if (length == 0)
674 return;
675 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
676
677 if (tcg_enabled()) {
678 tlb_reset_dirty_range_all(start, end, length);
679 }
680 }
681
682 static int cpu_physical_memory_set_dirty_tracking(int enable)
683 {
684 int ret = 0;
685 in_migration = enable;
686 return ret;
687 }
688
689 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
690 MemoryRegionSection *section,
691 target_ulong vaddr,
692 hwaddr paddr, hwaddr xlat,
693 int prot,
694 target_ulong *address)
695 {
696 hwaddr iotlb;
697 CPUWatchpoint *wp;
698
699 if (memory_region_is_ram(section->mr)) {
700 /* Normal RAM. */
701 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
702 + xlat;
703 if (!section->readonly) {
704 iotlb |= phys_section_notdirty;
705 } else {
706 iotlb |= phys_section_rom;
707 }
708 } else {
709 iotlb = section - phys_sections;
710 iotlb += xlat;
711 }
712
713 /* Make accesses to pages with watchpoints go via the
714 watchpoint trap routines. */
715 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
716 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
717 /* Avoid trapping reads of pages with a write breakpoint. */
718 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
719 iotlb = phys_section_watch + paddr;
720 *address |= TLB_MMIO;
721 break;
722 }
723 }
724 }
725
726 return iotlb;
727 }
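/*
 * The returned iotlb value does double duty: for RAM it is the page's
 * ram_addr_t ORed with the notdirty or rom section number, so writes
 * still take the slow path for dirty tracking or ROM protection; for
 * everything else it is the index of the section in phys_sections plus
 * the translated offset.  This encoding works only because
 * phys_section_add() keeps section numbers below TARGET_PAGE_SIZE, so
 * they never spill into the page-aligned address bits.
 */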
728 #endif /* !defined(CONFIG_USER_ONLY) */
729
730 #if !defined(CONFIG_USER_ONLY)
731
732 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
733 uint16_t section);
734 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
735 static void destroy_page_desc(uint16_t section_index)
736 {
737 MemoryRegionSection *section = &phys_sections[section_index];
738 MemoryRegion *mr = section->mr;
739
740 if (mr->subpage) {
741 subpage_t *subpage = container_of(mr, subpage_t, iomem);
742 memory_region_destroy(&subpage->iomem);
743 g_free(subpage);
744 }
745 }
746
747 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
748 {
749 unsigned i;
750 PhysPageEntry *p;
751
752 if (lp->ptr == PHYS_MAP_NODE_NIL) {
753 return;
754 }
755
756 p = phys_map_nodes[lp->ptr];
757 for (i = 0; i < L2_SIZE; ++i) {
758 if (!p[i].is_leaf) {
759 destroy_l2_mapping(&p[i], level - 1);
760 } else {
761 destroy_page_desc(p[i].ptr);
762 }
763 }
764 lp->is_leaf = 0;
765 lp->ptr = PHYS_MAP_NODE_NIL;
766 }
767
768 static void destroy_all_mappings(AddressSpaceDispatch *d)
769 {
770 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
771 phys_map_nodes_reset();
772 }
773
774 static uint16_t phys_section_add(MemoryRegionSection *section)
775 {
776 /* The physical section number is ORed with a page-aligned
777 * pointer to produce the iotlb entries. Thus it should
778 * never overflow into the page-aligned value.
779 */
780 assert(phys_sections_nb < TARGET_PAGE_SIZE);
781
782 if (phys_sections_nb == phys_sections_nb_alloc) {
783 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
784 phys_sections = g_renew(MemoryRegionSection, phys_sections,
785 phys_sections_nb_alloc);
786 }
787 phys_sections[phys_sections_nb] = *section;
788 return phys_sections_nb++;
789 }
790
791 static void phys_sections_clear(void)
792 {
793 phys_sections_nb = 0;
794 }
795
796 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
797 {
798 subpage_t *subpage;
799 hwaddr base = section->offset_within_address_space
800 & TARGET_PAGE_MASK;
801 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
802 MemoryRegionSection subsection = {
803 .offset_within_address_space = base,
804 .size = int128_make64(TARGET_PAGE_SIZE),
805 };
806 hwaddr start, end;
807
808 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
809
810 if (!(existing->mr->subpage)) {
811 subpage = subpage_init(d->as, base);
812 subsection.mr = &subpage->iomem;
813 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
814 phys_section_add(&subsection));
815 } else {
816 subpage = container_of(existing->mr, subpage_t, iomem);
817 }
818 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
819 end = start + int128_get64(section->size) - 1;
820 subpage_register(subpage, start, end, phys_section_add(section));
821 }
822
823
824 static void register_multipage(AddressSpaceDispatch *d,
825 MemoryRegionSection *section)
826 {
827 hwaddr start_addr = section->offset_within_address_space;
828 uint16_t section_index = phys_section_add(section);
829 uint64_t num_pages = int128_get64(int128_rshift(section->size,
830 TARGET_PAGE_BITS));
831
832 assert(num_pages);
833 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
834 }
835
836 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
837 {
838 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
839 MemoryRegionSection now = *section, remain = *section;
840 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
841
842 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
843 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
844 - now.offset_within_address_space;
845
846 now.size = int128_min(int128_make64(left), now.size);
847 register_subpage(d, &now);
848 } else {
849 now.size = int128_zero();
850 }
851 while (int128_ne(remain.size, now.size)) {
852 remain.size = int128_sub(remain.size, now.size);
853 remain.offset_within_address_space += int128_get64(now.size);
854 remain.offset_within_region += int128_get64(now.size);
855 now = remain;
856 if (int128_lt(remain.size, page_size)) {
857 register_subpage(d, &now);
858 } else if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
859 now.size = page_size;
860 register_subpage(d, &now);
861 } else {
862 now.size = int128_and(now.size, int128_neg(page_size));
863 register_multipage(d, &now);
864 }
865 }
866 }
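/*
 * mem_add() carves a MemoryRegionSection into dispatch entries in up to
 * three kinds of pieces: a head that shares a target page with a
 * neighbour (registered as a subpage), a page-aligned middle handed to
 * register_multipage(), and a tail smaller than a page (again a
 * subpage).  If offset_within_region is not page-aligned, the middle is
 * registered one page-sized subpage at a time instead.
 */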
867
868 void qemu_flush_coalesced_mmio_buffer(void)
869 {
870 if (kvm_enabled())
871 kvm_flush_coalesced_mmio_buffer();
872 }
873
874 void qemu_mutex_lock_ramlist(void)
875 {
876 qemu_mutex_lock(&ram_list.mutex);
877 }
878
879 void qemu_mutex_unlock_ramlist(void)
880 {
881 qemu_mutex_unlock(&ram_list.mutex);
882 }
883
884 #if defined(__linux__) && !defined(TARGET_S390X)
885
886 #include <sys/vfs.h>
887
888 #define HUGETLBFS_MAGIC 0x958458f6
889
890 static long gethugepagesize(const char *path)
891 {
892 struct statfs fs;
893 int ret;
894
895 do {
896 ret = statfs(path, &fs);
897 } while (ret != 0 && errno == EINTR);
898
899 if (ret != 0) {
900 perror(path);
901 return 0;
902 }
903
904 if (fs.f_type != HUGETLBFS_MAGIC)
905 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
906
907 return fs.f_bsize;
908 }
909
910 static void *file_ram_alloc(RAMBlock *block,
911 ram_addr_t memory,
912 const char *path)
913 {
914 char *filename;
915 char *sanitized_name;
916 char *c;
917 void *area;
918 int fd;
919 #ifdef MAP_POPULATE
920 int flags;
921 #endif
922 unsigned long hpagesize;
923
924 hpagesize = gethugepagesize(path);
925 if (!hpagesize) {
926 return NULL;
927 }
928
929 if (memory < hpagesize) {
930 return NULL;
931 }
932
933 if (kvm_enabled() && !kvm_has_sync_mmu()) {
934 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
935 return NULL;
936 }
937
938 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
939 sanitized_name = g_strdup(block->mr->name);
940 for (c = sanitized_name; *c != '\0'; c++) {
941 if (*c == '/')
942 *c = '_';
943 }
944
945 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
946 sanitized_name);
947 g_free(sanitized_name);
948
949 fd = mkstemp(filename);
950 if (fd < 0) {
951 perror("unable to create backing store for hugepages");
952 g_free(filename);
953 return NULL;
954 }
955 unlink(filename);
956 g_free(filename);
957
958 memory = (memory+hpagesize-1) & ~(hpagesize-1);
959
960 /*
961 * ftruncate is not supported by hugetlbfs in older
962 * hosts, so don't bother bailing out on errors.
963 * If anything goes wrong with it under other filesystems,
964 * mmap will fail.
965 */
966 if (ftruncate(fd, memory))
967 perror("ftruncate");
968
969 #ifdef MAP_POPULATE
970 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
971 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
972 * to sidestep this quirk.
973 */
974 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
975 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
976 #else
977 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
978 #endif
979 if (area == MAP_FAILED) {
980 perror("file_ram_alloc: can't mmap RAM pages");
981 close(fd);
982 return (NULL);
983 }
984 block->fd = fd;
985 return area;
986 }
987 #endif
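/*
 * file_ram_alloc() implements -mem-path backing: it creates a temporary
 * file on (ideally) a hugetlbfs mount, unlinks it right away so it
 * vanishes when QEMU exits, rounds the size up to the huge page size,
 * ftruncate()s and mmap()s it.  MAP_POPULATE is only combined with
 * MAP_SHARED because, as noted above, populating a MAP_PRIVATE mapping
 * does not reliably touch every page.
 */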
988
989 static ram_addr_t find_ram_offset(ram_addr_t size)
990 {
991 RAMBlock *block, *next_block;
992 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
993
994 assert(size != 0); /* it would hand out same offset multiple times */
995
996 if (QTAILQ_EMPTY(&ram_list.blocks))
997 return 0;
998
999 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1000 ram_addr_t end, next = RAM_ADDR_MAX;
1001
1002 end = block->offset + block->length;
1003
1004 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1005 if (next_block->offset >= end) {
1006 next = MIN(next, next_block->offset);
1007 }
1008 }
1009 if (next - end >= size && next - end < mingap) {
1010 offset = end;
1011 mingap = next - end;
1012 }
1013 }
1014
1015 if (offset == RAM_ADDR_MAX) {
1016 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1017 (uint64_t)size);
1018 abort();
1019 }
1020
1021 return offset;
1022 }
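/*
 * find_ram_offset() is a best-fit search over the ram_addr_t space: for
 * each existing block it measures the gap up to the nearest block above
 * it and picks the smallest gap that still fits the request, keeping
 * the purely internal ram_addr_t layout compact.
 */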
1023
1024 ram_addr_t last_ram_offset(void)
1025 {
1026 RAMBlock *block;
1027 ram_addr_t last = 0;
1028
1029 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1030 last = MAX(last, block->offset + block->length);
1031
1032 return last;
1033 }
1034
1035 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1036 {
1037 int ret;
1038 QemuOpts *machine_opts;
1039
1040     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1041 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1042 if (machine_opts &&
1043 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1044 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1045 if (ret) {
1046 perror("qemu_madvise");
1047 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1048 "but dump_guest_core=off specified\n");
1049 }
1050 }
1051 }
1052
1053 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1054 {
1055 RAMBlock *new_block, *block;
1056
1057 new_block = NULL;
1058 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1059 if (block->offset == addr) {
1060 new_block = block;
1061 break;
1062 }
1063 }
1064 assert(new_block);
1065 assert(!new_block->idstr[0]);
1066
1067 if (dev) {
1068 char *id = qdev_get_dev_path(dev);
1069 if (id) {
1070 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1071 g_free(id);
1072 }
1073 }
1074 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1075
1076 /* This assumes the iothread lock is taken here too. */
1077 qemu_mutex_lock_ramlist();
1078 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1079 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1080 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1081 new_block->idstr);
1082 abort();
1083 }
1084 }
1085 qemu_mutex_unlock_ramlist();
1086 }
1087
1088 static int memory_try_enable_merging(void *addr, size_t len)
1089 {
1090 QemuOpts *opts;
1091
1092 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1093 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1094 /* disabled by the user */
1095 return 0;
1096 }
1097
1098 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1099 }
1100
1101 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1102 MemoryRegion *mr)
1103 {
1104 RAMBlock *block, *new_block;
1105
1106 size = TARGET_PAGE_ALIGN(size);
1107 new_block = g_malloc0(sizeof(*new_block));
1108
1109 /* This assumes the iothread lock is taken here too. */
1110 qemu_mutex_lock_ramlist();
1111 new_block->mr = mr;
1112 new_block->offset = find_ram_offset(size);
1113 if (host) {
1114 new_block->host = host;
1115 new_block->flags |= RAM_PREALLOC_MASK;
1116 } else {
1117 if (mem_path) {
1118 #if defined (__linux__) && !defined(TARGET_S390X)
1119 new_block->host = file_ram_alloc(new_block, size, mem_path);
1120 if (!new_block->host) {
1121 new_block->host = qemu_anon_ram_alloc(size);
1122 memory_try_enable_merging(new_block->host, size);
1123 }
1124 #else
1125 fprintf(stderr, "-mem-path option unsupported\n");
1126 exit(1);
1127 #endif
1128 } else {
1129 if (xen_enabled()) {
1130 xen_ram_alloc(new_block->offset, size, mr);
1131 } else if (kvm_enabled()) {
1132 /* some s390/kvm configurations have special constraints */
1133 new_block->host = kvm_ram_alloc(size);
1134 } else {
1135 new_block->host = qemu_anon_ram_alloc(size);
1136 }
1137 memory_try_enable_merging(new_block->host, size);
1138 }
1139 }
1140 new_block->length = size;
1141
1142 /* Keep the list sorted from biggest to smallest block. */
1143 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1144 if (block->length < new_block->length) {
1145 break;
1146 }
1147 }
1148 if (block) {
1149 QTAILQ_INSERT_BEFORE(block, new_block, next);
1150 } else {
1151 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1152 }
1153 ram_list.mru_block = NULL;
1154
1155 ram_list.version++;
1156 qemu_mutex_unlock_ramlist();
1157
1158 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1159 last_ram_offset() >> TARGET_PAGE_BITS);
1160 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1161 0, size >> TARGET_PAGE_BITS);
1162 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1163
1164 qemu_ram_setup_dump(new_block->host, size);
1165 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1166
1167 if (kvm_enabled())
1168 kvm_setup_guest_memory(new_block->host, size);
1169
1170 return new_block->offset;
1171 }
1172
1173 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1174 {
1175 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1176 }
1177
1178 void qemu_ram_free_from_ptr(ram_addr_t addr)
1179 {
1180 RAMBlock *block;
1181
1182 /* This assumes the iothread lock is taken here too. */
1183 qemu_mutex_lock_ramlist();
1184 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1185 if (addr == block->offset) {
1186 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1187 ram_list.mru_block = NULL;
1188 ram_list.version++;
1189 g_free(block);
1190 break;
1191 }
1192 }
1193 qemu_mutex_unlock_ramlist();
1194 }
1195
1196 void qemu_ram_free(ram_addr_t addr)
1197 {
1198 RAMBlock *block;
1199
1200 /* This assumes the iothread lock is taken here too. */
1201 qemu_mutex_lock_ramlist();
1202 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1203 if (addr == block->offset) {
1204 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1205 ram_list.mru_block = NULL;
1206 ram_list.version++;
1207 if (block->flags & RAM_PREALLOC_MASK) {
1208 ;
1209 } else if (mem_path) {
1210 #if defined (__linux__) && !defined(TARGET_S390X)
1211 if (block->fd) {
1212 munmap(block->host, block->length);
1213 close(block->fd);
1214 } else {
1215 qemu_anon_ram_free(block->host, block->length);
1216 }
1217 #else
1218 abort();
1219 #endif
1220 } else {
1221 if (xen_enabled()) {
1222 xen_invalidate_map_cache_entry(block->host);
1223 } else {
1224 qemu_anon_ram_free(block->host, block->length);
1225 }
1226 }
1227 g_free(block);
1228 break;
1229 }
1230 }
1231 qemu_mutex_unlock_ramlist();
1232
1233 }
1234
1235 #ifndef _WIN32
1236 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1237 {
1238 RAMBlock *block;
1239 ram_addr_t offset;
1240 int flags;
1241 void *area, *vaddr;
1242
1243 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1244 offset = addr - block->offset;
1245 if (offset < block->length) {
1246 vaddr = block->host + offset;
1247 if (block->flags & RAM_PREALLOC_MASK) {
1248 ;
1249 } else {
1250 flags = MAP_FIXED;
1251 munmap(vaddr, length);
1252 if (mem_path) {
1253 #if defined(__linux__) && !defined(TARGET_S390X)
1254 if (block->fd) {
1255 #ifdef MAP_POPULATE
1256 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1257 MAP_PRIVATE;
1258 #else
1259 flags |= MAP_PRIVATE;
1260 #endif
1261 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1262 flags, block->fd, offset);
1263 } else {
1264 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1265 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1266 flags, -1, 0);
1267 }
1268 #else
1269 abort();
1270 #endif
1271 } else {
1272 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1273 flags |= MAP_SHARED | MAP_ANONYMOUS;
1274 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1275 flags, -1, 0);
1276 #else
1277 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1278 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1279 flags, -1, 0);
1280 #endif
1281 }
1282 if (area != vaddr) {
1283 fprintf(stderr, "Could not remap addr: "
1284 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1285 length, addr);
1286 exit(1);
1287 }
1288 memory_try_enable_merging(vaddr, length);
1289 qemu_ram_setup_dump(vaddr, length);
1290 }
1291 return;
1292 }
1293 }
1294 }
1295 #endif /* !_WIN32 */
1296
1297 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1298 With the exception of the softmmu code in this file, this should
1299 only be used for local memory (e.g. video ram) that the device owns,
1300 and knows it isn't going to access beyond the end of the block.
1301
1302 It should not be used for general purpose DMA.
1303 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1304 */
1305 void *qemu_get_ram_ptr(ram_addr_t addr)
1306 {
1307 RAMBlock *block;
1308
1309 /* The list is protected by the iothread lock here. */
1310 block = ram_list.mru_block;
1311 if (block && addr - block->offset < block->length) {
1312 goto found;
1313 }
1314 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1315 if (addr - block->offset < block->length) {
1316 goto found;
1317 }
1318 }
1319
1320 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1321 abort();
1322
1323 found:
1324 ram_list.mru_block = block;
1325 if (xen_enabled()) {
1326 /* We need to check if the requested address is in the RAM
1327 * because we don't want to map the entire memory in QEMU.
1328 * In that case just map until the end of the page.
1329 */
1330 if (block->offset == 0) {
1331 return xen_map_cache(addr, 0, 0);
1332 } else if (block->host == NULL) {
1333 block->host =
1334 xen_map_cache(block->offset, block->length, 1);
1335 }
1336 }
1337 return block->host + (addr - block->offset);
1338 }
1339
1340 /* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
1341 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1342 *
1343 * ??? Is this still necessary?
1344 */
1345 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1346 {
1347 RAMBlock *block;
1348
1349 /* The list is protected by the iothread lock here. */
1350 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1351 if (addr - block->offset < block->length) {
1352 if (xen_enabled()) {
1353 /* We need to check if the requested address is in the RAM
1354 * because we don't want to map the entire memory in QEMU.
1355 * In that case just map until the end of the page.
1356 */
1357 if (block->offset == 0) {
1358 return xen_map_cache(addr, 0, 0);
1359 } else if (block->host == NULL) {
1360 block->host =
1361 xen_map_cache(block->offset, block->length, 1);
1362 }
1363 }
1364 return block->host + (addr - block->offset);
1365 }
1366 }
1367
1368 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1369 abort();
1370
1371 return NULL;
1372 }
1373
1374 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1375 * but takes a size argument */
1376 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1377 {
1378 if (*size == 0) {
1379 return NULL;
1380 }
1381 if (xen_enabled()) {
1382 return xen_map_cache(addr, *size, 1);
1383 } else {
1384 RAMBlock *block;
1385
1386 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1387 if (addr - block->offset < block->length) {
1388 if (addr - block->offset + *size > block->length)
1389 *size = block->length - addr + block->offset;
1390 return block->host + (addr - block->offset);
1391 }
1392 }
1393
1394 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1395 abort();
1396 }
1397 }
1398
1399 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1400 {
1401 RAMBlock *block;
1402 uint8_t *host = ptr;
1403
1404 if (xen_enabled()) {
1405 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1406 return 0;
1407 }
1408
1409 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1410         /* This case happens when the block is not mapped. */
1411 if (block->host == NULL) {
1412 continue;
1413 }
1414 if (host - block->host < block->length) {
1415 *ram_addr = block->offset + (host - block->host);
1416 return 0;
1417 }
1418 }
1419
1420 return -1;
1421 }
1422
1423 /* Some of the softmmu routines need to translate from a host pointer
1424 (typically a TLB entry) back to a ram offset. */
1425 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1426 {
1427 ram_addr_t ram_addr;
1428
1429 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1430 fprintf(stderr, "Bad ram pointer %p\n", ptr);
1431 abort();
1432 }
1433 return ram_addr;
1434 }
1435
1436 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1437 uint64_t val, unsigned size)
1438 {
1439 int dirty_flags;
1440 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1441 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1442 tb_invalidate_phys_page_fast(ram_addr, size);
1443 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1444 }
1445 switch (size) {
1446 case 1:
1447 stb_p(qemu_get_ram_ptr(ram_addr), val);
1448 break;
1449 case 2:
1450 stw_p(qemu_get_ram_ptr(ram_addr), val);
1451 break;
1452 case 4:
1453 stl_p(qemu_get_ram_ptr(ram_addr), val);
1454 break;
1455 default:
1456 abort();
1457 }
1458 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1459 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1460 /* we remove the notdirty callback only if the code has been
1461 flushed */
1462 if (dirty_flags == 0xff)
1463 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1464 }
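/*
 * notdirty_mem_write() is the slow path installed for RAM pages whose
 * CODE_DIRTY_FLAG is clear: it invalidates any translated code derived
 * from the page, performs the store, and sets the dirty bits.  Only
 * once every bit, including CODE_DIRTY_FLAG, is set again does
 * tlb_set_dirty() restore a direct RAM mapping so later writes bypass
 * this callback.
 */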
1465
1466 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1467 unsigned size, bool is_write)
1468 {
1469 return is_write;
1470 }
1471
1472 static const MemoryRegionOps notdirty_mem_ops = {
1473 .write = notdirty_mem_write,
1474 .valid.accepts = notdirty_mem_accepts,
1475 .endianness = DEVICE_NATIVE_ENDIAN,
1476 };
1477
1478 /* Generate a debug exception if a watchpoint has been hit. */
1479 static void check_watchpoint(int offset, int len_mask, int flags)
1480 {
1481 CPUArchState *env = cpu_single_env;
1482 target_ulong pc, cs_base;
1483 target_ulong vaddr;
1484 CPUWatchpoint *wp;
1485 int cpu_flags;
1486
1487 if (env->watchpoint_hit) {
1488 /* We re-entered the check after replacing the TB. Now raise
1489          * the debug interrupt so that it will trigger after the
1490 * current instruction. */
1491 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1492 return;
1493 }
1494 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1495 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1496 if ((vaddr == (wp->vaddr & len_mask) ||
1497 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1498 wp->flags |= BP_WATCHPOINT_HIT;
1499 if (!env->watchpoint_hit) {
1500 env->watchpoint_hit = wp;
1501 tb_check_watchpoint(env);
1502 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1503 env->exception_index = EXCP_DEBUG;
1504 cpu_loop_exit(env);
1505 } else {
1506 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1507 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1508 cpu_resume_from_signal(env, NULL);
1509 }
1510 }
1511 } else {
1512 wp->flags &= ~BP_WATCHPOINT_HIT;
1513 }
1514 }
1515 }
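/*
 * On a hit, check_watchpoint() cannot simply raise an exception in the
 * middle of a TB: it first aborts the current block via
 * tb_check_watchpoint(), then either exits the CPU loop with EXCP_DEBUG
 * immediately (BP_STOP_BEFORE_ACCESS) or regenerates a
 * single-instruction TB and restarts it, so the access completes and
 * the debug interrupt is raised on re-entry (see watchpoint_hit at the
 * top of the function).
 */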
1516
1517 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1518 so these check for a hit then pass through to the normal out-of-line
1519 phys routines. */
1520 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1521 unsigned size)
1522 {
1523 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1524 switch (size) {
1525 case 1: return ldub_phys(addr);
1526 case 2: return lduw_phys(addr);
1527 case 4: return ldl_phys(addr);
1528 default: abort();
1529 }
1530 }
1531
1532 static void watch_mem_write(void *opaque, hwaddr addr,
1533 uint64_t val, unsigned size)
1534 {
1535 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1536 switch (size) {
1537 case 1:
1538 stb_phys(addr, val);
1539 break;
1540 case 2:
1541 stw_phys(addr, val);
1542 break;
1543 case 4:
1544 stl_phys(addr, val);
1545 break;
1546 default: abort();
1547 }
1548 }
1549
1550 static const MemoryRegionOps watch_mem_ops = {
1551 .read = watch_mem_read,
1552 .write = watch_mem_write,
1553 .endianness = DEVICE_NATIVE_ENDIAN,
1554 };
1555
1556 static uint64_t subpage_read(void *opaque, hwaddr addr,
1557 unsigned len)
1558 {
1559 subpage_t *subpage = opaque;
1560 uint8_t buf[4];
1561
1562 #if defined(DEBUG_SUBPAGE)
1563 printf("%s: subpage %p len %d addr " TARGET_FMT_plx "\n", __func__,
1564 subpage, len, addr);
1565 #endif
1566 address_space_read(subpage->as, addr + subpage->base, buf, len);
1567 switch (len) {
1568 case 1:
1569 return ldub_p(buf);
1570 case 2:
1571 return lduw_p(buf);
1572 case 4:
1573 return ldl_p(buf);
1574 default:
1575 abort();
1576 }
1577 }
1578
1579 static void subpage_write(void *opaque, hwaddr addr,
1580 uint64_t value, unsigned len)
1581 {
1582 subpage_t *subpage = opaque;
1583 uint8_t buf[4];
1584
1585 #if defined(DEBUG_SUBPAGE)
1586 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1587 " value %"PRIx64"\n",
1588 __func__, subpage, len, addr, value);
1589 #endif
1590 switch (len) {
1591 case 1:
1592 stb_p(buf, value);
1593 break;
1594 case 2:
1595 stw_p(buf, value);
1596 break;
1597 case 4:
1598 stl_p(buf, value);
1599 break;
1600 default:
1601 abort();
1602 }
1603 address_space_write(subpage->as, addr + subpage->base, buf, len);
1604 }
1605
1606 static bool subpage_accepts(void *opaque, hwaddr addr,
1607 unsigned size, bool is_write)
1608 {
1609 subpage_t *subpage = opaque;
1610 #if defined(DEBUG_SUBPAGE)
1611 printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx "\n",
1612            __func__, subpage, is_write ? 'w' : 'r', size, addr);
1613 #endif
1614
1615 return address_space_access_valid(subpage->as, addr + subpage->base,
1616 size, is_write);
1617 }
1618
1619 static const MemoryRegionOps subpage_ops = {
1620 .read = subpage_read,
1621 .write = subpage_write,
1622 .valid.accepts = subpage_accepts,
1623 .endianness = DEVICE_NATIVE_ENDIAN,
1624 };
1625
1626 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1627 uint16_t section)
1628 {
1629 int idx, eidx;
1630
1631 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1632 return -1;
1633 idx = SUBPAGE_IDX(start);
1634 eidx = SUBPAGE_IDX(end);
1635 #if defined(DEBUG_SUBPAGE)
1636     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", __func__,
1637            mmio, start, end, idx, eidx, section);
1638 #endif
1639 for (; idx <= eidx; idx++) {
1640 mmio->sub_section[idx] = section;
1641 }
1642
1643 return 0;
1644 }
1645
1646 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1647 {
1648 subpage_t *mmio;
1649
1650 mmio = g_malloc0(sizeof(subpage_t));
1651
1652 mmio->as = as;
1653 mmio->base = base;
1654 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1655 "subpage", TARGET_PAGE_SIZE);
1656 mmio->iomem.subpage = true;
1657 #if defined(DEBUG_SUBPAGE)
1658     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1659            mmio, base, TARGET_PAGE_SIZE);
1660 #endif
1661 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1662
1663 return mmio;
1664 }
1665
1666 static uint16_t dummy_section(MemoryRegion *mr)
1667 {
1668 MemoryRegionSection section = {
1669 .mr = mr,
1670 .offset_within_address_space = 0,
1671 .offset_within_region = 0,
1672 .size = int128_2_64(),
1673 };
1674
1675 return phys_section_add(&section);
1676 }
1677
1678 MemoryRegion *iotlb_to_region(hwaddr index)
1679 {
1680 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1681 }
1682
1683 static void io_mem_init(void)
1684 {
1685 memory_region_init_io(&io_mem_rom, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1686 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1687 "unassigned", UINT64_MAX);
1688 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1689 "notdirty", UINT64_MAX);
1690 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1691 "watch", UINT64_MAX);
1692 }
1693
1694 static void mem_begin(MemoryListener *listener)
1695 {
1696 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1697
1698 destroy_all_mappings(d);
1699 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1700 }
1701
1702 static void core_begin(MemoryListener *listener)
1703 {
1704 phys_sections_clear();
1705 phys_section_unassigned = dummy_section(&io_mem_unassigned);
1706 phys_section_notdirty = dummy_section(&io_mem_notdirty);
1707 phys_section_rom = dummy_section(&io_mem_rom);
1708 phys_section_watch = dummy_section(&io_mem_watch);
1709 }
1710
1711 static void tcg_commit(MemoryListener *listener)
1712 {
1713 CPUArchState *env;
1714
1715 /* since each CPU stores ram addresses in its TLB cache, we must
1716 reset the modified entries */
1717 /* XXX: slow ! */
1718 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1719 tlb_flush(env, 1);
1720 }
1721 }
1722
1723 static void core_log_global_start(MemoryListener *listener)
1724 {
1725 cpu_physical_memory_set_dirty_tracking(1);
1726 }
1727
1728 static void core_log_global_stop(MemoryListener *listener)
1729 {
1730 cpu_physical_memory_set_dirty_tracking(0);
1731 }
1732
1733 static void io_region_add(MemoryListener *listener,
1734 MemoryRegionSection *section)
1735 {
1736 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1737
1738 mrio->mr = section->mr;
1739 mrio->offset = section->offset_within_region;
1740 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1741 section->offset_within_address_space,
1742 int128_get64(section->size));
1743 ioport_register(&mrio->iorange);
1744 }
1745
1746 static void io_region_del(MemoryListener *listener,
1747 MemoryRegionSection *section)
1748 {
1749 isa_unassign_ioport(section->offset_within_address_space,
1750 int128_get64(section->size));
1751 }
1752
1753 static MemoryListener core_memory_listener = {
1754 .begin = core_begin,
1755 .log_global_start = core_log_global_start,
1756 .log_global_stop = core_log_global_stop,
1757 .priority = 1,
1758 };
1759
1760 static MemoryListener io_memory_listener = {
1761 .region_add = io_region_add,
1762 .region_del = io_region_del,
1763 .priority = 0,
1764 };
1765
1766 static MemoryListener tcg_memory_listener = {
1767 .commit = tcg_commit,
1768 };
1769
1770 void address_space_init_dispatch(AddressSpace *as)
1771 {
1772 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1773
1774 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1775 d->listener = (MemoryListener) {
1776 .begin = mem_begin,
1777 .region_add = mem_add,
1778 .region_nop = mem_add,
1779 .priority = 0,
1780 };
1781 d->as = as;
1782 as->dispatch = d;
1783 memory_listener_register(&d->listener, as);
1784 }
1785
1786 void address_space_destroy_dispatch(AddressSpace *as)
1787 {
1788 AddressSpaceDispatch *d = as->dispatch;
1789
1790 memory_listener_unregister(&d->listener);
1791 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1792 g_free(d);
1793 as->dispatch = NULL;
1794 }
1795
1796 static void memory_map_init(void)
1797 {
1798 system_memory = g_malloc(sizeof(*system_memory));
1799 memory_region_init(system_memory, "system", INT64_MAX);
1800 address_space_init(&address_space_memory, system_memory);
1801 address_space_memory.name = "memory";
1802
1803 system_io = g_malloc(sizeof(*system_io));
1804 memory_region_init(system_io, "io", 65536);
1805 address_space_init(&address_space_io, system_io);
1806 address_space_io.name = "I/O";
1807
1808 memory_listener_register(&core_memory_listener, &address_space_memory);
1809 memory_listener_register(&io_memory_listener, &address_space_io);
1810 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1811
1812 dma_context_init(&dma_context_memory, &address_space_memory,
1813 NULL, NULL, NULL);
1814 }
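/*
 * memory_map_init() creates the two root containers: "system", sized to
 * span the whole guest physical range, becomes the root of
 * address_space_memory, and the 64 KiB "io" region becomes the root of
 * address_space_io.  The core, I/O and TCG listeners registered here
 * are what keep dirty tracking, the ioport tables and the TLBs in sync
 * with later topology changes.
 */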
1815
1816 MemoryRegion *get_system_memory(void)
1817 {
1818 return system_memory;
1819 }
1820
1821 MemoryRegion *get_system_io(void)
1822 {
1823 return system_io;
1824 }
1825
1826 #endif /* !defined(CONFIG_USER_ONLY) */
1827
1828 /* physical memory access (slow version, mainly for debug) */
1829 #if defined(CONFIG_USER_ONLY)
1830 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1831 uint8_t *buf, int len, int is_write)
1832 {
1833 int l, flags;
1834 target_ulong page;
1835 void * p;
1836
1837 while (len > 0) {
1838 page = addr & TARGET_PAGE_MASK;
1839 l = (page + TARGET_PAGE_SIZE) - addr;
1840 if (l > len)
1841 l = len;
1842 flags = page_get_flags(page);
1843 if (!(flags & PAGE_VALID))
1844 return -1;
1845 if (is_write) {
1846 if (!(flags & PAGE_WRITE))
1847 return -1;
1848 /* XXX: this code should not depend on lock_user */
1849 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1850 return -1;
1851 memcpy(p, buf, l);
1852 unlock_user(p, addr, l);
1853 } else {
1854 if (!(flags & PAGE_READ))
1855 return -1;
1856 /* XXX: this code should not depend on lock_user */
1857 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1858 return -1;
1859 memcpy(buf, p, l);
1860 unlock_user(p, addr, 0);
1861 }
1862 len -= l;
1863 buf += l;
1864 addr += l;
1865 }
1866 return 0;
1867 }
1868
1869 #else
1870
1871 static void invalidate_and_set_dirty(hwaddr addr,
1872 hwaddr length)
1873 {
1874 if (!cpu_physical_memory_is_dirty(addr)) {
1875 /* invalidate code */
1876 tb_invalidate_phys_page_range(addr, addr + length, 0);
1877 /* set dirty bit */
1878 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1879 }
1880 xen_modified_memory(addr, length);
1881 }
1882
1883 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1884 {
1885 if (memory_region_is_ram(mr)) {
1886 return !(is_write && mr->readonly);
1887 }
1888 if (memory_region_is_romd(mr)) {
1889 return !is_write;
1890 }
1891
1892 return false;
1893 }
1894
1895 static inline int memory_access_size(MemoryRegion *mr, int l, hwaddr addr)
1896 {
1897 if (l >= 4 && (((addr & 3) == 0 || mr->ops->impl.unaligned))) {
1898 return 4;
1899 }
1900 if (l >= 2 && (((addr & 1) == 0) || mr->ops->impl.unaligned)) {
1901 return 2;
1902 }
1903 return 1;
1904 }
1905
1906 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1907 int len, bool is_write)
1908 {
1909 hwaddr l;
1910 uint8_t *ptr;
1911 uint64_t val;
1912 hwaddr addr1;
1913 MemoryRegion *mr;
1914 bool error = false;
1915
1916 while (len > 0) {
1917 l = len;
1918 mr = address_space_translate(as, addr, &addr1, &l, is_write);
1919
1920 if (is_write) {
1921 if (!memory_access_is_direct(mr, is_write)) {
1922 l = memory_access_size(mr, l, addr1);
1923 /* XXX: could force cpu_single_env to NULL to avoid
1924 potential bugs */
1925 if (l == 4) {
1926 /* 32 bit write access */
1927 val = ldl_p(buf);
1928 error |= io_mem_write(mr, addr1, val, 4);
1929 } else if (l == 2) {
1930 /* 16 bit write access */
1931 val = lduw_p(buf);
1932 error |= io_mem_write(mr, addr1, val, 2);
1933 } else {
1934 /* 8 bit write access */
1935 val = ldub_p(buf);
1936 error |= io_mem_write(mr, addr1, val, 1);
1937 }
1938 } else {
1939 addr1 += memory_region_get_ram_addr(mr);
1940 /* RAM case */
1941 ptr = qemu_get_ram_ptr(addr1);
1942 memcpy(ptr, buf, l);
1943 invalidate_and_set_dirty(addr1, l);
1944 }
1945 } else {
1946 if (!memory_access_is_direct(mr, is_write)) {
1947 /* I/O case */
1948 l = memory_access_size(mr, l, addr1);
1949 if (l == 4) {
1950 /* 32 bit read access */
1951 error |= io_mem_read(mr, addr1, &val, 4);
1952 stl_p(buf, val);
1953 } else if (l == 2) {
1954 /* 16 bit read access */
1955 error |= io_mem_read(mr, addr1, &val, 2);
1956 stw_p(buf, val);
1957 } else {
1958 /* 8 bit read access */
1959 error |= io_mem_read(mr, addr1, &val, 1);
1960 stb_p(buf, val);
1961 }
1962 } else {
1963 /* RAM case */
1964 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
1965 memcpy(buf, ptr, l);
1966 }
1967 }
1968 len -= l;
1969 buf += l;
1970 addr += l;
1971 }
1972
1973 return error;
1974 }
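/*
 * address_space_rw() is the generic slow path: each iteration
 * translates as large a chunk as the target MemoryRegion allows, then
 * either memcpy()s directly to or from guest RAM (invalidating
 * translated code on writes) or funnels the access through
 * io_mem_read()/io_mem_write() in 1, 2 or 4 byte units chosen by
 * memory_access_size().  The return value accumulates any error
 * reported by the MMIO handlers.  An illustrative caller, going through
 * the convenience wrapper defined below:
 *
 *     uint8_t buf[4] = { 0x12, 0x34, 0x56, 0x78 };
 *     cpu_physical_memory_rw(0x1000, buf, sizeof(buf), 1);  // guest write
 *     cpu_physical_memory_rw(0x1000, buf, sizeof(buf), 0);  // read back
 */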
1975
1976 bool address_space_write(AddressSpace *as, hwaddr addr,
1977 const uint8_t *buf, int len)
1978 {
1979 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
1980 }
1981
1982 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1983 {
1984 return address_space_rw(as, addr, buf, len, false);
1985 }
1986
1987
1988 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1989 int len, int is_write)
1990 {
1991 address_space_rw(&address_space_memory, addr, buf, len, is_write);
1992 }
1993
1994 /* used for ROM loading : can write in RAM and ROM */
1995 void cpu_physical_memory_write_rom(hwaddr addr,
1996 const uint8_t *buf, int len)
1997 {
1998 hwaddr l;
1999 uint8_t *ptr;
2000 hwaddr addr1;
2001 MemoryRegion *mr;
2002
2003 while (len > 0) {
2004 l = len;
2005 mr = address_space_translate(&address_space_memory,
2006 addr, &addr1, &l, true);
2007
2008 if (!(memory_region_is_ram(mr) ||
2009 memory_region_is_romd(mr))) {
2010 /* do nothing */
2011 } else {
2012 addr1 += memory_region_get_ram_addr(mr);
2013 /* ROM/RAM case */
2014 ptr = qemu_get_ram_ptr(addr1);
2015 memcpy(ptr, buf, l);
2016 invalidate_and_set_dirty(addr1, l);
2017 }
2018 len -= l;
2019 buf += l;
2020 addr += l;
2021 }
2022 }
2023
2024 typedef struct {
2025 void *buffer;
2026 hwaddr addr;
2027 hwaddr len;
2028 } BounceBuffer;
2029
2030 static BounceBuffer bounce;
2031
2032 typedef struct MapClient {
2033 void *opaque;
2034 void (*callback)(void *opaque);
2035 QLIST_ENTRY(MapClient) link;
2036 } MapClient;
2037
2038 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2039 = QLIST_HEAD_INITIALIZER(map_client_list);
2040
2041 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2042 {
2043 MapClient *client = g_malloc(sizeof(*client));
2044
2045 client->opaque = opaque;
2046 client->callback = callback;
2047 QLIST_INSERT_HEAD(&map_client_list, client, link);
2048 return client;
2049 }
2050
2051 static void cpu_unregister_map_client(void *_client)
2052 {
2053 MapClient *client = (MapClient *)_client;
2054
2055 QLIST_REMOVE(client, link);
2056 g_free(client);
2057 }
2058
2059 static void cpu_notify_map_clients(void)
2060 {
2061 MapClient *client;
2062
2063 while (!QLIST_EMPTY(&map_client_list)) {
2064 client = QLIST_FIRST(&map_client_list);
2065 client->callback(client->opaque);
2066 cpu_unregister_map_client(client);
2067 }
2068 }
2069
2070 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2071 {
2072 MemoryRegion *mr;
2073 hwaddr l, xlat;
2074
2075 while (len > 0) {
2076 l = len;
2077 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2078 if (!memory_access_is_direct(mr, is_write)) {
2079 l = memory_access_size(mr, l, addr);
2080 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2081 return false;
2082 }
2083 }
2084
2085 len -= l;
2086 addr += l;
2087 }
2088 return true;
2089 }
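
/*
 * Illustrative sketch (hypothetical names): a device model can probe a
 * guest-supplied DMA window up front instead of failing half-way
 * through a transfer:
 *
 *     if (!address_space_access_valid(&address_space_memory, dma_addr,
 *                                     dma_len, true)) {
 *         // reject the request
 *     }
 */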
2090
2091 /* Map a physical memory region into a host virtual address.
2092 * May map a subset of the requested range, given by and returned in *plen.
2093 * May return NULL if resources needed to perform the mapping are exhausted.
2094 * Use only for reads OR writes - not for read-modify-write operations.
2095 * Use cpu_register_map_client() to know when retrying the map operation is
2096 * likely to succeed.
2097 */
2098 void *address_space_map(AddressSpace *as,
2099 hwaddr addr,
2100 hwaddr *plen,
2101 bool is_write)
2102 {
2103 hwaddr len = *plen;
2104 hwaddr todo = 0;
2105 hwaddr l, xlat;
2106 MemoryRegion *mr;
2107 ram_addr_t raddr = RAM_ADDR_MAX;
2108 ram_addr_t rlen;
2109 void *ret;
2110
2111 while (len > 0) {
2112 l = len;
2113 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2114
2115 if (!memory_access_is_direct(mr, is_write)) {
2116 if (todo || bounce.buffer) {
2117 break;
2118 }
2119 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2120 bounce.addr = addr;
2121 bounce.len = l;
2122 if (!is_write) {
2123 address_space_read(as, addr, bounce.buffer, l);
2124 }
2125
2126 *plen = l;
2127 return bounce.buffer;
2128 }
2129 if (!todo) {
2130 raddr = memory_region_get_ram_addr(mr) + xlat;
2131 } else {
2132 if (memory_region_get_ram_addr(mr) + xlat != raddr + todo) {
2133 break;
2134 }
2135 }
2136
2137 len -= l;
2138 addr += l;
2139 todo += l;
2140 }
2141 rlen = todo;
2142 ret = qemu_ram_ptr_length(raddr, &rlen);
2143 *plen = rlen;
2144 return ret;
2145 }
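
/*
 * Illustrative sketch of the retry protocol described above (hypothetical
 * callback and names): when NULL is returned, register a map client and
 * try again from the callback; the client is removed automatically after
 * it has been notified.
 *
 *     static void my_dma_retry(void *opaque)
 *     {
 *         // bounce buffer released: repeat the address_space_map() call
 *     }
 *
 *     hwaddr maplen = size;
 *     void *p = address_space_map(as, gpa, &maplen, is_write);
 *     if (!p) {
 *         cpu_register_map_client(opaque, my_dma_retry);
 *         return;
 *     }
 *     // maplen may be smaller than size; loop over the remainder
 */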
2146
2147 /* Unmaps a memory region previously mapped by address_space_map().
2148 * Will also mark the memory as dirty if is_write == 1. access_len gives
2149 * the amount of memory that was actually read or written by the caller.
2150 */
2151 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2152 int is_write, hwaddr access_len)
2153 {
2154 if (buffer != bounce.buffer) {
2155 if (is_write) {
2156 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2157 while (access_len) {
2158 unsigned l;
2159 l = TARGET_PAGE_SIZE;
2160 if (l > access_len)
2161 l = access_len;
2162 invalidate_and_set_dirty(addr1, l);
2163 addr1 += l;
2164 access_len -= l;
2165 }
2166 }
2167 if (xen_enabled()) {
2168 xen_invalidate_map_cache_entry(buffer);
2169 }
2170 return;
2171 }
2172 if (is_write) {
2173 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2174 }
2175 qemu_vfree(bounce.buffer);
2176 bounce.buffer = NULL;
2177 cpu_notify_map_clients();
2178 }
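
/*
 * Illustrative sketch (hypothetical names): for a write mapping, pass the
 * number of bytes actually modified so that only those bytes are marked
 * dirty and re-checked for translated code:
 *
 *     void *p = address_space_map(as, gpa, &maplen, true);
 *     if (p) {
 *         memcpy(p, data, written);            // written <= maplen
 *         address_space_unmap(as, p, maplen, true, written);
 *     }
 */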
2179
2180 void *cpu_physical_memory_map(hwaddr addr,
2181 hwaddr *plen,
2182 int is_write)
2183 {
2184 return address_space_map(&address_space_memory, addr, plen, is_write);
2185 }
2186
2187 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2188 int is_write, hwaddr access_len)
2189 {
2190 address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2191 }
2192
2193 /* warning: addr must be aligned */
2194 static inline uint32_t ldl_phys_internal(hwaddr addr,
2195 enum device_endian endian)
2196 {
2197 uint8_t *ptr;
2198 uint64_t val;
2199 MemoryRegion *mr;
2200 hwaddr l = 4;
2201 hwaddr addr1;
2202
2203 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2204 false);
2205 if (l < 4 || !memory_access_is_direct(mr, false)) {
2206 /* I/O case */
2207 io_mem_read(mr, addr1, &val, 4);
2208 #if defined(TARGET_WORDS_BIGENDIAN)
2209 if (endian == DEVICE_LITTLE_ENDIAN) {
2210 val = bswap32(val);
2211 }
2212 #else
2213 if (endian == DEVICE_BIG_ENDIAN) {
2214 val = bswap32(val);
2215 }
2216 #endif
2217 } else {
2218 /* RAM case */
2219 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2220 & TARGET_PAGE_MASK)
2221 + addr1);
2222 switch (endian) {
2223 case DEVICE_LITTLE_ENDIAN:
2224 val = ldl_le_p(ptr);
2225 break;
2226 case DEVICE_BIG_ENDIAN:
2227 val = ldl_be_p(ptr);
2228 break;
2229 default:
2230 val = ldl_p(ptr);
2231 break;
2232 }
2233 }
2234 return val;
2235 }
2236
2237 uint32_t ldl_phys(hwaddr addr)
2238 {
2239 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2240 }
2241
2242 uint32_t ldl_le_phys(hwaddr addr)
2243 {
2244 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2245 }
2246
2247 uint32_t ldl_be_phys(hwaddr addr)
2248 {
2249 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2250 }
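
/*
 * Illustrative sketch (hypothetical names): these helpers always target
 * address_space_memory, and the fixed-endian variants suit devices whose
 * registers have a defined byte order independent of the target CPU;
 * addr must be 32-bit aligned as noted above:
 *
 *     uint32_t status = ldl_le_phys(regs_base + STATUS_REG_OFFSET);
 */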
2251
2252 /* warning: addr must be aligned */
2253 static inline uint64_t ldq_phys_internal(hwaddr addr,
2254 enum device_endian endian)
2255 {
2256 uint8_t *ptr;
2257 uint64_t val;
2258 MemoryRegion *mr;
2259 hwaddr l = 8;
2260 hwaddr addr1;
2261
2262 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2263 false);
2264 if (l < 8 || !memory_access_is_direct(mr, false)) {
2265 /* I/O case */
2266 io_mem_read(mr, addr1, &val, 8);
2267 #if defined(TARGET_WORDS_BIGENDIAN)
2268 if (endian == DEVICE_LITTLE_ENDIAN) {
2269 val = bswap64(val);
2270 }
2271 #else
2272 if (endian == DEVICE_BIG_ENDIAN) {
2273 val = bswap64(val);
2274 }
2275 #endif
2276 } else {
2277 /* RAM case */
2278 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2279 & TARGET_PAGE_MASK)
2280 + addr1);
2281 switch (endian) {
2282 case DEVICE_LITTLE_ENDIAN:
2283 val = ldq_le_p(ptr);
2284 break;
2285 case DEVICE_BIG_ENDIAN:
2286 val = ldq_be_p(ptr);
2287 break;
2288 default:
2289 val = ldq_p(ptr);
2290 break;
2291 }
2292 }
2293 return val;
2294 }
2295
2296 uint64_t ldq_phys(hwaddr addr)
2297 {
2298 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2299 }
2300
2301 uint64_t ldq_le_phys(hwaddr addr)
2302 {
2303 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2304 }
2305
2306 uint64_t ldq_be_phys(hwaddr addr)
2307 {
2308 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2309 }
2310
2311 /* XXX: optimize */
2312 uint32_t ldub_phys(hwaddr addr)
2313 {
2314 uint8_t val;
2315 cpu_physical_memory_read(addr, &val, 1);
2316 return val;
2317 }
2318
2319 /* warning: addr must be aligned */
2320 static inline uint32_t lduw_phys_internal(hwaddr addr,
2321 enum device_endian endian)
2322 {
2323 uint8_t *ptr;
2324 uint64_t val;
2325 MemoryRegion *mr;
2326 hwaddr l = 2;
2327 hwaddr addr1;
2328
2329 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2330 false);
2331 if (l < 2 || !memory_access_is_direct(mr, false)) {
2332 /* I/O case */
2333 io_mem_read(mr, addr1, &val, 2);
2334 #if defined(TARGET_WORDS_BIGENDIAN)
2335 if (endian == DEVICE_LITTLE_ENDIAN) {
2336 val = bswap16(val);
2337 }
2338 #else
2339 if (endian == DEVICE_BIG_ENDIAN) {
2340 val = bswap16(val);
2341 }
2342 #endif
2343 } else {
2344 /* RAM case */
2345 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2346 & TARGET_PAGE_MASK)
2347 + addr1);
2348 switch (endian) {
2349 case DEVICE_LITTLE_ENDIAN:
2350 val = lduw_le_p(ptr);
2351 break;
2352 case DEVICE_BIG_ENDIAN:
2353 val = lduw_be_p(ptr);
2354 break;
2355 default:
2356 val = lduw_p(ptr);
2357 break;
2358 }
2359 }
2360 return val;
2361 }
2362
2363 uint32_t lduw_phys(hwaddr addr)
2364 {
2365 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2366 }
2367
2368 uint32_t lduw_le_phys(hwaddr addr)
2369 {
2370 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2371 }
2372
2373 uint32_t lduw_be_phys(hwaddr addr)
2374 {
2375 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2376 }
2377
2378 /* warning: addr must be aligned. The RAM page is not marked as dirty
2379 and the code inside is not invalidated. This is useful if the dirty
2380 bits are used to track modified PTEs */
2381 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2382 {
2383 uint8_t *ptr;
2384 MemoryRegion *mr;
2385 hwaddr l = 4;
2386 hwaddr addr1;
2387
2388 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2389 true);
2390 if (l < 4 || !memory_access_is_direct(mr, true)) {
2391 io_mem_write(mr, addr1, val, 4);
2392 } else {
2393 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2394 ptr = qemu_get_ram_ptr(addr1);
2395 stl_p(ptr, val);
2396
2397 if (unlikely(in_migration)) {
2398 if (!cpu_physical_memory_is_dirty(addr1)) {
2399 /* invalidate code */
2400 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2401 /* set dirty bit */
2402 cpu_physical_memory_set_dirty_flags(
2403 addr1, (0xff & ~CODE_DIRTY_FLAG));
2404 }
2405 }
2406 }
2407 }
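
/*
 * Illustrative sketch (hypothetical names): MMU emulation that sets an
 * accessed or dirty bit inside a guest page table entry in place can use
 * this helper, so that the store neither invalidates translated code nor
 * marks the page dirty on the non-migration path:
 *
 *     stl_phys_notdirty(pte_addr, pte | PTE_ACCESSED);
 */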
2408
2409 /* warning: addr must be aligned */
2410 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2411 enum device_endian endian)
2412 {
2413 uint8_t *ptr;
2414 MemoryRegion *mr;
2415 hwaddr l = 4;
2416 hwaddr addr1;
2417
2418 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2419 true);
2420 if (l < 4 || !memory_access_is_direct(mr, true)) {
2421 #if defined(TARGET_WORDS_BIGENDIAN)
2422 if (endian == DEVICE_LITTLE_ENDIAN) {
2423 val = bswap32(val);
2424 }
2425 #else
2426 if (endian == DEVICE_BIG_ENDIAN) {
2427 val = bswap32(val);
2428 }
2429 #endif
2430 io_mem_write(mr, addr1, val, 4);
2431 } else {
2432 /* RAM case */
2433 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2434 ptr = qemu_get_ram_ptr(addr1);
2435 switch (endian) {
2436 case DEVICE_LITTLE_ENDIAN:
2437 stl_le_p(ptr, val);
2438 break;
2439 case DEVICE_BIG_ENDIAN:
2440 stl_be_p(ptr, val);
2441 break;
2442 default:
2443 stl_p(ptr, val);
2444 break;
2445 }
2446 invalidate_and_set_dirty(addr1, 4);
2447 }
2448 }
2449
2450 void stl_phys(hwaddr addr, uint32_t val)
2451 {
2452 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2453 }
2454
2455 void stl_le_phys(hwaddr addr, uint32_t val)
2456 {
2457 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2458 }
2459
2460 void stl_be_phys(hwaddr addr, uint32_t val)
2461 {
2462 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2463 }
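
/*
 * Illustrative sketch (hypothetical names): the stores mirror the loads
 * above; a device model posting a 32-bit little-endian completion word
 * would write:
 *
 *     stl_le_phys(ring_base + tail * sizeof(uint32_t), DESC_DONE);
 */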
2464
2465 /* XXX: optimize */
2466 void stb_phys(hwaddr addr, uint32_t val)
2467 {
2468 uint8_t v = val;
2469 cpu_physical_memory_write(addr, &v, 1);
2470 }
2471
2472 /* warning: addr must be aligned */
2473 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2474 enum device_endian endian)
2475 {
2476 uint8_t *ptr;
2477 MemoryRegion *mr;
2478 hwaddr l = 2;
2479 hwaddr addr1;
2480
2481 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2482 true);
2483 if (l < 2 || !memory_access_is_direct(mr, true)) {
2484 #if defined(TARGET_WORDS_BIGENDIAN)
2485 if (endian == DEVICE_LITTLE_ENDIAN) {
2486 val = bswap16(val);
2487 }
2488 #else
2489 if (endian == DEVICE_BIG_ENDIAN) {
2490 val = bswap16(val);
2491 }
2492 #endif
2493 io_mem_write(mr, addr1, val, 2);
2494 } else {
2495 /* RAM case */
2496 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2497 ptr = qemu_get_ram_ptr(addr1);
2498 switch (endian) {
2499 case DEVICE_LITTLE_ENDIAN:
2500 stw_le_p(ptr, val);
2501 break;
2502 case DEVICE_BIG_ENDIAN:
2503 stw_be_p(ptr, val);
2504 break;
2505 default:
2506 stw_p(ptr, val);
2507 break;
2508 }
2509 invalidate_and_set_dirty(addr1, 2);
2510 }
2511 }
2512
2513 void stw_phys(hwaddr addr, uint32_t val)
2514 {
2515 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2516 }
2517
2518 void stw_le_phys(hwaddr addr, uint32_t val)
2519 {
2520 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2521 }
2522
2523 void stw_be_phys(hwaddr addr, uint32_t val)
2524 {
2525 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2526 }
2527
2528 /* XXX: optimize */
2529 void stq_phys(hwaddr addr, uint64_t val)
2530 {
2531 val = tswap64(val);
2532 cpu_physical_memory_write(addr, &val, 8);
2533 }
2534
2535 void stq_le_phys(hwaddr addr, uint64_t val)
2536 {
2537 val = cpu_to_le64(val);
2538 cpu_physical_memory_write(addr, &val, 8);
2539 }
2540
2541 void stq_be_phys(hwaddr addr, uint64_t val)
2542 {
2543 val = cpu_to_be64(val);
2544 cpu_physical_memory_write(addr, &val, 8);
2545 }
2546
2547 /* virtual memory access for debug (includes writing to ROM) */
2548 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2549 uint8_t *buf, int len, int is_write)
2550 {
2551 int l;
2552 hwaddr phys_addr;
2553 target_ulong page;
2554
2555 while (len > 0) {
2556 page = addr & TARGET_PAGE_MASK;
2557 phys_addr = cpu_get_phys_page_debug(env, page);
2558 /* if no physical page mapped, return an error */
2559 if (phys_addr == -1)
2560 return -1;
2561 l = (page + TARGET_PAGE_SIZE) - addr;
2562 if (l > len)
2563 l = len;
2564 phys_addr += (addr & ~TARGET_PAGE_MASK);
2565 if (is_write)
2566 cpu_physical_memory_write_rom(phys_addr, buf, l);
2567 else
2568 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2569 len -= l;
2570 buf += l;
2571 addr += l;
2572 }
2573 return 0;
2574 }
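
/*
 * Illustrative sketch (hypothetical names): debugger-style callers such
 * as the gdbstub access guest virtual addresses and may patch ROM, for
 * example to plant a software breakpoint:
 *
 *     uint8_t brk = BREAKPOINT_INSN;       // hypothetical opcode
 *     if (cpu_memory_rw_debug(env, guest_pc, &brk, 1, 1) < 0) {
 *         // the virtual address is not mapped
 *     }
 */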
2575 #endif
2576
2577 #if !defined(CONFIG_USER_ONLY)
2578
2579 /*
2580 * A helper function for the _utterly broken_ virtio device model to find out if
2581 * it's running on a big-endian machine. Don't do this at home, kids!
2582 */
2583 bool virtio_is_big_endian(void);
2584 bool virtio_is_big_endian(void)
2585 {
2586 #if defined(TARGET_WORDS_BIGENDIAN)
2587 return true;
2588 #else
2589 return false;
2590 #endif
2591 }
2592
2593 #endif
2594
2595 #ifndef CONFIG_USER_ONLY
2596 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2597 {
2598 MemoryRegion *mr;
2599 hwaddr l = 1;
2600
2601 mr = address_space_translate(&address_space_memory,
2602 phys_addr, &phys_addr, &l, false);
2603
2604 return !(memory_region_is_ram(mr) ||
2605 memory_region_is_romd(mr));
2606 }
2607 #endif