1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_SUBPAGE
54
55 #if !defined(CONFIG_USER_ONLY)
56 int phys_ram_fd;
57 static int in_migration;
58
59 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
60
61 static MemoryRegion *system_memory;
62 static MemoryRegion *system_io;
63
64 AddressSpace address_space_io;
65 AddressSpace address_space_memory;
66 DMAContext dma_context_memory;
67
68 MemoryRegion io_mem_rom, io_mem_notdirty;
69 static MemoryRegion io_mem_unassigned, io_mem_subpage_ram;
70
71 #endif
72
73 CPUArchState *first_cpu;
74 /* current CPU in the current thread. It is only valid inside
75 cpu_exec() */
76 DEFINE_TLS(CPUArchState *,cpu_single_env);
77 /* 0 = Do not count executed instructions.
78 1 = Precise instruction counting.
79 2 = Adaptive rate instruction counting. */
80 int use_icount;
81
82 #if !defined(CONFIG_USER_ONLY)
83
84 typedef struct PhysPageEntry PhysPageEntry;
85
86 struct PhysPageEntry {
87 uint16_t is_leaf : 1;
88 /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
89 uint16_t ptr : 15;
90 };
91
92 struct AddressSpaceDispatch {
93 /* This is a multi-level map on the physical address space.
94 * The bottom level has pointers to MemoryRegionSections.
95 */
96 PhysPageEntry phys_map;
97 MemoryListener listener;
98 };
99
100 static MemoryRegionSection *phys_sections;
101 static unsigned phys_sections_nb, phys_sections_nb_alloc;
102 static uint16_t phys_section_unassigned;
103 static uint16_t phys_section_notdirty;
104 static uint16_t phys_section_rom;
105 static uint16_t phys_section_watch;
106
107 /* Simple allocator for PhysPageEntry nodes */
108 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
109 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
110
111 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
112
113 static void io_mem_init(void);
114 static void memory_map_init(void);
115 static void *qemu_safe_ram_ptr(ram_addr_t addr);
116
117 static MemoryRegion io_mem_watch;
118 #endif
119
120 #if !defined(CONFIG_USER_ONLY)
121
122 static void phys_map_node_reserve(unsigned nodes)
123 {
124 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
125 typedef PhysPageEntry Node[L2_SIZE];
126 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
127 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
128 phys_map_nodes_nb + nodes);
129 phys_map_nodes = g_renew(Node, phys_map_nodes,
130 phys_map_nodes_nb_alloc);
131 }
132 }
133
134 static uint16_t phys_map_node_alloc(void)
135 {
136 unsigned i;
137 uint16_t ret;
138
139 ret = phys_map_nodes_nb++;
140 assert(ret != PHYS_MAP_NODE_NIL);
141 assert(ret != phys_map_nodes_nb_alloc);
142 for (i = 0; i < L2_SIZE; ++i) {
143 phys_map_nodes[ret][i].is_leaf = 0;
144 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
145 }
146 return ret;
147 }
148
149 static void phys_map_nodes_reset(void)
150 {
151 phys_map_nodes_nb = 0;
152 }
153
154
155 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
156 hwaddr *nb, uint16_t leaf,
157 int level)
158 {
159 PhysPageEntry *p;
160 int i;
161 hwaddr step = (hwaddr)1 << (level * L2_BITS);
162
163 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
164 lp->ptr = phys_map_node_alloc();
165 p = phys_map_nodes[lp->ptr];
166 if (level == 0) {
167 for (i = 0; i < L2_SIZE; i++) {
168 p[i].is_leaf = 1;
169 p[i].ptr = phys_section_unassigned;
170 }
171 }
172 } else {
173 p = phys_map_nodes[lp->ptr];
174 }
175 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
176
177 while (*nb && lp < &p[L2_SIZE]) {
178 if ((*index & (step - 1)) == 0 && *nb >= step) {
179 lp->is_leaf = true;
180 lp->ptr = leaf;
181 *index += step;
182 *nb -= step;
183 } else {
184 phys_page_set_level(lp, index, nb, leaf, level - 1);
185 }
186 ++lp;
187 }
188 }
189
190 static void phys_page_set(AddressSpaceDispatch *d,
191 hwaddr index, hwaddr nb,
192 uint16_t leaf)
193 {
194 /* Wildly overreserve - it doesn't matter much. */
195 phys_map_node_reserve(3 * P_L2_LEVELS);
196
197 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
198 }
199
200 static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
201 {
202 PhysPageEntry lp = d->phys_map;
203 PhysPageEntry *p;
204 int i;
205
206 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
207 if (lp.ptr == PHYS_MAP_NODE_NIL) {
208 return &phys_sections[phys_section_unassigned];
209 }
210 p = phys_map_nodes[lp.ptr];
211 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
212 }
213 return &phys_sections[lp.ptr];
214 }
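/*
 * Editor's note: illustrative sketch only, not part of the original file,
 * guarded with #if 0 so it does not affect the build.  It shows how
 * phys_page_set_level() and phys_page_find() walk the radix tree: each level
 * consumes L2_BITS bits of the page index, starting at the most significant
 * level (P_L2_LEVELS - 1) and ending at level 0, whose entries are leaves.
 */
#if 0
static void phys_page_index_example(hwaddr index)
{
    int level;

    for (level = P_L2_LEVELS - 1; level >= 0; level--) {
        unsigned slot = (index >> (level * L2_BITS)) & (L2_SIZE - 1);
        printf("level %d -> slot %u\n", level, slot);
    }
}
#endif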
215
216 bool memory_region_is_unassigned(MemoryRegion *mr)
217 {
218 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
219 && mr != &io_mem_watch;
220 }
221
222 static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
223 hwaddr addr)
224 {
225 return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
226 }
227
228 MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
229 hwaddr *xlat, hwaddr *plen,
230 bool is_write)
231 {
232 MemoryRegionSection *section;
233 Int128 diff;
234
235 section = address_space_lookup_region(as, addr);
236 /* Compute offset within MemoryRegionSection */
237 addr -= section->offset_within_address_space;
238
239 /* Compute offset within MemoryRegion */
240 *xlat = addr + section->offset_within_region;
241
242 diff = int128_sub(section->mr->size, int128_make64(addr));
243 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
244 return section;
245 }
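/*
 * Editor's note: illustrative sketch only, not part of the original file,
 * guarded with #if 0.  address_space_translate() clamps *plen so the caller
 * never crosses the end of the returned MemoryRegionSection; a typical
 * caller therefore loops until the whole length has been handled, exactly
 * as address_space_rw() does later in this file.
 */
#if 0
static void translate_loop_example(AddressSpace *as, hwaddr addr, hwaddr len)
{
    while (len > 0) {
        hwaddr xlat, l = len;
        MemoryRegionSection *section;

        section = address_space_translate(as, addr, &xlat, &l, false);
        /* 'l' may now be smaller than requested: handle [addr, addr + l) */
        (void)section;
        (void)xlat;
        addr += l;
        len -= l;
    }
}
#endif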
246 #endif
247
248 void cpu_exec_init_all(void)
249 {
250 #if !defined(CONFIG_USER_ONLY)
251 qemu_mutex_init(&ram_list.mutex);
252 memory_map_init();
253 io_mem_init();
254 #endif
255 }
256
257 #if !defined(CONFIG_USER_ONLY)
258
259 static int cpu_common_post_load(void *opaque, int version_id)
260 {
261 CPUState *cpu = opaque;
262
263 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
264 version_id is increased. */
265 cpu->interrupt_request &= ~0x01;
266 tlb_flush(cpu->env_ptr, 1);
267
268 return 0;
269 }
270
271 static const VMStateDescription vmstate_cpu_common = {
272 .name = "cpu_common",
273 .version_id = 1,
274 .minimum_version_id = 1,
275 .minimum_version_id_old = 1,
276 .post_load = cpu_common_post_load,
277 .fields = (VMStateField []) {
278 VMSTATE_UINT32(halted, CPUState),
279 VMSTATE_UINT32(interrupt_request, CPUState),
280 VMSTATE_END_OF_LIST()
281 }
282 };
283 #else
284 #define vmstate_cpu_common vmstate_dummy
285 #endif
286
287 CPUState *qemu_get_cpu(int index)
288 {
289 CPUArchState *env = first_cpu;
290 CPUState *cpu = NULL;
291
292 while (env) {
293 cpu = ENV_GET_CPU(env);
294 if (cpu->cpu_index == index) {
295 break;
296 }
297 env = env->next_cpu;
298 }
299
300 return env ? cpu : NULL;
301 }
302
303 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
304 {
305 CPUArchState *env = first_cpu;
306
307 while (env) {
308 func(ENV_GET_CPU(env), data);
309 env = env->next_cpu;
310 }
311 }
312
313 void cpu_exec_init(CPUArchState *env)
314 {
315 CPUState *cpu = ENV_GET_CPU(env);
316 CPUClass *cc = CPU_GET_CLASS(cpu);
317 CPUArchState **penv;
318 int cpu_index;
319
320 #if defined(CONFIG_USER_ONLY)
321 cpu_list_lock();
322 #endif
323 env->next_cpu = NULL;
324 penv = &first_cpu;
325 cpu_index = 0;
326 while (*penv != NULL) {
327 penv = &(*penv)->next_cpu;
328 cpu_index++;
329 }
330 cpu->cpu_index = cpu_index;
331 cpu->numa_node = 0;
332 QTAILQ_INIT(&env->breakpoints);
333 QTAILQ_INIT(&env->watchpoints);
334 #ifndef CONFIG_USER_ONLY
335 cpu->thread_id = qemu_get_thread_id();
336 #endif
337 *penv = env;
338 #if defined(CONFIG_USER_ONLY)
339 cpu_list_unlock();
340 #endif
341 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
342 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
343 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
344 cpu_save, cpu_load, env);
345 assert(cc->vmsd == NULL);
346 #endif
347 if (cc->vmsd != NULL) {
348 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
349 }
350 }
351
352 #if defined(TARGET_HAS_ICE)
353 #if defined(CONFIG_USER_ONLY)
354 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
355 {
356 tb_invalidate_phys_page_range(pc, pc + 1, 0);
357 }
358 #else
359 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
360 {
361 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
362 (pc & ~TARGET_PAGE_MASK));
363 }
364 #endif
365 #endif /* TARGET_HAS_ICE */
366
367 #if defined(CONFIG_USER_ONLY)
368 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
369
370 {
371 }
372
373 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
374 int flags, CPUWatchpoint **watchpoint)
375 {
376 return -ENOSYS;
377 }
378 #else
379 /* Add a watchpoint. */
380 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
381 int flags, CPUWatchpoint **watchpoint)
382 {
383 target_ulong len_mask = ~(len - 1);
384 CPUWatchpoint *wp;
385
386 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
387 if ((len & (len - 1)) || (addr & ~len_mask) ||
388 len == 0 || len > TARGET_PAGE_SIZE) {
389 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
390 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
391 return -EINVAL;
392 }
393 wp = g_malloc(sizeof(*wp));
394
395 wp->vaddr = addr;
396 wp->len_mask = len_mask;
397 wp->flags = flags;
398
399 /* keep all GDB-injected watchpoints in front */
400 if (flags & BP_GDB)
401 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
402 else
403 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
404
405 tlb_flush_page(env, addr);
406
407 if (watchpoint)
408 *watchpoint = wp;
409 return 0;
410 }
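/*
 * Editor's note: illustrative sketch only, not part of the original file,
 * guarded with #if 0.  The sanity check above uses two bit tricks:
 * (len & (len - 1)) is zero only for powers of two, and (addr & ~len_mask)
 * is zero only when addr is aligned to len.  For len == 4, len_mask == ~3,
 * so base address 0x1000 is accepted while 0x1002 is rejected as unaligned.
 */
#if 0
static bool watchpoint_args_valid(target_ulong addr, target_ulong len)
{
    target_ulong len_mask = ~(len - 1);

    return len != 0 && len <= TARGET_PAGE_SIZE
        && (len & (len - 1)) == 0      /* power of two */
        && (addr & ~len_mask) == 0;    /* aligned to len */
}
#endif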
411
412 /* Remove a specific watchpoint. */
413 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
414 int flags)
415 {
416 target_ulong len_mask = ~(len - 1);
417 CPUWatchpoint *wp;
418
419 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
420 if (addr == wp->vaddr && len_mask == wp->len_mask
421 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
422 cpu_watchpoint_remove_by_ref(env, wp);
423 return 0;
424 }
425 }
426 return -ENOENT;
427 }
428
429 /* Remove a specific watchpoint by reference. */
430 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
431 {
432 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
433
434 tlb_flush_page(env, watchpoint->vaddr);
435
436 g_free(watchpoint);
437 }
438
439 /* Remove all matching watchpoints. */
440 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
441 {
442 CPUWatchpoint *wp, *next;
443
444 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
445 if (wp->flags & mask)
446 cpu_watchpoint_remove_by_ref(env, wp);
447 }
448 }
449 #endif
450
451 /* Add a breakpoint. */
452 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
453 CPUBreakpoint **breakpoint)
454 {
455 #if defined(TARGET_HAS_ICE)
456 CPUBreakpoint *bp;
457
458 bp = g_malloc(sizeof(*bp));
459
460 bp->pc = pc;
461 bp->flags = flags;
462
463 /* keep all GDB-injected breakpoints in front */
464 if (flags & BP_GDB)
465 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
466 else
467 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
468
469 breakpoint_invalidate(env, pc);
470
471 if (breakpoint)
472 *breakpoint = bp;
473 return 0;
474 #else
475 return -ENOSYS;
476 #endif
477 }
478
479 /* Remove a specific breakpoint. */
480 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
481 {
482 #if defined(TARGET_HAS_ICE)
483 CPUBreakpoint *bp;
484
485 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
486 if (bp->pc == pc && bp->flags == flags) {
487 cpu_breakpoint_remove_by_ref(env, bp);
488 return 0;
489 }
490 }
491 return -ENOENT;
492 #else
493 return -ENOSYS;
494 #endif
495 }
496
497 /* Remove a specific breakpoint by reference. */
498 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
499 {
500 #if defined(TARGET_HAS_ICE)
501 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
502
503 breakpoint_invalidate(env, breakpoint->pc);
504
505 g_free(breakpoint);
506 #endif
507 }
508
509 /* Remove all matching breakpoints. */
510 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
511 {
512 #if defined(TARGET_HAS_ICE)
513 CPUBreakpoint *bp, *next;
514
515 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
516 if (bp->flags & mask)
517 cpu_breakpoint_remove_by_ref(env, bp);
518 }
519 #endif
520 }
521
522 /* enable or disable single step mode. EXCP_DEBUG is returned by the
523 CPU loop after each instruction */
524 void cpu_single_step(CPUArchState *env, int enabled)
525 {
526 #if defined(TARGET_HAS_ICE)
527 if (env->singlestep_enabled != enabled) {
528 env->singlestep_enabled = enabled;
529 if (kvm_enabled())
530 kvm_update_guest_debug(env, 0);
531 else {
532 /* must flush all the translated code to avoid inconsistencies */
533 /* XXX: only flush what is necessary */
534 tb_flush(env);
535 }
536 }
537 #endif
538 }
539
540 void cpu_exit(CPUArchState *env)
541 {
542 CPUState *cpu = ENV_GET_CPU(env);
543
544 cpu->exit_request = 1;
545 cpu->tcg_exit_req = 1;
546 }
547
548 void cpu_abort(CPUArchState *env, const char *fmt, ...)
549 {
550 va_list ap;
551 va_list ap2;
552
553 va_start(ap, fmt);
554 va_copy(ap2, ap);
555 fprintf(stderr, "qemu: fatal: ");
556 vfprintf(stderr, fmt, ap);
557 fprintf(stderr, "\n");
558 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
559 if (qemu_log_enabled()) {
560 qemu_log("qemu: fatal: ");
561 qemu_log_vprintf(fmt, ap2);
562 qemu_log("\n");
563 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
564 qemu_log_flush();
565 qemu_log_close();
566 }
567 va_end(ap2);
568 va_end(ap);
569 #if defined(CONFIG_USER_ONLY)
570 {
571 struct sigaction act;
572 sigfillset(&act.sa_mask);
573 act.sa_handler = SIG_DFL;
574 sigaction(SIGABRT, &act, NULL);
575 }
576 #endif
577 abort();
578 }
579
580 CPUArchState *cpu_copy(CPUArchState *env)
581 {
582 CPUArchState *new_env = cpu_init(env->cpu_model_str);
583 CPUArchState *next_cpu = new_env->next_cpu;
584 #if defined(TARGET_HAS_ICE)
585 CPUBreakpoint *bp;
586 CPUWatchpoint *wp;
587 #endif
588
589 memcpy(new_env, env, sizeof(CPUArchState));
590
591 /* Preserve chaining. */
592 new_env->next_cpu = next_cpu;
593
594 /* Clone all break/watchpoints.
595 Note: Once we support ptrace with hw-debug register access, make sure
596 BP_CPU break/watchpoints are handled correctly on clone. */
597 QTAILQ_INIT(&env->breakpoints);
598 QTAILQ_INIT(&env->watchpoints);
599 #if defined(TARGET_HAS_ICE)
600 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
601 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
602 }
603 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
604 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
605 wp->flags, NULL);
606 }
607 #endif
608
609 return new_env;
610 }
611
612 #if !defined(CONFIG_USER_ONLY)
613 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
614 uintptr_t length)
615 {
616 uintptr_t start1;
617
618 /* we modify the TLB cache so that the dirty bit will be set again
619 when accessing the range */
620 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
621 /* Check that we don't span multiple blocks - this breaks the
622 address comparisons below. */
623 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
624 != (end - 1) - start) {
625 abort();
626 }
627 cpu_tlb_reset_dirty_all(start1, length);
628
629 }
630
631 /* Note: start and end must be within the same ram block. */
632 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
633 int dirty_flags)
634 {
635 uintptr_t length;
636
637 start &= TARGET_PAGE_MASK;
638 end = TARGET_PAGE_ALIGN(end);
639
640 length = end - start;
641 if (length == 0)
642 return;
643 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
644
645 if (tcg_enabled()) {
646 tlb_reset_dirty_range_all(start, end, length);
647 }
648 }
649
650 static int cpu_physical_memory_set_dirty_tracking(int enable)
651 {
652 int ret = 0;
653 in_migration = enable;
654 return ret;
655 }
656
657 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
658 MemoryRegionSection *section,
659 target_ulong vaddr,
660 hwaddr paddr, hwaddr xlat,
661 int prot,
662 target_ulong *address)
663 {
664 hwaddr iotlb;
665 CPUWatchpoint *wp;
666
667 if (memory_region_is_ram(section->mr)) {
668 /* Normal RAM. */
669 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
670 + xlat;
671 if (!section->readonly) {
672 iotlb |= phys_section_notdirty;
673 } else {
674 iotlb |= phys_section_rom;
675 }
676 } else {
677 iotlb = section - phys_sections;
678 iotlb += xlat;
679 }
680
681 /* Make accesses to pages with watchpoints go via the
682 watchpoint trap routines. */
683 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
684 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
685 /* Avoid trapping reads of pages with a write breakpoint. */
686 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
687 iotlb = phys_section_watch + paddr;
688 *address |= TLB_MMIO;
689 break;
690 }
691 }
692 }
693
694 return iotlb;
695 }
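/*
 * Editor's note: illustrative sketch only, not part of the original file,
 * guarded with #if 0.  For RAM the iotlb value is a page-aligned ram_addr
 * with a small section number (notdirty/rom) OR-ed into the low bits; for
 * MMIO it is the section index plus the offset within the region.
 * iotlb_to_region() later in this file recovers the MemoryRegion by masking
 * with ~TARGET_PAGE_MASK, which is why phys_section_add() asserts that
 * section numbers stay below TARGET_PAGE_SIZE.
 */
#if 0
static MemoryRegion *iotlb_decode_example(hwaddr iotlb)
{
    unsigned section_idx = iotlb & ~TARGET_PAGE_MASK;

    return phys_sections[section_idx].mr;
}
#endif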
696 #endif /* defined(CONFIG_USER_ONLY) */
697
698 #if !defined(CONFIG_USER_ONLY)
699
700 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
701 typedef struct subpage_t {
702 MemoryRegion iomem;
703 hwaddr base;
704 uint16_t sub_section[TARGET_PAGE_SIZE];
705 } subpage_t;
706
707 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
708 uint16_t section);
709 static subpage_t *subpage_init(hwaddr base);
710 static void destroy_page_desc(uint16_t section_index)
711 {
712 MemoryRegionSection *section = &phys_sections[section_index];
713 MemoryRegion *mr = section->mr;
714
715 if (mr->subpage) {
716 subpage_t *subpage = container_of(mr, subpage_t, iomem);
717 memory_region_destroy(&subpage->iomem);
718 g_free(subpage);
719 }
720 }
721
722 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
723 {
724 unsigned i;
725 PhysPageEntry *p;
726
727 if (lp->ptr == PHYS_MAP_NODE_NIL) {
728 return;
729 }
730
731 p = phys_map_nodes[lp->ptr];
732 for (i = 0; i < L2_SIZE; ++i) {
733 if (!p[i].is_leaf) {
734 destroy_l2_mapping(&p[i], level - 1);
735 } else {
736 destroy_page_desc(p[i].ptr);
737 }
738 }
739 lp->is_leaf = 0;
740 lp->ptr = PHYS_MAP_NODE_NIL;
741 }
742
743 static void destroy_all_mappings(AddressSpaceDispatch *d)
744 {
745 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
746 phys_map_nodes_reset();
747 }
748
749 static uint16_t phys_section_add(MemoryRegionSection *section)
750 {
751 /* The physical section number is ORed with a page-aligned
752 * pointer to produce the iotlb entries. Thus it should
753 * never overflow into the page-aligned value.
754 */
755 assert(phys_sections_nb < TARGET_PAGE_SIZE);
756
757 if (phys_sections_nb == phys_sections_nb_alloc) {
758 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
759 phys_sections = g_renew(MemoryRegionSection, phys_sections,
760 phys_sections_nb_alloc);
761 }
762 phys_sections[phys_sections_nb] = *section;
763 return phys_sections_nb++;
764 }
765
766 static void phys_sections_clear(void)
767 {
768 phys_sections_nb = 0;
769 }
770
771 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
772 {
773 subpage_t *subpage;
774 hwaddr base = section->offset_within_address_space
775 & TARGET_PAGE_MASK;
776 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
777 MemoryRegionSection subsection = {
778 .offset_within_address_space = base,
779 .size = TARGET_PAGE_SIZE,
780 };
781 hwaddr start, end;
782
783 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
784
785 if (!(existing->mr->subpage)) {
786 subpage = subpage_init(base);
787 subsection.mr = &subpage->iomem;
788 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
789 phys_section_add(&subsection));
790 } else {
791 subpage = container_of(existing->mr, subpage_t, iomem);
792 }
793 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
794 end = start + section->size - 1;
795 subpage_register(subpage, start, end, phys_section_add(section));
796 }
797
798
799 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
800 {
801 hwaddr start_addr = section->offset_within_address_space;
802 ram_addr_t size = section->size;
803 hwaddr addr;
804 uint16_t section_index = phys_section_add(section);
805
806 assert(size);
807
808 addr = start_addr;
809 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
810 section_index);
811 }
812
813 QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > MAX_PHYS_ADDR_SPACE_BITS)
814
815 static MemoryRegionSection limit(MemoryRegionSection section)
816 {
817 section.size = MIN(section.offset_within_address_space + section.size,
818 MAX_PHYS_ADDR + 1)
819 - section.offset_within_address_space;
820
821 return section;
822 }
823
824 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
825 {
826 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
827 MemoryRegionSection now = limit(*section), remain = limit(*section);
828
829 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
830 || (now.size < TARGET_PAGE_SIZE)) {
831 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
832 - now.offset_within_address_space,
833 now.size);
834 register_subpage(d, &now);
835 remain.size -= now.size;
836 remain.offset_within_address_space += now.size;
837 remain.offset_within_region += now.size;
838 }
839 while (remain.size >= TARGET_PAGE_SIZE) {
840 now = remain;
841 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
842 now.size = TARGET_PAGE_SIZE;
843 register_subpage(d, &now);
844 } else {
845 now.size &= TARGET_PAGE_MASK;
846 register_multipage(d, &now);
847 }
848 remain.size -= now.size;
849 remain.offset_within_address_space += now.size;
850 remain.offset_within_region += now.size;
851 }
852 now = remain;
853 if (now.size) {
854 register_subpage(d, &now);
855 }
856 }
857
858 void qemu_flush_coalesced_mmio_buffer(void)
859 {
860 if (kvm_enabled())
861 kvm_flush_coalesced_mmio_buffer();
862 }
863
864 void qemu_mutex_lock_ramlist(void)
865 {
866 qemu_mutex_lock(&ram_list.mutex);
867 }
868
869 void qemu_mutex_unlock_ramlist(void)
870 {
871 qemu_mutex_unlock(&ram_list.mutex);
872 }
873
874 #if defined(__linux__) && !defined(TARGET_S390X)
875
876 #include <sys/vfs.h>
877
878 #define HUGETLBFS_MAGIC 0x958458f6
879
880 static long gethugepagesize(const char *path)
881 {
882 struct statfs fs;
883 int ret;
884
885 do {
886 ret = statfs(path, &fs);
887 } while (ret != 0 && errno == EINTR);
888
889 if (ret != 0) {
890 perror(path);
891 return 0;
892 }
893
894 if (fs.f_type != HUGETLBFS_MAGIC)
895 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
896
897 return fs.f_bsize;
898 }
899
900 static void *file_ram_alloc(RAMBlock *block,
901 ram_addr_t memory,
902 const char *path)
903 {
904 char *filename;
905 char *sanitized_name;
906 char *c;
907 void *area;
908 int fd;
909 #ifdef MAP_POPULATE
910 int flags;
911 #endif
912 unsigned long hpagesize;
913
914 hpagesize = gethugepagesize(path);
915 if (!hpagesize) {
916 return NULL;
917 }
918
919 if (memory < hpagesize) {
920 return NULL;
921 }
922
923 if (kvm_enabled() && !kvm_has_sync_mmu()) {
924 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
925 return NULL;
926 }
927
928 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
929 sanitized_name = g_strdup(block->mr->name);
930 for (c = sanitized_name; *c != '\0'; c++) {
931 if (*c == '/')
932 *c = '_';
933 }
934
935 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
936 sanitized_name);
937 g_free(sanitized_name);
938
939 fd = mkstemp(filename);
940 if (fd < 0) {
941 perror("unable to create backing store for hugepages");
942 g_free(filename);
943 return NULL;
944 }
945 unlink(filename);
946 g_free(filename);
947
948 memory = (memory+hpagesize-1) & ~(hpagesize-1);
949
950 /*
951 * ftruncate is not supported by hugetlbfs in older
952 * hosts, so don't bother bailing out on errors.
953 * If anything goes wrong with it under other filesystems,
954 * mmap will fail.
955 */
956 if (ftruncate(fd, memory))
957 perror("ftruncate");
958
959 #ifdef MAP_POPULATE
960     /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
961      * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
962      * to sidestep this quirk.
963 */
964 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
965 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
966 #else
967 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
968 #endif
969 if (area == MAP_FAILED) {
970 perror("file_ram_alloc: can't mmap RAM pages");
971 close(fd);
972 return (NULL);
973 }
974 block->fd = fd;
975 return area;
976 }
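/*
 * Editor's note: illustrative sketch only, not part of the original file,
 * guarded with #if 0.  The rounding used above,
 * (memory + hpagesize - 1) & ~(hpagesize - 1), rounds a size up to the next
 * multiple of the huge page size, which is a power of two.  With 2 MiB huge
 * pages, a 3 MiB request is rounded up to 4 MiB.
 */
#if 0
static unsigned long round_up_to_hugepage(unsigned long memory,
                                          unsigned long hpagesize)
{
    return (memory + hpagesize - 1) & ~(hpagesize - 1);
}
#endif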
977 #endif
978
979 static ram_addr_t find_ram_offset(ram_addr_t size)
980 {
981 RAMBlock *block, *next_block;
982 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
983
984     assert(size != 0); /* it would hand out the same offset multiple times */
985
986 if (QTAILQ_EMPTY(&ram_list.blocks))
987 return 0;
988
989 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
990 ram_addr_t end, next = RAM_ADDR_MAX;
991
992 end = block->offset + block->length;
993
994 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
995 if (next_block->offset >= end) {
996 next = MIN(next, next_block->offset);
997 }
998 }
999 if (next - end >= size && next - end < mingap) {
1000 offset = end;
1001 mingap = next - end;
1002 }
1003 }
1004
1005 if (offset == RAM_ADDR_MAX) {
1006 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1007 (uint64_t)size);
1008 abort();
1009 }
1010
1011 return offset;
1012 }
1013
1014 ram_addr_t last_ram_offset(void)
1015 {
1016 RAMBlock *block;
1017 ram_addr_t last = 0;
1018
1019 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1020 last = MAX(last, block->offset + block->length);
1021
1022 return last;
1023 }
1024
1025 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1026 {
1027 int ret;
1028 QemuOpts *machine_opts;
1029
1030     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1031 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1032 if (machine_opts &&
1033 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1034 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1035 if (ret) {
1036 perror("qemu_madvise");
1037 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1038 "but dump_guest_core=off specified\n");
1039 }
1040 }
1041 }
1042
1043 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1044 {
1045 RAMBlock *new_block, *block;
1046
1047 new_block = NULL;
1048 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1049 if (block->offset == addr) {
1050 new_block = block;
1051 break;
1052 }
1053 }
1054 assert(new_block);
1055 assert(!new_block->idstr[0]);
1056
1057 if (dev) {
1058 char *id = qdev_get_dev_path(dev);
1059 if (id) {
1060 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1061 g_free(id);
1062 }
1063 }
1064 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1065
1066 /* This assumes the iothread lock is taken here too. */
1067 qemu_mutex_lock_ramlist();
1068 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1069 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1070 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1071 new_block->idstr);
1072 abort();
1073 }
1074 }
1075 qemu_mutex_unlock_ramlist();
1076 }
1077
1078 static int memory_try_enable_merging(void *addr, size_t len)
1079 {
1080 QemuOpts *opts;
1081
1082 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1083 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1084 /* disabled by the user */
1085 return 0;
1086 }
1087
1088 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1089 }
1090
1091 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1092 MemoryRegion *mr)
1093 {
1094 RAMBlock *block, *new_block;
1095
1096 size = TARGET_PAGE_ALIGN(size);
1097 new_block = g_malloc0(sizeof(*new_block));
1098
1099 /* This assumes the iothread lock is taken here too. */
1100 qemu_mutex_lock_ramlist();
1101 new_block->mr = mr;
1102 new_block->offset = find_ram_offset(size);
1103 if (host) {
1104 new_block->host = host;
1105 new_block->flags |= RAM_PREALLOC_MASK;
1106 } else {
1107 if (mem_path) {
1108 #if defined (__linux__) && !defined(TARGET_S390X)
1109 new_block->host = file_ram_alloc(new_block, size, mem_path);
1110 if (!new_block->host) {
1111 new_block->host = qemu_anon_ram_alloc(size);
1112 memory_try_enable_merging(new_block->host, size);
1113 }
1114 #else
1115 fprintf(stderr, "-mem-path option unsupported\n");
1116 exit(1);
1117 #endif
1118 } else {
1119 if (xen_enabled()) {
1120 xen_ram_alloc(new_block->offset, size, mr);
1121 } else if (kvm_enabled()) {
1122 /* some s390/kvm configurations have special constraints */
1123 new_block->host = kvm_ram_alloc(size);
1124 } else {
1125 new_block->host = qemu_anon_ram_alloc(size);
1126 }
1127 memory_try_enable_merging(new_block->host, size);
1128 }
1129 }
1130 new_block->length = size;
1131
1132 /* Keep the list sorted from biggest to smallest block. */
1133 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1134 if (block->length < new_block->length) {
1135 break;
1136 }
1137 }
1138 if (block) {
1139 QTAILQ_INSERT_BEFORE(block, new_block, next);
1140 } else {
1141 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1142 }
1143 ram_list.mru_block = NULL;
1144
1145 ram_list.version++;
1146 qemu_mutex_unlock_ramlist();
1147
1148 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1149 last_ram_offset() >> TARGET_PAGE_BITS);
1150 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1151 0, size >> TARGET_PAGE_BITS);
1152 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1153
1154 qemu_ram_setup_dump(new_block->host, size);
1155 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1156
1157 if (kvm_enabled())
1158 kvm_setup_guest_memory(new_block->host, size);
1159
1160 return new_block->offset;
1161 }
1162
1163 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1164 {
1165 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1166 }
1167
1168 void qemu_ram_free_from_ptr(ram_addr_t addr)
1169 {
1170 RAMBlock *block;
1171
1172 /* This assumes the iothread lock is taken here too. */
1173 qemu_mutex_lock_ramlist();
1174 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1175 if (addr == block->offset) {
1176 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1177 ram_list.mru_block = NULL;
1178 ram_list.version++;
1179 g_free(block);
1180 break;
1181 }
1182 }
1183 qemu_mutex_unlock_ramlist();
1184 }
1185
1186 void qemu_ram_free(ram_addr_t addr)
1187 {
1188 RAMBlock *block;
1189
1190 /* This assumes the iothread lock is taken here too. */
1191 qemu_mutex_lock_ramlist();
1192 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1193 if (addr == block->offset) {
1194 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1195 ram_list.mru_block = NULL;
1196 ram_list.version++;
1197 if (block->flags & RAM_PREALLOC_MASK) {
1198 ;
1199 } else if (mem_path) {
1200 #if defined (__linux__) && !defined(TARGET_S390X)
1201 if (block->fd) {
1202 munmap(block->host, block->length);
1203 close(block->fd);
1204 } else {
1205 qemu_anon_ram_free(block->host, block->length);
1206 }
1207 #else
1208 abort();
1209 #endif
1210 } else {
1211 if (xen_enabled()) {
1212 xen_invalidate_map_cache_entry(block->host);
1213 } else {
1214 qemu_anon_ram_free(block->host, block->length);
1215 }
1216 }
1217 g_free(block);
1218 break;
1219 }
1220 }
1221 qemu_mutex_unlock_ramlist();
1222
1223 }
1224
1225 #ifndef _WIN32
1226 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1227 {
1228 RAMBlock *block;
1229 ram_addr_t offset;
1230 int flags;
1231 void *area, *vaddr;
1232
1233 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1234 offset = addr - block->offset;
1235 if (offset < block->length) {
1236 vaddr = block->host + offset;
1237 if (block->flags & RAM_PREALLOC_MASK) {
1238 ;
1239 } else {
1240 flags = MAP_FIXED;
1241 munmap(vaddr, length);
1242 if (mem_path) {
1243 #if defined(__linux__) && !defined(TARGET_S390X)
1244 if (block->fd) {
1245 #ifdef MAP_POPULATE
1246 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1247 MAP_PRIVATE;
1248 #else
1249 flags |= MAP_PRIVATE;
1250 #endif
1251 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1252 flags, block->fd, offset);
1253 } else {
1254 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1255 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1256 flags, -1, 0);
1257 }
1258 #else
1259 abort();
1260 #endif
1261 } else {
1262 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1263 flags |= MAP_SHARED | MAP_ANONYMOUS;
1264 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1265 flags, -1, 0);
1266 #else
1267 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1268 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1269 flags, -1, 0);
1270 #endif
1271 }
1272 if (area != vaddr) {
1273 fprintf(stderr, "Could not remap addr: "
1274 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1275 length, addr);
1276 exit(1);
1277 }
1278 memory_try_enable_merging(vaddr, length);
1279 qemu_ram_setup_dump(vaddr, length);
1280 }
1281 return;
1282 }
1283 }
1284 }
1285 #endif /* !_WIN32 */
1286
1287 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1288 With the exception of the softmmu code in this file, this should
1289 only be used for local memory (e.g. video ram) that the device owns,
1290 and knows it isn't going to access beyond the end of the block.
1291
1292 It should not be used for general purpose DMA.
1293 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1294 */
1295 void *qemu_get_ram_ptr(ram_addr_t addr)
1296 {
1297 RAMBlock *block;
1298
1299 /* The list is protected by the iothread lock here. */
1300 block = ram_list.mru_block;
1301 if (block && addr - block->offset < block->length) {
1302 goto found;
1303 }
1304 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1305 if (addr - block->offset < block->length) {
1306 goto found;
1307 }
1308 }
1309
1310 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1311 abort();
1312
1313 found:
1314 ram_list.mru_block = block;
1315 if (xen_enabled()) {
1316         /* We need to check whether the requested address is in RAM
1317          * because we don't want to map the entire guest memory in QEMU.
1318          * In that case, just map up to the end of the page.
1319 */
1320 if (block->offset == 0) {
1321 return xen_map_cache(addr, 0, 0);
1322 } else if (block->host == NULL) {
1323 block->host =
1324 xen_map_cache(block->offset, block->length, 1);
1325 }
1326 }
1327 return block->host + (addr - block->offset);
1328 }
1329
1330 /* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
1331 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1332 *
1333 * ??? Is this still necessary?
1334 */
1335 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1336 {
1337 RAMBlock *block;
1338
1339 /* The list is protected by the iothread lock here. */
1340 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1341 if (addr - block->offset < block->length) {
1342 if (xen_enabled()) {
1343             /* We need to check whether the requested address is in RAM
1344              * because we don't want to map the entire guest memory in QEMU.
1345              * In that case, just map up to the end of the page.
1346 */
1347 if (block->offset == 0) {
1348 return xen_map_cache(addr, 0, 0);
1349 } else if (block->host == NULL) {
1350 block->host =
1351 xen_map_cache(block->offset, block->length, 1);
1352 }
1353 }
1354 return block->host + (addr - block->offset);
1355 }
1356 }
1357
1358 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1359 abort();
1360
1361 return NULL;
1362 }
1363
1364 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1365 * but takes a size argument */
1366 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1367 {
1368 if (*size == 0) {
1369 return NULL;
1370 }
1371 if (xen_enabled()) {
1372 return xen_map_cache(addr, *size, 1);
1373 } else {
1374 RAMBlock *block;
1375
1376 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1377 if (addr - block->offset < block->length) {
1378 if (addr - block->offset + *size > block->length)
1379 *size = block->length - addr + block->offset;
1380 return block->host + (addr - block->offset);
1381 }
1382 }
1383
1384 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1385 abort();
1386 }
1387 }
1388
1389 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1390 {
1391 RAMBlock *block;
1392 uint8_t *host = ptr;
1393
1394 if (xen_enabled()) {
1395 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1396 return 0;
1397 }
1398
1399 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1400         /* This case happens when the block is not mapped. */
1401 if (block->host == NULL) {
1402 continue;
1403 }
1404 if (host - block->host < block->length) {
1405 *ram_addr = block->offset + (host - block->host);
1406 return 0;
1407 }
1408 }
1409
1410 return -1;
1411 }
1412
1413 /* Some of the softmmu routines need to translate from a host pointer
1414 (typically a TLB entry) back to a ram offset. */
1415 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1416 {
1417 ram_addr_t ram_addr;
1418
1419 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1420 fprintf(stderr, "Bad ram pointer %p\n", ptr);
1421 abort();
1422 }
1423 return ram_addr;
1424 }
1425
1426 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1427 uint64_t val, unsigned size)
1428 {
1429 int dirty_flags;
1430 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1431 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1432 tb_invalidate_phys_page_fast(ram_addr, size);
1433 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1434 }
1435 switch (size) {
1436 case 1:
1437 stb_p(qemu_get_ram_ptr(ram_addr), val);
1438 break;
1439 case 2:
1440 stw_p(qemu_get_ram_ptr(ram_addr), val);
1441 break;
1442 case 4:
1443 stl_p(qemu_get_ram_ptr(ram_addr), val);
1444 break;
1445 default:
1446 abort();
1447 }
1448 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1449 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1450 /* we remove the notdirty callback only if the code has been
1451 flushed */
1452 if (dirty_flags == 0xff)
1453 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1454 }
1455
1456 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1457 unsigned size, bool is_write)
1458 {
1459 return is_write;
1460 }
1461
1462 static const MemoryRegionOps notdirty_mem_ops = {
1463 .write = notdirty_mem_write,
1464 .valid.accepts = notdirty_mem_accepts,
1465 .endianness = DEVICE_NATIVE_ENDIAN,
1466 };
1467
1468 /* Generate a debug exception if a watchpoint has been hit. */
1469 static void check_watchpoint(int offset, int len_mask, int flags)
1470 {
1471 CPUArchState *env = cpu_single_env;
1472 target_ulong pc, cs_base;
1473 target_ulong vaddr;
1474 CPUWatchpoint *wp;
1475 int cpu_flags;
1476
1477 if (env->watchpoint_hit) {
1478 /* We re-entered the check after replacing the TB. Now raise
1479          * the debug interrupt so that it will trigger after the
1480 * current instruction. */
1481 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1482 return;
1483 }
1484 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1485 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1486 if ((vaddr == (wp->vaddr & len_mask) ||
1487 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1488 wp->flags |= BP_WATCHPOINT_HIT;
1489 if (!env->watchpoint_hit) {
1490 env->watchpoint_hit = wp;
1491 tb_check_watchpoint(env);
1492 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1493 env->exception_index = EXCP_DEBUG;
1494 cpu_loop_exit(env);
1495 } else {
1496 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1497 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1498 cpu_resume_from_signal(env, NULL);
1499 }
1500 }
1501 } else {
1502 wp->flags &= ~BP_WATCHPOINT_HIT;
1503 }
1504 }
1505 }
1506
1507 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1508 so these check for a hit then pass through to the normal out-of-line
1509 phys routines. */
1510 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1511 unsigned size)
1512 {
1513 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1514 switch (size) {
1515 case 1: return ldub_phys(addr);
1516 case 2: return lduw_phys(addr);
1517 case 4: return ldl_phys(addr);
1518 default: abort();
1519 }
1520 }
1521
1522 static void watch_mem_write(void *opaque, hwaddr addr,
1523 uint64_t val, unsigned size)
1524 {
1525 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1526 switch (size) {
1527 case 1:
1528 stb_phys(addr, val);
1529 break;
1530 case 2:
1531 stw_phys(addr, val);
1532 break;
1533 case 4:
1534 stl_phys(addr, val);
1535 break;
1536 default: abort();
1537 }
1538 }
1539
1540 static const MemoryRegionOps watch_mem_ops = {
1541 .read = watch_mem_read,
1542 .write = watch_mem_write,
1543 .endianness = DEVICE_NATIVE_ENDIAN,
1544 };
1545
1546 static uint64_t subpage_read(void *opaque, hwaddr addr,
1547 unsigned len)
1548 {
1549 subpage_t *mmio = opaque;
1550 unsigned int idx = SUBPAGE_IDX(addr);
1551 uint64_t val;
1552
1553 MemoryRegionSection *section;
1554 #if defined(DEBUG_SUBPAGE)
1555 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1556 mmio, len, addr, idx);
1557 #endif
1558
1559 section = &phys_sections[mmio->sub_section[idx]];
1560 addr += mmio->base;
1561 addr -= section->offset_within_address_space;
1562 addr += section->offset_within_region;
1563 io_mem_read(section->mr, addr, &val, len);
1564 return val;
1565 }
1566
1567 static void subpage_write(void *opaque, hwaddr addr,
1568 uint64_t value, unsigned len)
1569 {
1570 subpage_t *mmio = opaque;
1571 unsigned int idx = SUBPAGE_IDX(addr);
1572 MemoryRegionSection *section;
1573 #if defined(DEBUG_SUBPAGE)
1574 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1575 " idx %d value %"PRIx64"\n",
1576 __func__, mmio, len, addr, idx, value);
1577 #endif
1578
1579 section = &phys_sections[mmio->sub_section[idx]];
1580 addr += mmio->base;
1581 addr -= section->offset_within_address_space;
1582 addr += section->offset_within_region;
1583 io_mem_write(section->mr, addr, value, len);
1584 }
1585
1586 static bool subpage_accepts(void *opaque, hwaddr addr,
1587 unsigned size, bool is_write)
1588 {
1589 subpage_t *mmio = opaque;
1590 unsigned int idx = SUBPAGE_IDX(addr);
1591 MemoryRegionSection *section;
1592 #if defined(DEBUG_SUBPAGE)
1593     printf("%s: subpage %p %c size %d addr " TARGET_FMT_plx
1594            " idx %d\n", __func__, mmio,
1595            is_write ? 'w' : 'r', size, addr, idx);
1596 #endif
1597
1598 section = &phys_sections[mmio->sub_section[idx]];
1599 addr += mmio->base;
1600 addr -= section->offset_within_address_space;
1601 addr += section->offset_within_region;
1602 return memory_region_access_valid(section->mr, addr, size, is_write);
1603 }
1604
1605 static const MemoryRegionOps subpage_ops = {
1606 .read = subpage_read,
1607 .write = subpage_write,
1608 .valid.accepts = subpage_accepts,
1609 .endianness = DEVICE_NATIVE_ENDIAN,
1610 };
1611
1612 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1613 unsigned size)
1614 {
1615 ram_addr_t raddr = addr;
1616 void *ptr = qemu_get_ram_ptr(raddr);
1617 switch (size) {
1618 case 1: return ldub_p(ptr);
1619 case 2: return lduw_p(ptr);
1620 case 4: return ldl_p(ptr);
1621 default: abort();
1622 }
1623 }
1624
1625 static void subpage_ram_write(void *opaque, hwaddr addr,
1626 uint64_t value, unsigned size)
1627 {
1628 ram_addr_t raddr = addr;
1629 void *ptr = qemu_get_ram_ptr(raddr);
1630 switch (size) {
1631 case 1: return stb_p(ptr, value);
1632 case 2: return stw_p(ptr, value);
1633 case 4: return stl_p(ptr, value);
1634 default: abort();
1635 }
1636 }
1637
1638 static const MemoryRegionOps subpage_ram_ops = {
1639 .read = subpage_ram_read,
1640 .write = subpage_ram_write,
1641 .endianness = DEVICE_NATIVE_ENDIAN,
1642 };
1643
1644 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1645 uint16_t section)
1646 {
1647 int idx, eidx;
1648
1649 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1650 return -1;
1651 idx = SUBPAGE_IDX(start);
1652 eidx = SUBPAGE_IDX(end);
1653 #if defined(DEBUG_SUBPAGE)
1654     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1655            __func__, mmio, start, end, idx, eidx, section);
1656 #endif
1657 if (memory_region_is_ram(phys_sections[section].mr)) {
1658 MemoryRegionSection new_section = phys_sections[section];
1659 new_section.mr = &io_mem_subpage_ram;
1660 section = phys_section_add(&new_section);
1661 }
1662 for (; idx <= eidx; idx++) {
1663 mmio->sub_section[idx] = section;
1664 }
1665
1666 return 0;
1667 }
1668
1669 static subpage_t *subpage_init(hwaddr base)
1670 {
1671 subpage_t *mmio;
1672
1673 mmio = g_malloc0(sizeof(subpage_t));
1674
1675 mmio->base = base;
1676 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1677 "subpage", TARGET_PAGE_SIZE);
1678 mmio->iomem.subpage = true;
1679 #if defined(DEBUG_SUBPAGE)
1680     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1681            mmio, base, TARGET_PAGE_SIZE);
1682 #endif
1683 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1684
1685 return mmio;
1686 }
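/*
 * Editor's note: illustrative sketch only, not part of the original file,
 * guarded with #if 0.  A subpage covers one target page whose contents are
 * split between several MemoryRegionSections.  SUBPAGE_IDX() keys the
 * sub_section[] table by the byte offset within the page, so dispatch is an
 * array lookup followed by the usual section -> region offset adjustment
 * performed in subpage_read()/subpage_write().
 */
#if 0
static uint16_t subpage_lookup_example(subpage_t *mmio, hwaddr addr)
{
    return mmio->sub_section[SUBPAGE_IDX(addr)];
}
#endif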
1687
1688 static uint16_t dummy_section(MemoryRegion *mr)
1689 {
1690 MemoryRegionSection section = {
1691 .mr = mr,
1692 .offset_within_address_space = 0,
1693 .offset_within_region = 0,
1694 .size = UINT64_MAX,
1695 };
1696
1697 return phys_section_add(&section);
1698 }
1699
1700 MemoryRegion *iotlb_to_region(hwaddr index)
1701 {
1702 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1703 }
1704
1705 static void io_mem_init(void)
1706 {
1707 memory_region_init_io(&io_mem_rom, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1708 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1709 "unassigned", UINT64_MAX);
1710 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1711 "notdirty", UINT64_MAX);
1712 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1713 "subpage-ram", UINT64_MAX);
1714 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1715 "watch", UINT64_MAX);
1716 }
1717
1718 static void mem_begin(MemoryListener *listener)
1719 {
1720 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1721
1722 destroy_all_mappings(d);
1723 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1724 }
1725
1726 static void core_begin(MemoryListener *listener)
1727 {
1728 phys_sections_clear();
1729 phys_section_unassigned = dummy_section(&io_mem_unassigned);
1730 phys_section_notdirty = dummy_section(&io_mem_notdirty);
1731 phys_section_rom = dummy_section(&io_mem_rom);
1732 phys_section_watch = dummy_section(&io_mem_watch);
1733 }
1734
1735 static void tcg_commit(MemoryListener *listener)
1736 {
1737 CPUArchState *env;
1738
1739 /* since each CPU stores ram addresses in its TLB cache, we must
1740 reset the modified entries */
1741 /* XXX: slow ! */
1742 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1743 tlb_flush(env, 1);
1744 }
1745 }
1746
1747 static void core_log_global_start(MemoryListener *listener)
1748 {
1749 cpu_physical_memory_set_dirty_tracking(1);
1750 }
1751
1752 static void core_log_global_stop(MemoryListener *listener)
1753 {
1754 cpu_physical_memory_set_dirty_tracking(0);
1755 }
1756
1757 static void io_region_add(MemoryListener *listener,
1758 MemoryRegionSection *section)
1759 {
1760 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1761
1762 mrio->mr = section->mr;
1763 mrio->offset = section->offset_within_region;
1764 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1765 section->offset_within_address_space, section->size);
1766 ioport_register(&mrio->iorange);
1767 }
1768
1769 static void io_region_del(MemoryListener *listener,
1770 MemoryRegionSection *section)
1771 {
1772 isa_unassign_ioport(section->offset_within_address_space, section->size);
1773 }
1774
1775 static MemoryListener core_memory_listener = {
1776 .begin = core_begin,
1777 .log_global_start = core_log_global_start,
1778 .log_global_stop = core_log_global_stop,
1779 .priority = 1,
1780 };
1781
1782 static MemoryListener io_memory_listener = {
1783 .region_add = io_region_add,
1784 .region_del = io_region_del,
1785 .priority = 0,
1786 };
1787
1788 static MemoryListener tcg_memory_listener = {
1789 .commit = tcg_commit,
1790 };
1791
1792 void address_space_init_dispatch(AddressSpace *as)
1793 {
1794 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1795
1796 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1797 d->listener = (MemoryListener) {
1798 .begin = mem_begin,
1799 .region_add = mem_add,
1800 .region_nop = mem_add,
1801 .priority = 0,
1802 };
1803 as->dispatch = d;
1804 memory_listener_register(&d->listener, as);
1805 }
1806
1807 void address_space_destroy_dispatch(AddressSpace *as)
1808 {
1809 AddressSpaceDispatch *d = as->dispatch;
1810
1811 memory_listener_unregister(&d->listener);
1812 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1813 g_free(d);
1814 as->dispatch = NULL;
1815 }
1816
1817 static void memory_map_init(void)
1818 {
1819 system_memory = g_malloc(sizeof(*system_memory));
1820 memory_region_init(system_memory, "system", INT64_MAX);
1821 address_space_init(&address_space_memory, system_memory);
1822 address_space_memory.name = "memory";
1823
1824 system_io = g_malloc(sizeof(*system_io));
1825 memory_region_init(system_io, "io", 65536);
1826 address_space_init(&address_space_io, system_io);
1827 address_space_io.name = "I/O";
1828
1829 memory_listener_register(&core_memory_listener, &address_space_memory);
1830 memory_listener_register(&io_memory_listener, &address_space_io);
1831 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1832
1833 dma_context_init(&dma_context_memory, &address_space_memory,
1834 NULL, NULL, NULL);
1835 }
1836
1837 MemoryRegion *get_system_memory(void)
1838 {
1839 return system_memory;
1840 }
1841
1842 MemoryRegion *get_system_io(void)
1843 {
1844 return system_io;
1845 }
1846
1847 #endif /* !defined(CONFIG_USER_ONLY) */
1848
1849 /* physical memory access (slow version, mainly for debug) */
1850 #if defined(CONFIG_USER_ONLY)
1851 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1852 uint8_t *buf, int len, int is_write)
1853 {
1854 int l, flags;
1855 target_ulong page;
1856 void * p;
1857
1858 while (len > 0) {
1859 page = addr & TARGET_PAGE_MASK;
1860 l = (page + TARGET_PAGE_SIZE) - addr;
1861 if (l > len)
1862 l = len;
1863 flags = page_get_flags(page);
1864 if (!(flags & PAGE_VALID))
1865 return -1;
1866 if (is_write) {
1867 if (!(flags & PAGE_WRITE))
1868 return -1;
1869 /* XXX: this code should not depend on lock_user */
1870 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1871 return -1;
1872 memcpy(p, buf, l);
1873 unlock_user(p, addr, l);
1874 } else {
1875 if (!(flags & PAGE_READ))
1876 return -1;
1877 /* XXX: this code should not depend on lock_user */
1878 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1879 return -1;
1880 memcpy(buf, p, l);
1881 unlock_user(p, addr, 0);
1882 }
1883 len -= l;
1884 buf += l;
1885 addr += l;
1886 }
1887 return 0;
1888 }
1889
1890 #else
1891
1892 static void invalidate_and_set_dirty(hwaddr addr,
1893 hwaddr length)
1894 {
1895 if (!cpu_physical_memory_is_dirty(addr)) {
1896 /* invalidate code */
1897 tb_invalidate_phys_page_range(addr, addr + length, 0);
1898 /* set dirty bit */
1899 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1900 }
1901 xen_modified_memory(addr, length);
1902 }
1903
1904 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1905 {
1906 if (memory_region_is_ram(mr)) {
1907 return !(is_write && mr->readonly);
1908 }
1909 if (memory_region_is_romd(mr)) {
1910 return !is_write;
1911 }
1912
1913 return false;
1914 }
1915
1916 static inline int memory_access_size(MemoryRegion *mr, int l, hwaddr addr)
1917 {
1918 if (l >= 4 && (((addr & 3) == 0 || mr->ops->impl.unaligned))) {
1919 return 4;
1920 }
1921 if (l >= 2 && (((addr & 1) == 0) || mr->ops->impl.unaligned)) {
1922 return 2;
1923 }
1924 return 1;
1925 }
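/*
 * Editor's note: illustrative sketch only, not part of the original file,
 * guarded with #if 0.  Unless the region advertises impl.unaligned,
 * memory_access_size() never returns an access wider than the alignment of
 * the current address, so an unaligned buffer is carved into naturally
 * aligned pieces by the loop in address_space_rw() below.  A 4-byte write at
 * offset 0x1 to a device without unaligned support becomes three MMIO writes:
 *
 *   addr 0x1, size 1   (0x1 is only byte aligned)
 *   addr 0x2, size 2   (2-byte aligned; 3 bytes remain, too few for 4)
 *   addr 0x4, size 1   (one byte left)
 */
#if 0
static void unaligned_write_example(AddressSpace *as)
{
    uint8_t buf[4] = { 0x11, 0x22, 0x33, 0x44 };

    /* Split by address_space_rw() into accesses of size 1, 2 and 1. */
    address_space_rw(as, 0x1, buf, sizeof(buf), true);
}
#endif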
1926
1927 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1928 int len, bool is_write)
1929 {
1930 hwaddr l;
1931 uint8_t *ptr;
1932 uint64_t val;
1933 hwaddr addr1;
1934 MemoryRegionSection *section;
1935 bool error = false;
1936
1937 while (len > 0) {
1938 l = len;
1939 section = address_space_translate(as, addr, &addr1, &l, is_write);
1940
1941 if (is_write) {
1942 if (!memory_access_is_direct(section->mr, is_write)) {
1943 l = memory_access_size(section->mr, l, addr1);
1944 /* XXX: could force cpu_single_env to NULL to avoid
1945 potential bugs */
1946 if (l == 4) {
1947 /* 32 bit write access */
1948 val = ldl_p(buf);
1949 error |= io_mem_write(section->mr, addr1, val, 4);
1950 } else if (l == 2) {
1951 /* 16 bit write access */
1952 val = lduw_p(buf);
1953 error |= io_mem_write(section->mr, addr1, val, 2);
1954 } else {
1955 /* 8 bit write access */
1956 val = ldub_p(buf);
1957 error |= io_mem_write(section->mr, addr1, val, 1);
1958 }
1959 } else {
1960 addr1 += memory_region_get_ram_addr(section->mr);
1961 /* RAM case */
1962 ptr = qemu_get_ram_ptr(addr1);
1963 memcpy(ptr, buf, l);
1964 invalidate_and_set_dirty(addr1, l);
1965 }
1966 } else {
1967 if (!memory_access_is_direct(section->mr, is_write)) {
1968 /* I/O case */
1969 l = memory_access_size(section->mr, l, addr1);
1970 if (l == 4) {
1971 /* 32 bit read access */
1972 error |= io_mem_read(section->mr, addr1, &val, 4);
1973 stl_p(buf, val);
1974 } else if (l == 2) {
1975 /* 16 bit read access */
1976 error |= io_mem_read(section->mr, addr1, &val, 2);
1977 stw_p(buf, val);
1978 } else {
1979 /* 8 bit read access */
1980 error |= io_mem_read(section->mr, addr1, &val, 1);
1981 stb_p(buf, val);
1982 }
1983 } else {
1984 /* RAM case */
1985 ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
1986 memcpy(buf, ptr, l);
1987 }
1988 }
1989 len -= l;
1990 buf += l;
1991 addr += l;
1992 }
1993
1994 return error;
1995 }
1996
1997 bool address_space_write(AddressSpace *as, hwaddr addr,
1998 const uint8_t *buf, int len)
1999 {
2000 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2001 }
2002
2003 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2004 {
2005 return address_space_rw(as, addr, buf, len, false);
2006 }
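/*
 * Editor's note: illustrative sketch only, not part of the original file,
 * guarded with #if 0.  Typical device-side use of the helpers above: write a
 * buffer into guest memory through the system memory address space and read
 * it back.  In this version of the code the return value is true when any
 * MMIO access in the range reported an error.
 */
#if 0
static bool rw_roundtrip_example(hwaddr gpa)
{
    const uint8_t in[16] = "hello, guest";
    uint8_t out[16] = { 0 };
    bool error = false;

    error |= address_space_write(&address_space_memory, gpa, in, sizeof(in));
    error |= address_space_read(&address_space_memory, gpa, out, sizeof(out));
    return error;
}
#endif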
2007
2008
2009 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2010 int len, int is_write)
2011 {
2012 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2013 }
2014
2015 /* used for ROM loading : can write in RAM and ROM */
2016 void cpu_physical_memory_write_rom(hwaddr addr,
2017 const uint8_t *buf, int len)
2018 {
2019 hwaddr l;
2020 uint8_t *ptr;
2021 hwaddr addr1;
2022 MemoryRegionSection *section;
2023
2024 while (len > 0) {
2025 l = len;
2026 section = address_space_translate(&address_space_memory,
2027 addr, &addr1, &l, true);
2028
2029 if (!(memory_region_is_ram(section->mr) ||
2030 memory_region_is_romd(section->mr))) {
2031 /* do nothing */
2032 } else {
2033 addr1 += memory_region_get_ram_addr(section->mr);
2034 /* ROM/RAM case */
2035 ptr = qemu_get_ram_ptr(addr1);
2036 memcpy(ptr, buf, l);
2037 invalidate_and_set_dirty(addr1, l);
2038 }
2039 len -= l;
2040 buf += l;
2041 addr += l;
2042 }
2043 }
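
/*
 * Sketch of the typical caller, a ROM loader: copy a firmware blob into
 * guest memory at rom_base.  Unlike cpu_physical_memory_write(), which
 * refuses to treat a read-only region as directly writable RAM, this helper
 * patches the backing memory and marks it dirty.  "blob" and "blob_size"
 * are placeholders for whatever the loader read from disk.
 */
static void load_rom_example(hwaddr rom_base, const uint8_t *blob, int blob_size)
{
    cpu_physical_memory_write_rom(rom_base, blob, blob_size);
}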
2044
2045 typedef struct {
2046 void *buffer;
2047 hwaddr addr;
2048 hwaddr len;
2049 } BounceBuffer;
2050
2051 static BounceBuffer bounce;
2052
2053 typedef struct MapClient {
2054 void *opaque;
2055 void (*callback)(void *opaque);
2056 QLIST_ENTRY(MapClient) link;
2057 } MapClient;
2058
2059 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2060 = QLIST_HEAD_INITIALIZER(map_client_list);
2061
2062 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2063 {
2064 MapClient *client = g_malloc(sizeof(*client));
2065
2066 client->opaque = opaque;
2067 client->callback = callback;
2068 QLIST_INSERT_HEAD(&map_client_list, client, link);
2069 return client;
2070 }
2071
2072 static void cpu_unregister_map_client(void *_client)
2073 {
2074 MapClient *client = (MapClient *)_client;
2075
2076 QLIST_REMOVE(client, link);
2077 g_free(client);
2078 }
2079
2080 static void cpu_notify_map_clients(void)
2081 {
2082 MapClient *client;
2083
2084 while (!QLIST_EMPTY(&map_client_list)) {
2085 client = QLIST_FIRST(&map_client_list);
2086 client->callback(client->opaque);
2087 cpu_unregister_map_client(client);
2088 }
2089 }
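
/*
 * Sketch of the retry protocol the map-client list implements.  A device
 * model whose address_space_map() call returned NULL (bounce buffer busy)
 * registers a callback; cpu_notify_map_clients() invokes it once a mapping
 * is released and then drops the registration.  "ExampleDMAState" and
 * example_dma_retry() are hypothetical names.
 */
typedef struct ExampleDMAState {
    void *map_client;
    /* ... description of the pending transfer would live here ... */
} ExampleDMAState;

static void example_dma_retry(void *opaque)
{
    ExampleDMAState *s = opaque;

    s->map_client = NULL;   /* entry is freed by cpu_notify_map_clients() */
    /* retry address_space_map() and resume the stalled transfer here */
}

static void example_dma_wait_for_map(ExampleDMAState *s)
{
    if (!s->map_client) {
        s->map_client = cpu_register_map_client(s, example_dma_retry);
    }
}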
2090
2091 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2092 {
2093 MemoryRegionSection *section;
2094 hwaddr l, xlat;
2095
2096 while (len > 0) {
2097 l = len;
2098 section = address_space_translate(as, addr, &xlat, &l, is_write);
2099 if (!memory_access_is_direct(section->mr, is_write)) {
2100 l = memory_access_size(section->mr, l, addr);
2101 if (!memory_region_access_valid(section->mr, xlat, l, is_write)) {
2102 return false;
2103 }
2104 }
2105
2106 len -= l;
2107 addr += l;
2108 }
2109 return true;
2110 }
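
/*
 * Sketch: a device model can probe a guest-supplied DMA window before
 * touching it.  "desc_addr" and "desc_len" stand in for values taken from
 * a descriptor ring; checking both directions verifies the window is fully
 * readable and writable.
 */
static bool dma_window_valid(AddressSpace *as, hwaddr desc_addr, int desc_len)
{
    return address_space_access_valid(as, desc_addr, desc_len, false) &&
           address_space_access_valid(as, desc_addr, desc_len, true);
}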
2111
2112 /* Map a physical memory region into a host virtual address.
2113 * May map a subset of the requested range, given by and returned in *plen.
2114 * May return NULL if resources needed to perform the mapping are exhausted.
2115 * Use only for reads OR writes - not for read-modify-write operations.
2116 * Use cpu_register_map_client() to know when retrying the map operation is
2117 * likely to succeed.
2118 */
2119 void *address_space_map(AddressSpace *as,
2120 hwaddr addr,
2121 hwaddr *plen,
2122 bool is_write)
2123 {
2124 hwaddr len = *plen;
2125 hwaddr todo = 0;
2126 hwaddr l, xlat;
2127 MemoryRegionSection *section;
2128 ram_addr_t raddr = RAM_ADDR_MAX;
2129 ram_addr_t rlen;
2130 void *ret;
2131
2132 while (len > 0) {
2133 l = len;
2134 section = address_space_translate(as, addr, &xlat, &l, is_write);
2135
2136 if (!memory_access_is_direct(section->mr, is_write)) {
2137 if (todo || bounce.buffer) {
2138 break;
2139 }
2140 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2141 bounce.addr = addr;
2142 bounce.len = l;
2143 if (!is_write) {
2144 address_space_read(as, addr, bounce.buffer, l);
2145 }
2146
2147 *plen = l;
2148 return bounce.buffer;
2149 }
2150 if (!todo) {
2151 raddr = memory_region_get_ram_addr(section->mr) + xlat;
2152 } else {
2153 if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
2154 break;
2155 }
2156 }
2157
2158 len -= l;
2159 addr += l;
2160 todo += l;
2161 }
2162 rlen = todo;
2163 ret = qemu_ram_ptr_length(raddr, &rlen);
2164 *plen = rlen;
2165 return ret;
2166 }
2167
2168 /* Unmaps a memory region previously mapped by address_space_map().
2169 * Will also mark the memory as dirty if is_write == 1. access_len gives
2170 * the amount of memory that was actually read or written by the caller.
2171 */
2172 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2173 int is_write, hwaddr access_len)
2174 {
2175 if (buffer != bounce.buffer) {
2176 if (is_write) {
2177 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2178 while (access_len) {
2179 unsigned l;
2180 l = TARGET_PAGE_SIZE;
2181 if (l > access_len)
2182 l = access_len;
2183 invalidate_and_set_dirty(addr1, l);
2184 addr1 += l;
2185 access_len -= l;
2186 }
2187 }
2188 if (xen_enabled()) {
2189 xen_invalidate_map_cache_entry(buffer);
2190 }
2191 return;
2192 }
2193 if (is_write) {
2194 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2195 }
2196 qemu_vfree(bounce.buffer);
2197 bounce.buffer = NULL;
2198 cpu_notify_map_clients();
2199 }
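
/*
 * Sketch of the map/consume/unmap pattern the two functions above exist
 * for.  *plen may come back smaller than requested and the pointer may be
 * the bounce buffer rather than guest RAM, so a caller has to loop and
 * must not assume zero-copy.  "consume" is a placeholder for the real
 * user of the data.
 */
static void map_and_consume(AddressSpace *as, hwaddr addr, hwaddr len,
                            void (*consume)(const void *buf, hwaddr len))
{
    while (len > 0) {
        hwaddr plen = len;
        void *buf = address_space_map(as, addr, &plen, false);

        if (!buf || plen == 0) {
            /* resources exhausted; see cpu_register_map_client() */
            break;
        }
        consume(buf, plen);
        address_space_unmap(as, buf, plen, false, plen);
        addr += plen;
        len -= plen;
    }
}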
2200
2201 void *cpu_physical_memory_map(hwaddr addr,
2202 hwaddr *plen,
2203 int is_write)
2204 {
2205 return address_space_map(&address_space_memory, addr, plen, is_write);
2206 }
2207
2208 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2209 int is_write, hwaddr access_len)
2210 {
2211 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2212 }
2213
2214 /* warning: addr must be aligned */
2215 static inline uint32_t ldl_phys_internal(hwaddr addr,
2216 enum device_endian endian)
2217 {
2218 uint8_t *ptr;
2219 uint64_t val;
2220 MemoryRegionSection *section;
2221 hwaddr l = 4;
2222 hwaddr addr1;
2223
2224 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2225 false);
2226 if (l < 4 || !memory_access_is_direct(section->mr, false)) {
2227 /* I/O case */
2228 io_mem_read(section->mr, addr1, &val, 4);
2229 #if defined(TARGET_WORDS_BIGENDIAN)
2230 if (endian == DEVICE_LITTLE_ENDIAN) {
2231 val = bswap32(val);
2232 }
2233 #else
2234 if (endian == DEVICE_BIG_ENDIAN) {
2235 val = bswap32(val);
2236 }
2237 #endif
2238 } else {
2239 /* RAM case */
2240 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2241 & TARGET_PAGE_MASK)
2242 + addr1);
2243 switch (endian) {
2244 case DEVICE_LITTLE_ENDIAN:
2245 val = ldl_le_p(ptr);
2246 break;
2247 case DEVICE_BIG_ENDIAN:
2248 val = ldl_be_p(ptr);
2249 break;
2250 default:
2251 val = ldl_p(ptr);
2252 break;
2253 }
2254 }
2255 return val;
2256 }
2257
2258 uint32_t ldl_phys(hwaddr addr)
2259 {
2260 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2261 }
2262
2263 uint32_t ldl_le_phys(hwaddr addr)
2264 {
2265 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2266 }
2267
2268 uint32_t ldl_be_phys(hwaddr addr)
2269 {
2270 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2271 }
2272
2273 /* warning: addr must be aligned */
2274 static inline uint64_t ldq_phys_internal(hwaddr addr,
2275 enum device_endian endian)
2276 {
2277 uint8_t *ptr;
2278 uint64_t val;
2279 MemoryRegionSection *section;
2280 hwaddr l = 8;
2281 hwaddr addr1;
2282
2283 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2284 false);
2285 if (l < 8 || !memory_access_is_direct(section->mr, false)) {
2286 /* I/O case */
2287 io_mem_read(section->mr, addr1, &val, 8);
2288 #if defined(TARGET_WORDS_BIGENDIAN)
2289 if (endian == DEVICE_LITTLE_ENDIAN) {
2290 val = bswap64(val);
2291 }
2292 #else
2293 if (endian == DEVICE_BIG_ENDIAN) {
2294 val = bswap64(val);
2295 }
2296 #endif
2297 } else {
2298 /* RAM case */
2299 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2300 & TARGET_PAGE_MASK)
2301 + addr1);
2302 switch (endian) {
2303 case DEVICE_LITTLE_ENDIAN:
2304 val = ldq_le_p(ptr);
2305 break;
2306 case DEVICE_BIG_ENDIAN:
2307 val = ldq_be_p(ptr);
2308 break;
2309 default:
2310 val = ldq_p(ptr);
2311 break;
2312 }
2313 }
2314 return val;
2315 }
2316
2317 uint64_t ldq_phys(hwaddr addr)
2318 {
2319 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2320 }
2321
2322 uint64_t ldq_le_phys(hwaddr addr)
2323 {
2324 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2325 }
2326
2327 uint64_t ldq_be_phys(hwaddr addr)
2328 {
2329 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2330 }
2331
2332 /* XXX: optimize */
2333 uint32_t ldub_phys(hwaddr addr)
2334 {
2335 uint8_t val;
2336 cpu_physical_memory_read(addr, &val, 1);
2337 return val;
2338 }
2339
2340 /* warning: addr must be aligned */
2341 static inline uint32_t lduw_phys_internal(hwaddr addr,
2342 enum device_endian endian)
2343 {
2344 uint8_t *ptr;
2345 uint64_t val;
2346 MemoryRegionSection *section;
2347 hwaddr l = 2;
2348 hwaddr addr1;
2349
2350 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2351 false);
2352 if (l < 2 || !memory_access_is_direct(section->mr, false)) {
2353 /* I/O case */
2354 io_mem_read(section->mr, addr1, &val, 2);
2355 #if defined(TARGET_WORDS_BIGENDIAN)
2356 if (endian == DEVICE_LITTLE_ENDIAN) {
2357 val = bswap16(val);
2358 }
2359 #else
2360 if (endian == DEVICE_BIG_ENDIAN) {
2361 val = bswap16(val);
2362 }
2363 #endif
2364 } else {
2365 /* RAM case */
2366 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2367 & TARGET_PAGE_MASK)
2368 + addr1);
2369 switch (endian) {
2370 case DEVICE_LITTLE_ENDIAN:
2371 val = lduw_le_p(ptr);
2372 break;
2373 case DEVICE_BIG_ENDIAN:
2374 val = lduw_be_p(ptr);
2375 break;
2376 default:
2377 val = lduw_p(ptr);
2378 break;
2379 }
2380 }
2381 return val;
2382 }
2383
2384 uint32_t lduw_phys(hwaddr addr)
2385 {
2386 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2387 }
2388
2389 uint32_t lduw_le_phys(hwaddr addr)
2390 {
2391 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2392 }
2393
2394 uint32_t lduw_be_phys(hwaddr addr)
2395 {
2396 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2397 }
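
/*
 * Sketch: reading fixed-endian descriptor fields with the helpers above.
 * The _le/_be variants return the value in host order regardless of the
 * target's endianness, so callers never test TARGET_WORDS_BIGENDIAN
 * themselves.  "desc" and the field layout are hypothetical.
 */
static void read_desc_example(hwaddr desc, uint32_t *flags, uint32_t *count)
{
    *flags = ldl_le_phys(desc);        /* 32-bit little-endian field at +0 */
    *count = lduw_le_phys(desc + 4);   /* 16-bit little-endian field at +4 */
}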
2398
2399 /* warning: addr must be aligned. The RAM page is not marked as dirty
2400    and the code inside is not invalidated. This is useful when the dirty
2401    bits are used to track modified PTEs */
2402 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2403 {
2404 uint8_t *ptr;
2405 MemoryRegionSection *section;
2406 hwaddr l = 4;
2407 hwaddr addr1;
2408
2409 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2410 true);
2411 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2412 io_mem_write(section->mr, addr1, val, 4);
2413 } else {
2414 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2415 ptr = qemu_get_ram_ptr(addr1);
2416 stl_p(ptr, val);
2417
2418 if (unlikely(in_migration)) {
2419 if (!cpu_physical_memory_is_dirty(addr1)) {
2420 /* invalidate code */
2421 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2422 /* set dirty bit */
2423 cpu_physical_memory_set_dirty_flags(
2424 addr1, (0xff & ~CODE_DIRTY_FLAG));
2425 }
2426 }
2427 }
2428 }
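
/*
 * Sketch of the intended user of stl_phys_notdirty(): target helper code
 * that updates a 32-bit page-table entry in guest RAM (for instance to set
 * an accessed/dirty bit) while the dirty bitmap itself is being used to
 * track PTE modifications, so the store must not mark the page dirty or
 * invalidate translated code.  "pte_addr" and "flag" are hypothetical.
 */
static void pte_set_flag_example(hwaddr pte_addr, uint32_t flag)
{
    uint32_t pte = ldl_phys(pte_addr);

    stl_phys_notdirty(pte_addr, pte | flag);
}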
2429
2430 /* warning: addr must be aligned */
2431 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2432 enum device_endian endian)
2433 {
2434 uint8_t *ptr;
2435 MemoryRegionSection *section;
2436 hwaddr l = 4;
2437 hwaddr addr1;
2438
2439 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2440 true);
2441 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2442 #if defined(TARGET_WORDS_BIGENDIAN)
2443 if (endian == DEVICE_LITTLE_ENDIAN) {
2444 val = bswap32(val);
2445 }
2446 #else
2447 if (endian == DEVICE_BIG_ENDIAN) {
2448 val = bswap32(val);
2449 }
2450 #endif
2451 io_mem_write(section->mr, addr1, val, 4);
2452 } else {
2453 /* RAM case */
2454 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2455 ptr = qemu_get_ram_ptr(addr1);
2456 switch (endian) {
2457 case DEVICE_LITTLE_ENDIAN:
2458 stl_le_p(ptr, val);
2459 break;
2460 case DEVICE_BIG_ENDIAN:
2461 stl_be_p(ptr, val);
2462 break;
2463 default:
2464 stl_p(ptr, val);
2465 break;
2466 }
2467 invalidate_and_set_dirty(addr1, 4);
2468 }
2469 }
2470
2471 void stl_phys(hwaddr addr, uint32_t val)
2472 {
2473 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2474 }
2475
2476 void stl_le_phys(hwaddr addr, uint32_t val)
2477 {
2478 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2479 }
2480
2481 void stl_be_phys(hwaddr addr, uint32_t val)
2482 {
2483 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2484 }
2485
2486 /* XXX: optimize */
2487 void stb_phys(hwaddr addr, uint32_t val)
2488 {
2489 uint8_t v = val;
2490 cpu_physical_memory_write(addr, &v, 1);
2491 }
2492
2493 /* warning: addr must be aligned */
2494 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2495 enum device_endian endian)
2496 {
2497 uint8_t *ptr;
2498 MemoryRegionSection *section;
2499 hwaddr l = 2;
2500 hwaddr addr1;
2501
2502 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2503 true);
2504 if (l < 2 || !memory_access_is_direct(section->mr, true)) {
2505 #if defined(TARGET_WORDS_BIGENDIAN)
2506 if (endian == DEVICE_LITTLE_ENDIAN) {
2507 val = bswap16(val);
2508 }
2509 #else
2510 if (endian == DEVICE_BIG_ENDIAN) {
2511 val = bswap16(val);
2512 }
2513 #endif
2514 io_mem_write(section->mr, addr1, val, 2);
2515 } else {
2516 /* RAM case */
2517 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2518 ptr = qemu_get_ram_ptr(addr1);
2519 switch (endian) {
2520 case DEVICE_LITTLE_ENDIAN:
2521 stw_le_p(ptr, val);
2522 break;
2523 case DEVICE_BIG_ENDIAN:
2524 stw_be_p(ptr, val);
2525 break;
2526 default:
2527 stw_p(ptr, val);
2528 break;
2529 }
2530 invalidate_and_set_dirty(addr1, 2);
2531 }
2532 }
2533
2534 void stw_phys(hwaddr addr, uint32_t val)
2535 {
2536 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2537 }
2538
2539 void stw_le_phys(hwaddr addr, uint32_t val)
2540 {
2541 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2542 }
2543
2544 void stw_be_phys(hwaddr addr, uint32_t val)
2545 {
2546 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2547 }
2548
2549 /* XXX: optimize */
2550 void stq_phys(hwaddr addr, uint64_t val)
2551 {
2552 val = tswap64(val);
2553 cpu_physical_memory_write(addr, &val, 8);
2554 }
2555
2556 void stq_le_phys(hwaddr addr, uint64_t val)
2557 {
2558 val = cpu_to_le64(val);
2559 cpu_physical_memory_write(addr, &val, 8);
2560 }
2561
2562 void stq_be_phys(hwaddr addr, uint64_t val)
2563 {
2564 val = cpu_to_be64(val);
2565 cpu_physical_memory_write(addr, &val, 8);
2566 }
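
/*
 * Sketch mirroring the load example further up: publishing a 64-bit
 * big-endian sequence counter followed by a one-byte ready flag at a
 * hypothetical guest-physical address.
 */
static void write_status_example(hwaddr status, uint64_t seq, uint32_t ready)
{
    stq_be_phys(status, seq);     /* 64-bit big-endian counter at +0 */
    stb_phys(status + 8, ready);  /* single status byte at +8 */
}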
2567
2568 /* virtual memory access for debug (includes writing to ROM) */
2569 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2570 uint8_t *buf, int len, int is_write)
2571 {
2572 int l;
2573 hwaddr phys_addr;
2574 target_ulong page;
2575
2576 while (len > 0) {
2577 page = addr & TARGET_PAGE_MASK;
2578 phys_addr = cpu_get_phys_page_debug(env, page);
2579 /* if no physical page mapped, return an error */
2580 if (phys_addr == -1)
2581 return -1;
2582 l = (page + TARGET_PAGE_SIZE) - addr;
2583 if (l > len)
2584 l = len;
2585 phys_addr += (addr & ~TARGET_PAGE_MASK);
2586 if (is_write)
2587 cpu_physical_memory_write_rom(phys_addr, buf, l);
2588 else
2589 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2590 len -= l;
2591 buf += l;
2592 addr += l;
2593 }
2594 return 0;
2595 }
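
/*
 * Sketch of the kind of caller cpu_memory_rw_debug() serves (the gdb
 * stub): fetch a 32-bit word from a guest *virtual* address, reporting
 * failure if no physical page is mapped there.  "env" belongs to whoever
 * owns the vCPU.
 */
static int debug_peek_u32(CPUArchState *env, target_ulong vaddr, uint32_t *val)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(env, vaddr, buf, sizeof(buf), 0) < 0) {
        return -1;              /* unmapped page */
    }
    *val = ldl_p(buf);          /* target-endian load from the temporary buffer */
    return 0;
}
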
2596 #endif
2597
2598 #if !defined(CONFIG_USER_ONLY)
2599
2600 /*
2601 * A helper function for the _utterly broken_ virtio device model to find out if
2602 * it's running on a big-endian machine. Don't do this at home kids!
2603 */
2604 bool virtio_is_big_endian(void);
2605 bool virtio_is_big_endian(void)
2606 {
2607 #if defined(TARGET_WORDS_BIGENDIAN)
2608 return true;
2609 #else
2610 return false;
2611 #endif
2612 }
2613
2614 #endif
2615
2616 #ifndef CONFIG_USER_ONLY
2617 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2618 {
2619 MemoryRegionSection *section;
2620 hwaddr l = 1;
2621
2622 section = address_space_translate(&address_space_memory,
2623 phys_addr, &phys_addr, &l, false);
2624
2625 return !(memory_region_is_ram(section->mr) ||
2626 memory_region_is_romd(section->mr));
2627 }
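
/*
 * Sketch: a memory-dump style caller using the predicate above to avoid
 * read side effects, loading a word only when the page is RAM or ROM.
 */
static bool try_read_ram_word(hwaddr phys_addr, uint32_t *val)
{
    if (cpu_physical_memory_is_io(phys_addr)) {
        return false;   /* MMIO: reading could have side effects */
    }
    *val = ldl_phys(phys_addr);
    return true;
}
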
2628 #endif