1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "hw/xen/xen.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
42 #include <qemu.h>
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
45 #include "trace.h"
46 #endif
47 #include "exec/cpu-all.h"
48
49 #include "exec/cputlb.h"
50 #include "translate-all.h"
51
52 #include "exec/memory-internal.h"
53
54 //#define DEBUG_SUBPAGE
55
56 #if !defined(CONFIG_USER_ONLY)
57 static int in_migration;
58
59 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
60
61 static MemoryRegion *system_memory;
62 static MemoryRegion *system_io;
63
64 AddressSpace address_space_io;
65 AddressSpace address_space_memory;
66
67 MemoryRegion io_mem_rom, io_mem_notdirty;
68 static MemoryRegion io_mem_unassigned;
69
70 #endif
71
72 CPUState *first_cpu;
73 /* current CPU in the current thread. It is only valid inside
74 cpu_exec() */
75 DEFINE_TLS(CPUState *, current_cpu);
76 /* 0 = Do not count executed instructions.
77 1 = Precise instruction counting.
78 2 = Adaptive rate instruction counting. */
79 int use_icount;
80
81 #if !defined(CONFIG_USER_ONLY)
82
83 typedef struct PhysPageEntry PhysPageEntry;
84
85 struct PhysPageEntry {
86 uint16_t is_leaf : 1;
87 /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
88 uint16_t ptr : 15;
89 };
90
91 typedef PhysPageEntry Node[L2_SIZE];
92
93 struct AddressSpaceDispatch {
94 /* This is a multi-level map on the physical address space.
95 * The bottom level has pointers to MemoryRegionSections.
96 */
97 PhysPageEntry phys_map;
98 Node *nodes;
99 MemoryRegionSection *sections;
100 AddressSpace *as;
101 };
102
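/*
 * A worked sketch of the lookup this structure supports (assuming the usual
 * L2_BITS == 10, i.e. L2_SIZE == 1024 entries per node; the real constants
 * come from the headers included above):
 *
 *     hwaddr index = addr >> TARGET_PAGE_BITS;
 *     PhysPageEntry lp = d->phys_map;
 *     for (int i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
 *         lp = d->nodes[lp.ptr][(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
 *     }
 *     MemoryRegionSection *section = &d->sections[lp.ptr];
 *
 * phys_page_find() below is the canonical version of this walk.
 */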
103 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
104 typedef struct subpage_t {
105 MemoryRegion iomem;
106 AddressSpace *as;
107 hwaddr base;
108 uint16_t sub_section[TARGET_PAGE_SIZE];
109 } subpage_t;
110
111 #define PHYS_SECTION_UNASSIGNED 0
112 #define PHYS_SECTION_NOTDIRTY 1
113 #define PHYS_SECTION_ROM 2
114 #define PHYS_SECTION_WATCH 3
115
116 typedef struct PhysPageMap {
117 unsigned sections_nb;
118 unsigned sections_nb_alloc;
119 unsigned nodes_nb;
120 unsigned nodes_nb_alloc;
121 Node *nodes;
122 MemoryRegionSection *sections;
123 } PhysPageMap;
124
125 static PhysPageMap *prev_map;
126 static PhysPageMap next_map;
127
128 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
129
130 static void io_mem_init(void);
131 static void memory_map_init(void);
132 static void *qemu_safe_ram_ptr(ram_addr_t addr);
133
134 static MemoryRegion io_mem_watch;
135 #endif
136
137 #if !defined(CONFIG_USER_ONLY)
138
139 static void phys_map_node_reserve(unsigned nodes)
140 {
141 if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
142 next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
143 16);
144 next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
145 next_map.nodes_nb + nodes);
146 next_map.nodes = g_renew(Node, next_map.nodes,
147 next_map.nodes_nb_alloc);
148 }
149 }
150
151 static uint16_t phys_map_node_alloc(void)
152 {
153 unsigned i;
154 uint16_t ret;
155
156 ret = next_map.nodes_nb++;
157 assert(ret != PHYS_MAP_NODE_NIL);
158 assert(ret != next_map.nodes_nb_alloc);
159 for (i = 0; i < L2_SIZE; ++i) {
160 next_map.nodes[ret][i].is_leaf = 0;
161 next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
162 }
163 return ret;
164 }
165
166 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
167 hwaddr *nb, uint16_t leaf,
168 int level)
169 {
170 PhysPageEntry *p;
171 int i;
172 hwaddr step = (hwaddr)1 << (level * L2_BITS);
173
174 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
175 lp->ptr = phys_map_node_alloc();
176 p = next_map.nodes[lp->ptr];
177 if (level == 0) {
178 for (i = 0; i < L2_SIZE; i++) {
179 p[i].is_leaf = 1;
180 p[i].ptr = PHYS_SECTION_UNASSIGNED;
181 }
182 }
183 } else {
184 p = next_map.nodes[lp->ptr];
185 }
186 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
187
188 while (*nb && lp < &p[L2_SIZE]) {
189 if ((*index & (step - 1)) == 0 && *nb >= step) {
190 lp->is_leaf = true;
191 lp->ptr = leaf;
192 *index += step;
193 *nb -= step;
194 } else {
195 phys_page_set_level(lp, index, nb, leaf, level - 1);
196 }
197 ++lp;
198 }
199 }
200
201 static void phys_page_set(AddressSpaceDispatch *d,
202 hwaddr index, hwaddr nb,
203 uint16_t leaf)
204 {
205 /* Wildly overreserve - it doesn't matter much. */
206 phys_map_node_reserve(3 * P_L2_LEVELS);
207
208 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
209 }
210
211 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
212 Node *nodes, MemoryRegionSection *sections)
213 {
214 PhysPageEntry *p;
215 int i;
216
217 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
218 if (lp.ptr == PHYS_MAP_NODE_NIL) {
219 return &sections[PHYS_SECTION_UNASSIGNED];
220 }
221 p = nodes[lp.ptr];
222 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
223 }
224 return &sections[lp.ptr];
225 }
226
227 bool memory_region_is_unassigned(MemoryRegion *mr)
228 {
229 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
230 && mr != &io_mem_watch;
231 }
232
233 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
234 hwaddr addr,
235 bool resolve_subpage)
236 {
237 MemoryRegionSection *section;
238 subpage_t *subpage;
239
240 section = phys_page_find(d->phys_map, addr >> TARGET_PAGE_BITS,
241 d->nodes, d->sections);
242 if (resolve_subpage && section->mr->subpage) {
243 subpage = container_of(section->mr, subpage_t, iomem);
244 section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
245 }
246 return section;
247 }
248
249 static MemoryRegionSection *
250 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
251 hwaddr *plen, bool resolve_subpage)
252 {
253 MemoryRegionSection *section;
254 Int128 diff;
255
256 section = address_space_lookup_region(d, addr, resolve_subpage);
257 /* Compute offset within MemoryRegionSection */
258 addr -= section->offset_within_address_space;
259
260 /* Compute offset within MemoryRegion */
261 *xlat = addr + section->offset_within_region;
262
263 diff = int128_sub(section->mr->size, int128_make64(addr));
264 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
265 return section;
266 }
267
268 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
269 hwaddr *xlat, hwaddr *plen,
270 bool is_write)
271 {
272 IOMMUTLBEntry iotlb;
273 MemoryRegionSection *section;
274 MemoryRegion *mr;
275 hwaddr len = *plen;
276
277 for (;;) {
278 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
279 mr = section->mr;
280
281 if (!mr->iommu_ops) {
282 break;
283 }
284
285 iotlb = mr->iommu_ops->translate(mr, addr);
286 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
287 | (addr & iotlb.addr_mask));
288 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
289 if (!(iotlb.perm & (1 << is_write))) {
290 mr = &io_mem_unassigned;
291 break;
292 }
293
294 as = iotlb.target_as;
295 }
296
297 *plen = len;
298 *xlat = addr;
299 return mr;
300 }
301
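/*
 * A minimal caller sketch (mirroring what address_space_rw() does later in
 * this file; 'as', 'addr', 'buf', 'len' and 'val' are placeholders):
 *
 *     hwaddr xlat, l = len;
 *     MemoryRegion *mr = address_space_translate(as, addr, &xlat, &l, false);
 *     if (memory_region_is_ram(mr)) {
 *         memcpy(buf, qemu_get_ram_ptr(memory_region_get_ram_addr(mr) + xlat), l);
 *     } else {
 *         io_mem_read(mr, xlat, &val, memory_access_size(mr, l, xlat));
 *     }
 *
 * l may come back smaller than requested, so real callers loop until the
 * whole length has been covered.
 */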
302 MemoryRegionSection *
303 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
304 hwaddr *plen)
305 {
306 MemoryRegionSection *section;
307 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
308
309 assert(!section->mr->iommu_ops);
310 return section;
311 }
312 #endif
313
314 void cpu_exec_init_all(void)
315 {
316 #if !defined(CONFIG_USER_ONLY)
317 qemu_mutex_init(&ram_list.mutex);
318 memory_map_init();
319 io_mem_init();
320 #endif
321 }
322
323 #if !defined(CONFIG_USER_ONLY)
324
325 static int cpu_common_post_load(void *opaque, int version_id)
326 {
327 CPUState *cpu = opaque;
328
329 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
330 version_id is increased. */
331 cpu->interrupt_request &= ~0x01;
332 tlb_flush(cpu->env_ptr, 1);
333
334 return 0;
335 }
336
337 const VMStateDescription vmstate_cpu_common = {
338 .name = "cpu_common",
339 .version_id = 1,
340 .minimum_version_id = 1,
341 .minimum_version_id_old = 1,
342 .post_load = cpu_common_post_load,
343 .fields = (VMStateField []) {
344 VMSTATE_UINT32(halted, CPUState),
345 VMSTATE_UINT32(interrupt_request, CPUState),
346 VMSTATE_END_OF_LIST()
347 }
348 };
349
350 #endif
351
352 CPUState *qemu_get_cpu(int index)
353 {
354 CPUState *cpu = first_cpu;
355
356 while (cpu) {
357 if (cpu->cpu_index == index) {
358 break;
359 }
360 cpu = cpu->next_cpu;
361 }
362
363 return cpu;
364 }
365
366 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
367 {
368 CPUState *cpu;
369
370 cpu = first_cpu;
371 while (cpu) {
372 func(cpu, data);
373 cpu = cpu->next_cpu;
374 }
375 }
376
377 void cpu_exec_init(CPUArchState *env)
378 {
379 CPUState *cpu = ENV_GET_CPU(env);
380 CPUClass *cc = CPU_GET_CLASS(cpu);
381 CPUState **pcpu;
382 int cpu_index;
383
384 #if defined(CONFIG_USER_ONLY)
385 cpu_list_lock();
386 #endif
387 cpu->next_cpu = NULL;
388 pcpu = &first_cpu;
389 cpu_index = 0;
390 while (*pcpu != NULL) {
391 pcpu = &(*pcpu)->next_cpu;
392 cpu_index++;
393 }
394 cpu->cpu_index = cpu_index;
395 cpu->numa_node = 0;
396 QTAILQ_INIT(&env->breakpoints);
397 QTAILQ_INIT(&env->watchpoints);
398 #ifndef CONFIG_USER_ONLY
399 cpu->thread_id = qemu_get_thread_id();
400 #endif
401 *pcpu = cpu;
402 #if defined(CONFIG_USER_ONLY)
403 cpu_list_unlock();
404 #endif
405 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
406 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
407 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
408 cpu_save, cpu_load, env);
409 assert(cc->vmsd == NULL);
410 #endif
411 if (cc->vmsd != NULL) {
412 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
413 }
414 }
415
416 #if defined(TARGET_HAS_ICE)
417 #if defined(CONFIG_USER_ONLY)
418 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
419 {
420 tb_invalidate_phys_page_range(pc, pc + 1, 0);
421 }
422 #else
423 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
424 {
425 tb_invalidate_phys_addr(cpu_get_phys_page_debug(cpu, pc) |
426 (pc & ~TARGET_PAGE_MASK));
427 }
428 #endif
429 #endif /* TARGET_HAS_ICE */
430
431 #if defined(CONFIG_USER_ONLY)
432 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
433
434 {
435 }
436
437 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
438 int flags, CPUWatchpoint **watchpoint)
439 {
440 return -ENOSYS;
441 }
442 #else
443 /* Add a watchpoint. */
444 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
445 int flags, CPUWatchpoint **watchpoint)
446 {
447 target_ulong len_mask = ~(len - 1);
448 CPUWatchpoint *wp;
449
450 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
451 if ((len & (len - 1)) || (addr & ~len_mask) ||
452 len == 0 || len > TARGET_PAGE_SIZE) {
453 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
454 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
455 return -EINVAL;
456 }
457 wp = g_malloc(sizeof(*wp));
458
459 wp->vaddr = addr;
460 wp->len_mask = len_mask;
461 wp->flags = flags;
462
463 /* keep all GDB-injected watchpoints in front */
464 if (flags & BP_GDB)
465 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
466 else
467 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
468
469 tlb_flush_page(env, addr);
470
471 if (watchpoint)
472 *watchpoint = wp;
473 return 0;
474 }
475
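/*
 * Usage sketch (roughly what the gdbstub does for a "watch" command; the
 * error handling here is illustrative):
 *
 *     CPUWatchpoint *wp;
 *     if (cpu_watchpoint_insert(env, addr, 4, BP_MEM_WRITE | BP_GDB, &wp) < 0) {
 *         return -EINVAL;  // unaligned, zero, non-power-of-2, or > page size
 *     }
 */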
476 /* Remove a specific watchpoint. */
477 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
478 int flags)
479 {
480 target_ulong len_mask = ~(len - 1);
481 CPUWatchpoint *wp;
482
483 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
484 if (addr == wp->vaddr && len_mask == wp->len_mask
485 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
486 cpu_watchpoint_remove_by_ref(env, wp);
487 return 0;
488 }
489 }
490 return -ENOENT;
491 }
492
493 /* Remove a specific watchpoint by reference. */
494 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
495 {
496 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
497
498 tlb_flush_page(env, watchpoint->vaddr);
499
500 g_free(watchpoint);
501 }
502
503 /* Remove all matching watchpoints. */
504 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
505 {
506 CPUWatchpoint *wp, *next;
507
508 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
509 if (wp->flags & mask)
510 cpu_watchpoint_remove_by_ref(env, wp);
511 }
512 }
513 #endif
514
515 /* Add a breakpoint. */
516 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
517 CPUBreakpoint **breakpoint)
518 {
519 #if defined(TARGET_HAS_ICE)
520 CPUBreakpoint *bp;
521
522 bp = g_malloc(sizeof(*bp));
523
524 bp->pc = pc;
525 bp->flags = flags;
526
527 /* keep all GDB-injected breakpoints in front */
528 if (flags & BP_GDB) {
529 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
530 } else {
531 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
532 }
533
534 breakpoint_invalidate(ENV_GET_CPU(env), pc);
535
536 if (breakpoint) {
537 *breakpoint = bp;
538 }
539 return 0;
540 #else
541 return -ENOSYS;
542 #endif
543 }
544
545 /* Remove a specific breakpoint. */
546 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
547 {
548 #if defined(TARGET_HAS_ICE)
549 CPUBreakpoint *bp;
550
551 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
552 if (bp->pc == pc && bp->flags == flags) {
553 cpu_breakpoint_remove_by_ref(env, bp);
554 return 0;
555 }
556 }
557 return -ENOENT;
558 #else
559 return -ENOSYS;
560 #endif
561 }
562
563 /* Remove a specific breakpoint by reference. */
564 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
565 {
566 #if defined(TARGET_HAS_ICE)
567 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
568
569 breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
570
571 g_free(breakpoint);
572 #endif
573 }
574
575 /* Remove all matching breakpoints. */
576 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
577 {
578 #if defined(TARGET_HAS_ICE)
579 CPUBreakpoint *bp, *next;
580
581 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
582 if (bp->flags & mask)
583 cpu_breakpoint_remove_by_ref(env, bp);
584 }
585 #endif
586 }
587
588 /* enable or disable single step mode. EXCP_DEBUG is returned by the
589 CPU loop after each instruction */
590 void cpu_single_step(CPUState *cpu, int enabled)
591 {
592 #if defined(TARGET_HAS_ICE)
593 CPUArchState *env = cpu->env_ptr;
594
595 if (cpu->singlestep_enabled != enabled) {
596 cpu->singlestep_enabled = enabled;
597 if (kvm_enabled()) {
598 kvm_update_guest_debug(env, 0);
599 } else {
600 /* must flush all the translated code to avoid inconsistencies */
601 /* XXX: only flush what is necessary */
602 tb_flush(env);
603 }
604 }
605 #endif
606 }
607
608 void cpu_abort(CPUArchState *env, const char *fmt, ...)
609 {
610 CPUState *cpu = ENV_GET_CPU(env);
611 va_list ap;
612 va_list ap2;
613
614 va_start(ap, fmt);
615 va_copy(ap2, ap);
616 fprintf(stderr, "qemu: fatal: ");
617 vfprintf(stderr, fmt, ap);
618 fprintf(stderr, "\n");
619 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
620 if (qemu_log_enabled()) {
621 qemu_log("qemu: fatal: ");
622 qemu_log_vprintf(fmt, ap2);
623 qemu_log("\n");
624 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
625 qemu_log_flush();
626 qemu_log_close();
627 }
628 va_end(ap2);
629 va_end(ap);
630 #if defined(CONFIG_USER_ONLY)
631 {
632 struct sigaction act;
633 sigfillset(&act.sa_mask);
634 act.sa_handler = SIG_DFL;
635 sigaction(SIGABRT, &act, NULL);
636 }
637 #endif
638 abort();
639 }
640
641 CPUArchState *cpu_copy(CPUArchState *env)
642 {
643 CPUArchState *new_env = cpu_init(env->cpu_model_str);
644 #if defined(TARGET_HAS_ICE)
645 CPUBreakpoint *bp;
646 CPUWatchpoint *wp;
647 #endif
648
649 memcpy(new_env, env, sizeof(CPUArchState));
650
651 /* Clone all break/watchpoints.
652 Note: Once we support ptrace with hw-debug register access, make sure
653 BP_CPU break/watchpoints are handled correctly on clone. */
654 QTAILQ_INIT(&env->breakpoints);
655 QTAILQ_INIT(&env->watchpoints);
656 #if defined(TARGET_HAS_ICE)
657 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
658 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
659 }
660 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
661 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
662 wp->flags, NULL);
663 }
664 #endif
665
666 return new_env;
667 }
668
669 #if !defined(CONFIG_USER_ONLY)
670 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
671 uintptr_t length)
672 {
673 uintptr_t start1;
674
675 /* we modify the TLB cache so that the dirty bit will be set again
676 when accessing the range */
677 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
678     /* Check that we don't span multiple blocks - spanning them would break
679        the address comparisons below. */
680 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
681 != (end - 1) - start) {
682 abort();
683 }
684 cpu_tlb_reset_dirty_all(start1, length);
685
686 }
687
688 /* Note: start and end must be within the same ram block. */
689 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
690 int dirty_flags)
691 {
692 uintptr_t length;
693
694 start &= TARGET_PAGE_MASK;
695 end = TARGET_PAGE_ALIGN(end);
696
697 length = end - start;
698 if (length == 0)
699 return;
700 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
701
702 if (tcg_enabled()) {
703 tlb_reset_dirty_range_all(start, end, length);
704 }
705 }
706
707 static int cpu_physical_memory_set_dirty_tracking(int enable)
708 {
709 int ret = 0;
710 in_migration = enable;
711 return ret;
712 }
713
714 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
715 MemoryRegionSection *section,
716 target_ulong vaddr,
717 hwaddr paddr, hwaddr xlat,
718 int prot,
719 target_ulong *address)
720 {
721 hwaddr iotlb;
722 CPUWatchpoint *wp;
723
724 if (memory_region_is_ram(section->mr)) {
725 /* Normal RAM. */
726 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
727 + xlat;
728 if (!section->readonly) {
729 iotlb |= PHYS_SECTION_NOTDIRTY;
730 } else {
731 iotlb |= PHYS_SECTION_ROM;
732 }
733 } else {
734 iotlb = section - address_space_memory.dispatch->sections;
735 iotlb += xlat;
736 }
737
738 /* Make accesses to pages with watchpoints go via the
739 watchpoint trap routines. */
740 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
741 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
742 /* Avoid trapping reads of pages with a write breakpoint. */
743 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
744 iotlb = PHYS_SECTION_WATCH + paddr;
745 *address |= TLB_MMIO;
746 break;
747 }
748 }
749 }
750
751 return iotlb;
752 }
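/*
 * A short summary of the two encodings produced above: for RAM the iotlb
 * value is a page-aligned ram_addr_t with PHYS_SECTION_NOTDIRTY or
 * PHYS_SECTION_ROM ORed into the low bits; for MMIO it is the index of the
 * section in sections[] plus the page-aligned offset of the page within the
 * region.  iotlb_to_region() later recovers the section with
 * (index & ~TARGET_PAGE_MASK), which works because phys_section_add()
 * guarantees the index stays below TARGET_PAGE_SIZE.
 */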
753 #endif /* defined(CONFIG_USER_ONLY) */
754
755 #if !defined(CONFIG_USER_ONLY)
756
757 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
758 uint16_t section);
759 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
760
761 static uint16_t phys_section_add(MemoryRegionSection *section)
762 {
763 /* The physical section number is ORed with a page-aligned
764 * pointer to produce the iotlb entries. Thus it should
765 * never overflow into the page-aligned value.
766 */
767 assert(next_map.sections_nb < TARGET_PAGE_SIZE);
768
769 if (next_map.sections_nb == next_map.sections_nb_alloc) {
770 next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
771 16);
772 next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
773 next_map.sections_nb_alloc);
774 }
775 next_map.sections[next_map.sections_nb] = *section;
776 memory_region_ref(section->mr);
777 return next_map.sections_nb++;
778 }
779
780 static void phys_section_destroy(MemoryRegion *mr)
781 {
782 memory_region_unref(mr);
783
784 if (mr->subpage) {
785 subpage_t *subpage = container_of(mr, subpage_t, iomem);
786 memory_region_destroy(&subpage->iomem);
787 g_free(subpage);
788 }
789 }
790
791 static void phys_sections_free(PhysPageMap *map)
792 {
793 while (map->sections_nb > 0) {
794 MemoryRegionSection *section = &map->sections[--map->sections_nb];
795 phys_section_destroy(section->mr);
796 }
797 g_free(map->sections);
798 g_free(map->nodes);
799 g_free(map);
800 }
801
802 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
803 {
804 subpage_t *subpage;
805 hwaddr base = section->offset_within_address_space
806 & TARGET_PAGE_MASK;
807 MemoryRegionSection *existing = phys_page_find(d->phys_map, base >> TARGET_PAGE_BITS,
808 next_map.nodes, next_map.sections);
809 MemoryRegionSection subsection = {
810 .offset_within_address_space = base,
811 .size = int128_make64(TARGET_PAGE_SIZE),
812 };
813 hwaddr start, end;
814
815 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
816
817 if (!(existing->mr->subpage)) {
818 subpage = subpage_init(d->as, base);
819 subsection.mr = &subpage->iomem;
820 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
821 phys_section_add(&subsection));
822 } else {
823 subpage = container_of(existing->mr, subpage_t, iomem);
824 }
825 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
826 end = start + int128_get64(section->size) - 1;
827 subpage_register(subpage, start, end, phys_section_add(section));
828 }
829
830
831 static void register_multipage(AddressSpaceDispatch *d,
832 MemoryRegionSection *section)
833 {
834 hwaddr start_addr = section->offset_within_address_space;
835 uint16_t section_index = phys_section_add(section);
836 uint64_t num_pages = int128_get64(int128_rshift(section->size,
837 TARGET_PAGE_BITS));
838
839 assert(num_pages);
840 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
841 }
842
843 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
844 {
845 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
846 AddressSpaceDispatch *d = as->next_dispatch;
847 MemoryRegionSection now = *section, remain = *section;
848 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
849
850 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
851 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
852 - now.offset_within_address_space;
853
854 now.size = int128_min(int128_make64(left), now.size);
855 register_subpage(d, &now);
856 } else {
857 now.size = int128_zero();
858 }
859 while (int128_ne(remain.size, now.size)) {
860 remain.size = int128_sub(remain.size, now.size);
861 remain.offset_within_address_space += int128_get64(now.size);
862 remain.offset_within_region += int128_get64(now.size);
863 now = remain;
864 if (int128_lt(remain.size, page_size)) {
865 register_subpage(d, &now);
866 } else if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
867 now.size = page_size;
868 register_subpage(d, &now);
869 } else {
870 now.size = int128_and(now.size, int128_neg(page_size));
871 register_multipage(d, &now);
872 }
873 }
874 }
875
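/*
 * A worked example of the splitting above (assuming 4K target pages): a
 * section covering [0x1800, 0x5400) in the address space is registered as
 *   - a subpage for   [0x1800, 0x2000)  (unaligned head),
 *   - a multipage for [0x2000, 0x5000)  (whole pages),
 *   - a subpage for   [0x5000, 0x5400)  (partial tail).
 */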
876 void qemu_flush_coalesced_mmio_buffer(void)
877 {
878 if (kvm_enabled())
879 kvm_flush_coalesced_mmio_buffer();
880 }
881
882 void qemu_mutex_lock_ramlist(void)
883 {
884 qemu_mutex_lock(&ram_list.mutex);
885 }
886
887 void qemu_mutex_unlock_ramlist(void)
888 {
889 qemu_mutex_unlock(&ram_list.mutex);
890 }
891
892 #if defined(__linux__) && !defined(TARGET_S390X)
893
894 #include <sys/vfs.h>
895
896 #define HUGETLBFS_MAGIC 0x958458f6
897
898 static long gethugepagesize(const char *path)
899 {
900 struct statfs fs;
901 int ret;
902
903 do {
904 ret = statfs(path, &fs);
905 } while (ret != 0 && errno == EINTR);
906
907 if (ret != 0) {
908 perror(path);
909 return 0;
910 }
911
912 if (fs.f_type != HUGETLBFS_MAGIC)
913 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
914
915 return fs.f_bsize;
916 }
917
918 static void *file_ram_alloc(RAMBlock *block,
919 ram_addr_t memory,
920 const char *path)
921 {
922 char *filename;
923 char *sanitized_name;
924 char *c;
925 void *area;
926 int fd;
927 #ifdef MAP_POPULATE
928 int flags;
929 #endif
930 unsigned long hpagesize;
931
932 hpagesize = gethugepagesize(path);
933 if (!hpagesize) {
934 return NULL;
935 }
936
937 if (memory < hpagesize) {
938 return NULL;
939 }
940
941 if (kvm_enabled() && !kvm_has_sync_mmu()) {
942 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
943 return NULL;
944 }
945
946 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
947 sanitized_name = g_strdup(block->mr->name);
948 for (c = sanitized_name; *c != '\0'; c++) {
949 if (*c == '/')
950 *c = '_';
951 }
952
953 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
954 sanitized_name);
955 g_free(sanitized_name);
956
957 fd = mkstemp(filename);
958 if (fd < 0) {
959 perror("unable to create backing store for hugepages");
960 g_free(filename);
961 return NULL;
962 }
963 unlink(filename);
964 g_free(filename);
965
966 memory = (memory+hpagesize-1) & ~(hpagesize-1);
967
968 /*
969 * ftruncate is not supported by hugetlbfs in older
970 * hosts, so don't bother bailing out on errors.
971 * If anything goes wrong with it under other filesystems,
972 * mmap will fail.
973 */
974 if (ftruncate(fd, memory))
975 perror("ftruncate");
976
977 #ifdef MAP_POPULATE
978     /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
979      * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
980 * to sidestep this quirk.
981 */
982 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
983 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
984 #else
985 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
986 #endif
987 if (area == MAP_FAILED) {
988 perror("file_ram_alloc: can't mmap RAM pages");
989 close(fd);
990 return (NULL);
991 }
992 block->fd = fd;
993 return area;
994 }
995 #endif
996
997 static ram_addr_t find_ram_offset(ram_addr_t size)
998 {
999 RAMBlock *block, *next_block;
1000 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1001
1002     assert(size != 0); /* it would otherwise hand out the same offset multiple times */
1003
1004 if (QTAILQ_EMPTY(&ram_list.blocks))
1005 return 0;
1006
1007 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1008 ram_addr_t end, next = RAM_ADDR_MAX;
1009
1010 end = block->offset + block->length;
1011
1012 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1013 if (next_block->offset >= end) {
1014 next = MIN(next, next_block->offset);
1015 }
1016 }
1017 if (next - end >= size && next - end < mingap) {
1018 offset = end;
1019 mingap = next - end;
1020 }
1021 }
1022
1023 if (offset == RAM_ADDR_MAX) {
1024 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1025 (uint64_t)size);
1026 abort();
1027 }
1028
1029 return offset;
1030 }
1031
1032 ram_addr_t last_ram_offset(void)
1033 {
1034 RAMBlock *block;
1035 ram_addr_t last = 0;
1036
1037 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1038 last = MAX(last, block->offset + block->length);
1039
1040 return last;
1041 }
1042
1043 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1044 {
1045 int ret;
1046
1047     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core */
1048 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1049 "dump-guest-core", true)) {
1050 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1051 if (ret) {
1052 perror("qemu_madvise");
1053 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1054 "but dump_guest_core=off specified\n");
1055 }
1056 }
1057 }
1058
1059 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1060 {
1061 RAMBlock *new_block, *block;
1062
1063 new_block = NULL;
1064 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1065 if (block->offset == addr) {
1066 new_block = block;
1067 break;
1068 }
1069 }
1070 assert(new_block);
1071 assert(!new_block->idstr[0]);
1072
1073 if (dev) {
1074 char *id = qdev_get_dev_path(dev);
1075 if (id) {
1076 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1077 g_free(id);
1078 }
1079 }
1080 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1081
1082 /* This assumes the iothread lock is taken here too. */
1083 qemu_mutex_lock_ramlist();
1084 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1085 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1086 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1087 new_block->idstr);
1088 abort();
1089 }
1090 }
1091 qemu_mutex_unlock_ramlist();
1092 }
1093
1094 static int memory_try_enable_merging(void *addr, size_t len)
1095 {
1096 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1097 /* disabled by the user */
1098 return 0;
1099 }
1100
1101 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1102 }
1103
1104 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1105 MemoryRegion *mr)
1106 {
1107 RAMBlock *block, *new_block;
1108
1109 size = TARGET_PAGE_ALIGN(size);
1110 new_block = g_malloc0(sizeof(*new_block));
1111
1112 /* This assumes the iothread lock is taken here too. */
1113 qemu_mutex_lock_ramlist();
1114 new_block->mr = mr;
1115 new_block->offset = find_ram_offset(size);
1116 if (host) {
1117 new_block->host = host;
1118 new_block->flags |= RAM_PREALLOC_MASK;
1119 } else {
1120 if (mem_path) {
1121 #if defined (__linux__) && !defined(TARGET_S390X)
1122 new_block->host = file_ram_alloc(new_block, size, mem_path);
1123 if (!new_block->host) {
1124 new_block->host = qemu_anon_ram_alloc(size);
1125 memory_try_enable_merging(new_block->host, size);
1126 }
1127 #else
1128 fprintf(stderr, "-mem-path option unsupported\n");
1129 exit(1);
1130 #endif
1131 } else {
1132 if (xen_enabled()) {
1133 xen_ram_alloc(new_block->offset, size, mr);
1134 } else if (kvm_enabled()) {
1135 /* some s390/kvm configurations have special constraints */
1136 new_block->host = kvm_ram_alloc(size);
1137 } else {
1138 new_block->host = qemu_anon_ram_alloc(size);
1139 }
1140 memory_try_enable_merging(new_block->host, size);
1141 }
1142 }
1143 new_block->length = size;
1144
1145 /* Keep the list sorted from biggest to smallest block. */
1146 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1147 if (block->length < new_block->length) {
1148 break;
1149 }
1150 }
1151 if (block) {
1152 QTAILQ_INSERT_BEFORE(block, new_block, next);
1153 } else {
1154 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1155 }
1156 ram_list.mru_block = NULL;
1157
1158 ram_list.version++;
1159 qemu_mutex_unlock_ramlist();
1160
1161 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1162 last_ram_offset() >> TARGET_PAGE_BITS);
1163 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1164 0, size >> TARGET_PAGE_BITS);
1165 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1166
1167 qemu_ram_setup_dump(new_block->host, size);
1168 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1169
1170 if (kvm_enabled())
1171 kvm_setup_guest_memory(new_block->host, size);
1172
1173 return new_block->offset;
1174 }
1175
1176 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1177 {
1178 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1179 }
1180
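/*
 * Usage sketch: boards do not normally call qemu_ram_alloc() directly; they
 * go through the MemoryRegion API, whose RAM initializer (roughly) does
 *
 *     memory_region_init(mr, owner, name, size);
 *     mr->ram = true;
 *     mr->terminates = true;
 *     mr->ram_addr = qemu_ram_alloc(size, mr);
 *
 * after which qemu_get_ram_ptr(mr->ram_addr) below yields the host mapping.
 */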
1181 void qemu_ram_free_from_ptr(ram_addr_t addr)
1182 {
1183 RAMBlock *block;
1184
1185 /* This assumes the iothread lock is taken here too. */
1186 qemu_mutex_lock_ramlist();
1187 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1188 if (addr == block->offset) {
1189 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1190 ram_list.mru_block = NULL;
1191 ram_list.version++;
1192 g_free(block);
1193 break;
1194 }
1195 }
1196 qemu_mutex_unlock_ramlist();
1197 }
1198
1199 void qemu_ram_free(ram_addr_t addr)
1200 {
1201 RAMBlock *block;
1202
1203 /* This assumes the iothread lock is taken here too. */
1204 qemu_mutex_lock_ramlist();
1205 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1206 if (addr == block->offset) {
1207 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1208 ram_list.mru_block = NULL;
1209 ram_list.version++;
1210 if (block->flags & RAM_PREALLOC_MASK) {
1211 ;
1212 } else if (mem_path) {
1213 #if defined (__linux__) && !defined(TARGET_S390X)
1214 if (block->fd) {
1215 munmap(block->host, block->length);
1216 close(block->fd);
1217 } else {
1218 qemu_anon_ram_free(block->host, block->length);
1219 }
1220 #else
1221 abort();
1222 #endif
1223 } else {
1224 if (xen_enabled()) {
1225 xen_invalidate_map_cache_entry(block->host);
1226 } else {
1227 qemu_anon_ram_free(block->host, block->length);
1228 }
1229 }
1230 g_free(block);
1231 break;
1232 }
1233 }
1234 qemu_mutex_unlock_ramlist();
1235
1236 }
1237
1238 #ifndef _WIN32
1239 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1240 {
1241 RAMBlock *block;
1242 ram_addr_t offset;
1243 int flags;
1244 void *area, *vaddr;
1245
1246 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1247 offset = addr - block->offset;
1248 if (offset < block->length) {
1249 vaddr = block->host + offset;
1250 if (block->flags & RAM_PREALLOC_MASK) {
1251 ;
1252 } else {
1253 flags = MAP_FIXED;
1254 munmap(vaddr, length);
1255 if (mem_path) {
1256 #if defined(__linux__) && !defined(TARGET_S390X)
1257 if (block->fd) {
1258 #ifdef MAP_POPULATE
1259 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1260 MAP_PRIVATE;
1261 #else
1262 flags |= MAP_PRIVATE;
1263 #endif
1264 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1265 flags, block->fd, offset);
1266 } else {
1267 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1268 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1269 flags, -1, 0);
1270 }
1271 #else
1272 abort();
1273 #endif
1274 } else {
1275 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1276 flags |= MAP_SHARED | MAP_ANONYMOUS;
1277 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1278 flags, -1, 0);
1279 #else
1280 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1281 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1282 flags, -1, 0);
1283 #endif
1284 }
1285 if (area != vaddr) {
1286 fprintf(stderr, "Could not remap addr: "
1287 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1288 length, addr);
1289 exit(1);
1290 }
1291 memory_try_enable_merging(vaddr, length);
1292 qemu_ram_setup_dump(vaddr, length);
1293 }
1294 return;
1295 }
1296 }
1297 }
1298 #endif /* !_WIN32 */
1299
1300 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
1301 {
1302 RAMBlock *block;
1303
1304 /* The list is protected by the iothread lock here. */
1305 block = ram_list.mru_block;
1306 if (block && addr - block->offset < block->length) {
1307 goto found;
1308 }
1309 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1310 if (addr - block->offset < block->length) {
1311 goto found;
1312 }
1313 }
1314
1315 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1316 abort();
1317
1318 found:
1319 ram_list.mru_block = block;
1320 return block;
1321 }
1322
1323 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1324 With the exception of the softmmu code in this file, this should
1325 only be used for local memory (e.g. video ram) that the device owns,
1326 and knows it isn't going to access beyond the end of the block.
1327
1328 It should not be used for general purpose DMA.
1329 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1330 */
1331 void *qemu_get_ram_ptr(ram_addr_t addr)
1332 {
1333 RAMBlock *block = qemu_get_ram_block(addr);
1334
1335 if (xen_enabled()) {
1336 /* We need to check if the requested address is in the RAM
1337 * because we don't want to map the entire memory in QEMU.
1338 * In that case just map until the end of the page.
1339 */
1340 if (block->offset == 0) {
1341 return xen_map_cache(addr, 0, 0);
1342 } else if (block->host == NULL) {
1343 block->host =
1344 xen_map_cache(block->offset, block->length, 1);
1345 }
1346 }
1347 return block->host + (addr - block->offset);
1348 }
1349
1350 /* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
1351 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1352 *
1353 * ??? Is this still necessary?
1354 */
1355 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1356 {
1357 RAMBlock *block;
1358
1359 /* The list is protected by the iothread lock here. */
1360 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1361 if (addr - block->offset < block->length) {
1362 if (xen_enabled()) {
1363 /* We need to check if the requested address is in the RAM
1364 * because we don't want to map the entire memory in QEMU.
1365 * In that case just map until the end of the page.
1366 */
1367 if (block->offset == 0) {
1368 return xen_map_cache(addr, 0, 0);
1369 } else if (block->host == NULL) {
1370 block->host =
1371 xen_map_cache(block->offset, block->length, 1);
1372 }
1373 }
1374 return block->host + (addr - block->offset);
1375 }
1376 }
1377
1378 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1379 abort();
1380
1381 return NULL;
1382 }
1383
1384 /* Return a host pointer to guest RAM. Similar to qemu_get_ram_ptr
1385 * but takes a size argument */
1386 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1387 {
1388 if (*size == 0) {
1389 return NULL;
1390 }
1391 if (xen_enabled()) {
1392 return xen_map_cache(addr, *size, 1);
1393 } else {
1394 RAMBlock *block;
1395
1396 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1397 if (addr - block->offset < block->length) {
1398 if (addr - block->offset + *size > block->length)
1399 *size = block->length - addr + block->offset;
1400 return block->host + (addr - block->offset);
1401 }
1402 }
1403
1404 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1405 abort();
1406 }
1407 }
1408
1409 /* Some of the softmmu routines need to translate from a host pointer
1410 (typically a TLB entry) back to a ram offset. */
1411 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1412 {
1413 RAMBlock *block;
1414 uint8_t *host = ptr;
1415
1416 if (xen_enabled()) {
1417 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1418 return qemu_get_ram_block(*ram_addr)->mr;
1419 }
1420
1421 block = ram_list.mru_block;
1422 if (block && block->host && host - block->host < block->length) {
1423 goto found;
1424 }
1425
1426 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1427         /* This case appears when the block is not mapped. */
1428 if (block->host == NULL) {
1429 continue;
1430 }
1431 if (host - block->host < block->length) {
1432 goto found;
1433 }
1434 }
1435
1436 return NULL;
1437
1438 found:
1439 *ram_addr = block->offset + (host - block->host);
1440 return block->mr;
1441 }
1442
1443 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1444 uint64_t val, unsigned size)
1445 {
1446 int dirty_flags;
1447 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1448 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1449 tb_invalidate_phys_page_fast(ram_addr, size);
1450 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1451 }
1452 switch (size) {
1453 case 1:
1454 stb_p(qemu_get_ram_ptr(ram_addr), val);
1455 break;
1456 case 2:
1457 stw_p(qemu_get_ram_ptr(ram_addr), val);
1458 break;
1459 case 4:
1460 stl_p(qemu_get_ram_ptr(ram_addr), val);
1461 break;
1462 default:
1463 abort();
1464 }
1465 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1466 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1467 /* we remove the notdirty callback only if the code has been
1468 flushed */
1469 if (dirty_flags == 0xff) {
1470 CPUArchState *env = current_cpu->env_ptr;
1471 tlb_set_dirty(env, env->mem_io_vaddr);
1472 }
1473 }
1474
1475 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1476 unsigned size, bool is_write)
1477 {
1478 return is_write;
1479 }
1480
1481 static const MemoryRegionOps notdirty_mem_ops = {
1482 .write = notdirty_mem_write,
1483 .valid.accepts = notdirty_mem_accepts,
1484 .endianness = DEVICE_NATIVE_ENDIAN,
1485 };
1486
1487 /* Generate a debug exception if a watchpoint has been hit. */
1488 static void check_watchpoint(int offset, int len_mask, int flags)
1489 {
1490 CPUArchState *env = current_cpu->env_ptr;
1491 target_ulong pc, cs_base;
1492 target_ulong vaddr;
1493 CPUWatchpoint *wp;
1494 int cpu_flags;
1495
1496 if (env->watchpoint_hit) {
1497 /* We re-entered the check after replacing the TB. Now raise
1498          * the debug interrupt so that it will trigger after the
1499 * current instruction. */
1500 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1501 return;
1502 }
1503 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1504 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1505 if ((vaddr == (wp->vaddr & len_mask) ||
1506 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1507 wp->flags |= BP_WATCHPOINT_HIT;
1508 if (!env->watchpoint_hit) {
1509 env->watchpoint_hit = wp;
1510 tb_check_watchpoint(env);
1511 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1512 env->exception_index = EXCP_DEBUG;
1513 cpu_loop_exit(env);
1514 } else {
1515 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1516 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1517 cpu_resume_from_signal(env, NULL);
1518 }
1519 }
1520 } else {
1521 wp->flags &= ~BP_WATCHPOINT_HIT;
1522 }
1523 }
1524 }
1525
1526 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1527 so these check for a hit then pass through to the normal out-of-line
1528 phys routines. */
1529 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1530 unsigned size)
1531 {
1532 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1533 switch (size) {
1534 case 1: return ldub_phys(addr);
1535 case 2: return lduw_phys(addr);
1536 case 4: return ldl_phys(addr);
1537 default: abort();
1538 }
1539 }
1540
1541 static void watch_mem_write(void *opaque, hwaddr addr,
1542 uint64_t val, unsigned size)
1543 {
1544 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1545 switch (size) {
1546 case 1:
1547 stb_phys(addr, val);
1548 break;
1549 case 2:
1550 stw_phys(addr, val);
1551 break;
1552 case 4:
1553 stl_phys(addr, val);
1554 break;
1555 default: abort();
1556 }
1557 }
1558
1559 static const MemoryRegionOps watch_mem_ops = {
1560 .read = watch_mem_read,
1561 .write = watch_mem_write,
1562 .endianness = DEVICE_NATIVE_ENDIAN,
1563 };
1564
1565 static uint64_t subpage_read(void *opaque, hwaddr addr,
1566 unsigned len)
1567 {
1568 subpage_t *subpage = opaque;
1569 uint8_t buf[4];
1570
1571 #if defined(DEBUG_SUBPAGE)
1572 printf("%s: subpage %p len %d addr " TARGET_FMT_plx "\n", __func__,
1573 subpage, len, addr);
1574 #endif
1575 address_space_read(subpage->as, addr + subpage->base, buf, len);
1576 switch (len) {
1577 case 1:
1578 return ldub_p(buf);
1579 case 2:
1580 return lduw_p(buf);
1581 case 4:
1582 return ldl_p(buf);
1583 default:
1584 abort();
1585 }
1586 }
1587
1588 static void subpage_write(void *opaque, hwaddr addr,
1589 uint64_t value, unsigned len)
1590 {
1591 subpage_t *subpage = opaque;
1592 uint8_t buf[4];
1593
1594 #if defined(DEBUG_SUBPAGE)
1595 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1596 " value %"PRIx64"\n",
1597 __func__, subpage, len, addr, value);
1598 #endif
1599 switch (len) {
1600 case 1:
1601 stb_p(buf, value);
1602 break;
1603 case 2:
1604 stw_p(buf, value);
1605 break;
1606 case 4:
1607 stl_p(buf, value);
1608 break;
1609 default:
1610 abort();
1611 }
1612 address_space_write(subpage->as, addr + subpage->base, buf, len);
1613 }
1614
1615 static bool subpage_accepts(void *opaque, hwaddr addr,
1616 unsigned size, bool is_write)
1617 {
1618 subpage_t *subpage = opaque;
1619 #if defined(DEBUG_SUBPAGE)
1620     printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1621            __func__, subpage, is_write ? 'w' : 'r', size, addr);
1622 #endif
1623
1624 return address_space_access_valid(subpage->as, addr + subpage->base,
1625 size, is_write);
1626 }
1627
1628 static const MemoryRegionOps subpage_ops = {
1629 .read = subpage_read,
1630 .write = subpage_write,
1631 .valid.accepts = subpage_accepts,
1632 .endianness = DEVICE_NATIVE_ENDIAN,
1633 };
1634
1635 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1636 uint16_t section)
1637 {
1638 int idx, eidx;
1639
1640 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1641 return -1;
1642 idx = SUBPAGE_IDX(start);
1643 eidx = SUBPAGE_IDX(end);
1644 #if defined(DEBUG_SUBPAGE)
1645     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", __func__,
1646            mmio, start, end, idx, eidx, section);
1647 #endif
1648 for (; idx <= eidx; idx++) {
1649 mmio->sub_section[idx] = section;
1650 }
1651
1652 return 0;
1653 }
1654
1655 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1656 {
1657 subpage_t *mmio;
1658
1659 mmio = g_malloc0(sizeof(subpage_t));
1660
1661 mmio->as = as;
1662 mmio->base = base;
1663 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1664 "subpage", TARGET_PAGE_SIZE);
1665 mmio->iomem.subpage = true;
1666 #if defined(DEBUG_SUBPAGE)
1667     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1668            mmio, base, TARGET_PAGE_SIZE);
1669 #endif
1670 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1671
1672 return mmio;
1673 }
1674
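/*
 * Subpages exist so that a single target page can be shared by several
 * MemoryRegionSections: for example (hypothetically, with 4K pages) a 2K
 * device region at offset 0x000 and RAM from 0x800 within the same page
 * each get their own entry in sub_section[], selected per byte offset via
 * sub_section[SUBPAGE_IDX(addr)] in address_space_lookup_region() above.
 */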
1675 static uint16_t dummy_section(MemoryRegion *mr)
1676 {
1677 MemoryRegionSection section = {
1678 .mr = mr,
1679 .offset_within_address_space = 0,
1680 .offset_within_region = 0,
1681 .size = int128_2_64(),
1682 };
1683
1684 return phys_section_add(&section);
1685 }
1686
1687 MemoryRegion *iotlb_to_region(hwaddr index)
1688 {
1689 return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
1690 }
1691
1692 static void io_mem_init(void)
1693 {
1694 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1695 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1696 "unassigned", UINT64_MAX);
1697 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1698 "notdirty", UINT64_MAX);
1699 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1700 "watch", UINT64_MAX);
1701 }
1702
1703 static void mem_begin(MemoryListener *listener)
1704 {
1705 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1706 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1707
1708 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1709 d->as = as;
1710 as->next_dispatch = d;
1711 }
1712
1713 static void mem_commit(MemoryListener *listener)
1714 {
1715 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1716 AddressSpaceDispatch *cur = as->dispatch;
1717 AddressSpaceDispatch *next = as->next_dispatch;
1718
1719 next->nodes = next_map.nodes;
1720 next->sections = next_map.sections;
1721
1722 as->dispatch = next;
1723 g_free(cur);
1724 }
1725
1726 static void core_begin(MemoryListener *listener)
1727 {
1728 uint16_t n;
1729
1730 prev_map = g_new(PhysPageMap, 1);
1731 *prev_map = next_map;
1732
1733 memset(&next_map, 0, sizeof(next_map));
1734 n = dummy_section(&io_mem_unassigned);
1735 assert(n == PHYS_SECTION_UNASSIGNED);
1736 n = dummy_section(&io_mem_notdirty);
1737 assert(n == PHYS_SECTION_NOTDIRTY);
1738 n = dummy_section(&io_mem_rom);
1739 assert(n == PHYS_SECTION_ROM);
1740 n = dummy_section(&io_mem_watch);
1741 assert(n == PHYS_SECTION_WATCH);
1742 }
1743
1744 /* This listener's commit runs after the other AddressSpaceDispatch listeners' commits.
1745  * By then, all AddressSpaceDispatch instances have switched to the next map.
1746 */
1747 static void core_commit(MemoryListener *listener)
1748 {
1749 phys_sections_free(prev_map);
1750 }
1751
1752 static void tcg_commit(MemoryListener *listener)
1753 {
1754 CPUState *cpu;
1755
1756 /* since each CPU stores ram addresses in its TLB cache, we must
1757 reset the modified entries */
1758 /* XXX: slow ! */
1759 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
1760 CPUArchState *env = cpu->env_ptr;
1761
1762 tlb_flush(env, 1);
1763 }
1764 }
1765
1766 static void core_log_global_start(MemoryListener *listener)
1767 {
1768 cpu_physical_memory_set_dirty_tracking(1);
1769 }
1770
1771 static void core_log_global_stop(MemoryListener *listener)
1772 {
1773 cpu_physical_memory_set_dirty_tracking(0);
1774 }
1775
1776 static MemoryListener core_memory_listener = {
1777 .begin = core_begin,
1778 .commit = core_commit,
1779 .log_global_start = core_log_global_start,
1780 .log_global_stop = core_log_global_stop,
1781 .priority = 1,
1782 };
1783
1784 static MemoryListener tcg_memory_listener = {
1785 .commit = tcg_commit,
1786 };
1787
1788 void address_space_init_dispatch(AddressSpace *as)
1789 {
1790 as->dispatch = NULL;
1791 as->dispatch_listener = (MemoryListener) {
1792 .begin = mem_begin,
1793 .commit = mem_commit,
1794 .region_add = mem_add,
1795 .region_nop = mem_add,
1796 .priority = 0,
1797 };
1798 memory_listener_register(&as->dispatch_listener, as);
1799 }
1800
1801 void address_space_destroy_dispatch(AddressSpace *as)
1802 {
1803 AddressSpaceDispatch *d = as->dispatch;
1804
1805 memory_listener_unregister(&as->dispatch_listener);
1806 g_free(d);
1807 as->dispatch = NULL;
1808 }
1809
1810 static void memory_map_init(void)
1811 {
1812 system_memory = g_malloc(sizeof(*system_memory));
1813 memory_region_init(system_memory, NULL, "system", INT64_MAX);
1814 address_space_init(&address_space_memory, system_memory, "memory");
1815
1816 system_io = g_malloc(sizeof(*system_io));
1817 memory_region_init(system_io, NULL, "io", 65536);
1818 address_space_init(&address_space_io, system_io, "I/O");
1819
1820 memory_listener_register(&core_memory_listener, &address_space_memory);
1821 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1822 }
1823
1824 MemoryRegion *get_system_memory(void)
1825 {
1826 return system_memory;
1827 }
1828
1829 MemoryRegion *get_system_io(void)
1830 {
1831 return system_io;
1832 }
1833
1834 #endif /* !defined(CONFIG_USER_ONLY) */
1835
1836 /* physical memory access (slow version, mainly for debug) */
1837 #if defined(CONFIG_USER_ONLY)
1838 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1839 uint8_t *buf, int len, int is_write)
1840 {
1841 int l, flags;
1842 target_ulong page;
1843 void * p;
1844
1845 while (len > 0) {
1846 page = addr & TARGET_PAGE_MASK;
1847 l = (page + TARGET_PAGE_SIZE) - addr;
1848 if (l > len)
1849 l = len;
1850 flags = page_get_flags(page);
1851 if (!(flags & PAGE_VALID))
1852 return -1;
1853 if (is_write) {
1854 if (!(flags & PAGE_WRITE))
1855 return -1;
1856 /* XXX: this code should not depend on lock_user */
1857 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1858 return -1;
1859 memcpy(p, buf, l);
1860 unlock_user(p, addr, l);
1861 } else {
1862 if (!(flags & PAGE_READ))
1863 return -1;
1864 /* XXX: this code should not depend on lock_user */
1865 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1866 return -1;
1867 memcpy(buf, p, l);
1868 unlock_user(p, addr, 0);
1869 }
1870 len -= l;
1871 buf += l;
1872 addr += l;
1873 }
1874 return 0;
1875 }
1876
1877 #else
1878
1879 static void invalidate_and_set_dirty(hwaddr addr,
1880 hwaddr length)
1881 {
1882 if (!cpu_physical_memory_is_dirty(addr)) {
1883 /* invalidate code */
1884 tb_invalidate_phys_page_range(addr, addr + length, 0);
1885 /* set dirty bit */
1886 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1887 }
1888 xen_modified_memory(addr, length);
1889 }
1890
1891 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1892 {
1893 if (memory_region_is_ram(mr)) {
1894 return !(is_write && mr->readonly);
1895 }
1896 if (memory_region_is_romd(mr)) {
1897 return !is_write;
1898 }
1899
1900 return false;
1901 }
1902
1903 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1904 {
1905 unsigned access_size_max = mr->ops->valid.max_access_size;
1906
1907 /* Regions are assumed to support 1-4 byte accesses unless
1908 otherwise specified. */
1909 if (access_size_max == 0) {
1910 access_size_max = 4;
1911 }
1912
1913 /* Bound the maximum access by the alignment of the address. */
1914 if (!mr->ops->impl.unaligned) {
1915 unsigned align_size_max = addr & -addr;
1916 if (align_size_max != 0 && align_size_max < access_size_max) {
1917 access_size_max = align_size_max;
1918 }
1919 }
1920
1921 /* Don't attempt accesses larger than the maximum. */
1922 if (l > access_size_max) {
1923 l = access_size_max;
1924 }
1925
1926 return l;
1927 }
1928
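/*
 * Worked example: an 8-byte access to an MMIO region with
 * valid.max_access_size == 4, starting at an address ending in ...6, is
 * issued by address_space_rw() below as three accesses:
 *     2 bytes at ...6   (alignment bound: 0x6 & -0x6 == 2),
 *     4 bytes at ...8,
 *     2 bytes at ...c.
 */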
1929 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1930 int len, bool is_write)
1931 {
1932 hwaddr l;
1933 uint8_t *ptr;
1934 uint64_t val;
1935 hwaddr addr1;
1936 MemoryRegion *mr;
1937 bool error = false;
1938
1939 while (len > 0) {
1940 l = len;
1941 mr = address_space_translate(as, addr, &addr1, &l, is_write);
1942
1943 if (is_write) {
1944 if (!memory_access_is_direct(mr, is_write)) {
1945 l = memory_access_size(mr, l, addr1);
1946 /* XXX: could force current_cpu to NULL to avoid
1947 potential bugs */
1948 switch (l) {
1949 case 8:
1950 /* 64 bit write access */
1951 val = ldq_p(buf);
1952 error |= io_mem_write(mr, addr1, val, 8);
1953 break;
1954 case 4:
1955 /* 32 bit write access */
1956 val = ldl_p(buf);
1957 error |= io_mem_write(mr, addr1, val, 4);
1958 break;
1959 case 2:
1960 /* 16 bit write access */
1961 val = lduw_p(buf);
1962 error |= io_mem_write(mr, addr1, val, 2);
1963 break;
1964 case 1:
1965 /* 8 bit write access */
1966 val = ldub_p(buf);
1967 error |= io_mem_write(mr, addr1, val, 1);
1968 break;
1969 default:
1970 abort();
1971 }
1972 } else {
1973 addr1 += memory_region_get_ram_addr(mr);
1974 /* RAM case */
1975 ptr = qemu_get_ram_ptr(addr1);
1976 memcpy(ptr, buf, l);
1977 invalidate_and_set_dirty(addr1, l);
1978 }
1979 } else {
1980 if (!memory_access_is_direct(mr, is_write)) {
1981 /* I/O case */
1982 l = memory_access_size(mr, l, addr1);
1983 switch (l) {
1984 case 8:
1985 /* 64 bit read access */
1986 error |= io_mem_read(mr, addr1, &val, 8);
1987 stq_p(buf, val);
1988 break;
1989 case 4:
1990 /* 32 bit read access */
1991 error |= io_mem_read(mr, addr1, &val, 4);
1992 stl_p(buf, val);
1993 break;
1994 case 2:
1995 /* 16 bit read access */
1996 error |= io_mem_read(mr, addr1, &val, 2);
1997 stw_p(buf, val);
1998 break;
1999 case 1:
2000 /* 8 bit read access */
2001 error |= io_mem_read(mr, addr1, &val, 1);
2002 stb_p(buf, val);
2003 break;
2004 default:
2005 abort();
2006 }
2007 } else {
2008 /* RAM case */
2009 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2010 memcpy(buf, ptr, l);
2011 }
2012 }
2013 len -= l;
2014 buf += l;
2015 addr += l;
2016 }
2017
2018 return error;
2019 }
2020
2021 bool address_space_write(AddressSpace *as, hwaddr addr,
2022 const uint8_t *buf, int len)
2023 {
2024 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2025 }
2026
2027 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2028 {
2029 return address_space_rw(as, addr, buf, len, false);
2030 }
2031
2032
2033 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2034 int len, int is_write)
2035 {
2036 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2037 }
2038
2039 /* used for ROM loading: can write in RAM and ROM */
2040 void cpu_physical_memory_write_rom(hwaddr addr,
2041 const uint8_t *buf, int len)
2042 {
2043 hwaddr l;
2044 uint8_t *ptr;
2045 hwaddr addr1;
2046 MemoryRegion *mr;
2047
2048 while (len > 0) {
2049 l = len;
2050 mr = address_space_translate(&address_space_memory,
2051 addr, &addr1, &l, true);
2052
2053 if (!(memory_region_is_ram(mr) ||
2054 memory_region_is_romd(mr))) {
2055 /* do nothing */
2056 } else {
2057 addr1 += memory_region_get_ram_addr(mr);
2058 /* ROM/RAM case */
2059 ptr = qemu_get_ram_ptr(addr1);
2060 memcpy(ptr, buf, l);
2061 invalidate_and_set_dirty(addr1, l);
2062 }
2063 len -= l;
2064 buf += l;
2065 addr += l;
2066 }
2067 }
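/* Editor's sketch (not in the original file): cpu_physical_memory_write_rom()
 * is what ROM/BIOS loaders use, since a plain write would be discarded for
 * ROM-backed regions. The image pointer, size and load address below are
 * hypothetical.
 */
static void example_load_rom_image(const uint8_t *image, int size,
                                   hwaddr load_addr)
{
    /* Copies into RAM or ROMD-backed regions and marks them dirty;
     * anything else (plain MMIO) is silently skipped, as in the loop above. */
    cpu_physical_memory_write_rom(load_addr, image, size);
}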
2068
2069 typedef struct {
2070 MemoryRegion *mr;
2071 void *buffer;
2072 hwaddr addr;
2073 hwaddr len;
2074 } BounceBuffer;
2075
2076 static BounceBuffer bounce;
2077
2078 typedef struct MapClient {
2079 void *opaque;
2080 void (*callback)(void *opaque);
2081 QLIST_ENTRY(MapClient) link;
2082 } MapClient;
2083
2084 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2085 = QLIST_HEAD_INITIALIZER(map_client_list);
2086
2087 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2088 {
2089 MapClient *client = g_malloc(sizeof(*client));
2090
2091 client->opaque = opaque;
2092 client->callback = callback;
2093 QLIST_INSERT_HEAD(&map_client_list, client, link);
2094 return client;
2095 }
2096
2097 static void cpu_unregister_map_client(void *_client)
2098 {
2099 MapClient *client = (MapClient *)_client;
2100
2101 QLIST_REMOVE(client, link);
2102 g_free(client);
2103 }
2104
2105 static void cpu_notify_map_clients(void)
2106 {
2107 MapClient *client;
2108
2109 while (!QLIST_EMPTY(&map_client_list)) {
2110 client = QLIST_FIRST(&map_client_list);
2111 client->callback(client->opaque);
2112 cpu_unregister_map_client(client);
2113 }
2114 }
2115
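/* Editor's sketch (not in the original file): the map-client list above is
 * the retry mechanism for address_space_map() when the single bounce buffer
 * is already in use. A device model might register a callback like this;
 * example_map_retry_cb and the opaque device pointer are hypothetical names.
 */
static void example_map_retry_cb(void *opaque)
{
    /* Invoked from cpu_notify_map_clients() once the bounce buffer is free;
     * a real caller would re-attempt its address_space_map() here. */
}

static void example_register_retry(void *dev)
{
    cpu_register_map_client(dev, example_map_retry_cb);
}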
2116 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2117 {
2118 MemoryRegion *mr;
2119 hwaddr l, xlat;
2120
2121 while (len > 0) {
2122 l = len;
2123 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2124 if (!memory_access_is_direct(mr, is_write)) {
2125 l = memory_access_size(mr, l, addr);
2126 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2127 return false;
2128 }
2129 }
2130
2131 len -= l;
2132 addr += l;
2133 }
2134 return true;
2135 }
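/* Editor's sketch (not in the original file): a device model can use
 * address_space_access_valid() to pre-flight a DMA transfer before committing
 * to it; the descriptor fields below are hypothetical.
 */
static bool example_dma_ok(hwaddr dma_addr, int dma_len)
{
    return address_space_access_valid(&address_space_memory,
                                      dma_addr, dma_len, true);
}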
2136
2137 /* Map a physical memory region into a host virtual address.
2138 * May map a subset of the requested range, given by and returned in *plen.
2139 * May return NULL if resources needed to perform the mapping are exhausted.
2140 * Use only for reads OR writes - not for read-modify-write operations.
2141 * Use cpu_register_map_client() to know when retrying the map operation is
2142 * likely to succeed.
2143 */
2144 void *address_space_map(AddressSpace *as,
2145 hwaddr addr,
2146 hwaddr *plen,
2147 bool is_write)
2148 {
2149 hwaddr len = *plen;
2150 hwaddr done = 0;
2151 hwaddr l, xlat, base;
2152 MemoryRegion *mr, *this_mr;
2153 ram_addr_t raddr;
2154
2155 if (len == 0) {
2156 return NULL;
2157 }
2158
2159 l = len;
2160 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2161 if (!memory_access_is_direct(mr, is_write)) {
2162 if (bounce.buffer) {
2163 return NULL;
2164 }
2165 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2166 bounce.addr = addr;
2167 bounce.len = l;
2168
2169 memory_region_ref(mr);
2170 bounce.mr = mr;
2171 if (!is_write) {
2172 address_space_read(as, addr, bounce.buffer, l);
2173 }
2174
2175 *plen = l;
2176 return bounce.buffer;
2177 }
2178
2179 base = xlat;
2180 raddr = memory_region_get_ram_addr(mr);
2181
2182 for (;;) {
2183 len -= l;
2184 addr += l;
2185 done += l;
2186 if (len == 0) {
2187 break;
2188 }
2189
2190 l = len;
2191 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2192 if (this_mr != mr || xlat != base + done) {
2193 break;
2194 }
2195 }
2196
2197 memory_region_ref(mr);
2198 *plen = done;
2199 return qemu_ram_ptr_length(raddr + base, plen);
2200 }
2201
2202 /* Unmaps a memory region previously mapped by address_space_map().
2203 * Will also mark the memory as dirty if is_write == 1. access_len gives
2204 * the amount of memory that was actually read or written by the caller.
2205 */
2206 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2207 int is_write, hwaddr access_len)
2208 {
2209 if (buffer != bounce.buffer) {
2210 MemoryRegion *mr;
2211 ram_addr_t addr1;
2212
2213 mr = qemu_ram_addr_from_host(buffer, &addr1);
2214 assert(mr != NULL);
2215 if (is_write) {
2216 while (access_len) {
2217 unsigned l;
2218 l = TARGET_PAGE_SIZE;
2219 if (l > access_len)
2220 l = access_len;
2221 invalidate_and_set_dirty(addr1, l);
2222 addr1 += l;
2223 access_len -= l;
2224 }
2225 }
2226 if (xen_enabled()) {
2227 xen_invalidate_map_cache_entry(buffer);
2228 }
2229 memory_region_unref(mr);
2230 return;
2231 }
2232 if (is_write) {
2233 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2234 }
2235 qemu_vfree(bounce.buffer);
2236 bounce.buffer = NULL;
2237 memory_region_unref(bounce.mr);
2238 cpu_notify_map_clients();
2239 }
2240
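/* Editor's sketch (not in the original file): typical pairing of
 * address_space_map()/address_space_unmap() for a zero-copy write into guest
 * memory. desc_addr/desc_len are hypothetical; the key points are that *plen
 * may come back shorter than requested, and that unmap must be told how many
 * bytes were actually written so dirty tracking stays correct.
 */
static void example_dma_write(hwaddr desc_addr, hwaddr desc_len)
{
    hwaddr plen = desc_len;
    void *host = address_space_map(&address_space_memory, desc_addr,
                                   &plen, true /* is_write */);

    if (!host) {
        /* Resources exhausted (e.g. bounce buffer busy): register a map
         * client and retry later, as sketched above. */
        return;
    }

    /* Only 'plen' bytes are mapped, which may be less than desc_len. */
    memset(host, 0, plen);

    /* is_write == 1 and access_len == plen mark the pages dirty. */
    address_space_unmap(&address_space_memory, host, plen, 1, plen);
}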
2241 void *cpu_physical_memory_map(hwaddr addr,
2242 hwaddr *plen,
2243 int is_write)
2244 {
2245 return address_space_map(&address_space_memory, addr, plen, is_write);
2246 }
2247
2248 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2249 int is_write, hwaddr access_len)
2250 {
2251 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2252 }
2253
2254 /* warning: addr must be aligned */
2255 static inline uint32_t ldl_phys_internal(hwaddr addr,
2256 enum device_endian endian)
2257 {
2258 uint8_t *ptr;
2259 uint64_t val;
2260 MemoryRegion *mr;
2261 hwaddr l = 4;
2262 hwaddr addr1;
2263
2264 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2265 false);
2266 if (l < 4 || !memory_access_is_direct(mr, false)) {
2267 /* I/O case */
2268 io_mem_read(mr, addr1, &val, 4);
2269 #if defined(TARGET_WORDS_BIGENDIAN)
2270 if (endian == DEVICE_LITTLE_ENDIAN) {
2271 val = bswap32(val);
2272 }
2273 #else
2274 if (endian == DEVICE_BIG_ENDIAN) {
2275 val = bswap32(val);
2276 }
2277 #endif
2278 } else {
2279 /* RAM case */
2280 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2281 & TARGET_PAGE_MASK)
2282 + addr1);
2283 switch (endian) {
2284 case DEVICE_LITTLE_ENDIAN:
2285 val = ldl_le_p(ptr);
2286 break;
2287 case DEVICE_BIG_ENDIAN:
2288 val = ldl_be_p(ptr);
2289 break;
2290 default:
2291 val = ldl_p(ptr);
2292 break;
2293 }
2294 }
2295 return val;
2296 }
2297
2298 uint32_t ldl_phys(hwaddr addr)
2299 {
2300 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2301 }
2302
2303 uint32_t ldl_le_phys(hwaddr addr)
2304 {
2305 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2306 }
2307
2308 uint32_t ldl_be_phys(hwaddr addr)
2309 {
2310 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2311 }
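/* Editor's sketch (not in the original file): the ld*_phys() helpers above
 * read one naturally-aligned value from guest-physical memory through
 * address_space_memory. Reading a little-endian device register, as below,
 * is a hypothetical example of picking the fixed-endian variant.
 */
static uint32_t example_read_le_reg(hwaddr reg_addr)
{
    /* ldl_le_phys() byte-swaps as needed regardless of target endianness;
     * ldl_phys() would instead use the target's native byte order. */
    return ldl_le_phys(reg_addr);
}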
2312
2313 /* warning: addr must be aligned */
2314 static inline uint64_t ldq_phys_internal(hwaddr addr,
2315 enum device_endian endian)
2316 {
2317 uint8_t *ptr;
2318 uint64_t val;
2319 MemoryRegion *mr;
2320 hwaddr l = 8;
2321 hwaddr addr1;
2322
2323 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2324 false);
2325 if (l < 8 || !memory_access_is_direct(mr, false)) {
2326 /* I/O case */
2327 io_mem_read(mr, addr1, &val, 8);
2328 #if defined(TARGET_WORDS_BIGENDIAN)
2329 if (endian == DEVICE_LITTLE_ENDIAN) {
2330 val = bswap64(val);
2331 }
2332 #else
2333 if (endian == DEVICE_BIG_ENDIAN) {
2334 val = bswap64(val);
2335 }
2336 #endif
2337 } else {
2338 /* RAM case */
2339 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2340 & TARGET_PAGE_MASK)
2341 + addr1);
2342 switch (endian) {
2343 case DEVICE_LITTLE_ENDIAN:
2344 val = ldq_le_p(ptr);
2345 break;
2346 case DEVICE_BIG_ENDIAN:
2347 val = ldq_be_p(ptr);
2348 break;
2349 default:
2350 val = ldq_p(ptr);
2351 break;
2352 }
2353 }
2354 return val;
2355 }
2356
2357 uint64_t ldq_phys(hwaddr addr)
2358 {
2359 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2360 }
2361
2362 uint64_t ldq_le_phys(hwaddr addr)
2363 {
2364 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2365 }
2366
2367 uint64_t ldq_be_phys(hwaddr addr)
2368 {
2369 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2370 }
2371
2372 /* XXX: optimize */
2373 uint32_t ldub_phys(hwaddr addr)
2374 {
2375 uint8_t val;
2376 cpu_physical_memory_read(addr, &val, 1);
2377 return val;
2378 }
2379
2380 /* warning: addr must be aligned */
2381 static inline uint32_t lduw_phys_internal(hwaddr addr,
2382 enum device_endian endian)
2383 {
2384 uint8_t *ptr;
2385 uint64_t val;
2386 MemoryRegion *mr;
2387 hwaddr l = 2;
2388 hwaddr addr1;
2389
2390 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2391 false);
2392 if (l < 2 || !memory_access_is_direct(mr, false)) {
2393 /* I/O case */
2394 io_mem_read(mr, addr1, &val, 2);
2395 #if defined(TARGET_WORDS_BIGENDIAN)
2396 if (endian == DEVICE_LITTLE_ENDIAN) {
2397 val = bswap16(val);
2398 }
2399 #else
2400 if (endian == DEVICE_BIG_ENDIAN) {
2401 val = bswap16(val);
2402 }
2403 #endif
2404 } else {
2405 /* RAM case */
2406 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2407 & TARGET_PAGE_MASK)
2408 + addr1);
2409 switch (endian) {
2410 case DEVICE_LITTLE_ENDIAN:
2411 val = lduw_le_p(ptr);
2412 break;
2413 case DEVICE_BIG_ENDIAN:
2414 val = lduw_be_p(ptr);
2415 break;
2416 default:
2417 val = lduw_p(ptr);
2418 break;
2419 }
2420 }
2421 return val;
2422 }
2423
2424 uint32_t lduw_phys(hwaddr addr)
2425 {
2426 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2427 }
2428
2429 uint32_t lduw_le_phys(hwaddr addr)
2430 {
2431 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2432 }
2433
2434 uint32_t lduw_be_phys(hwaddr addr)
2435 {
2436 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2437 }
2438
2439 /* warning: addr must be aligned. The ram page is not marked as dirty
2440 and the code inside is not invalidated. It is useful if the dirty
2441 bits are used to track modified PTEs */
2442 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2443 {
2444 uint8_t *ptr;
2445 MemoryRegion *mr;
2446 hwaddr l = 4;
2447 hwaddr addr1;
2448
2449 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2450 true);
2451 if (l < 4 || !memory_access_is_direct(mr, true)) {
2452 io_mem_write(mr, addr1, val, 4);
2453 } else {
2454 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2455 ptr = qemu_get_ram_ptr(addr1);
2456 stl_p(ptr, val);
2457
2458 if (unlikely(in_migration)) {
2459 if (!cpu_physical_memory_is_dirty(addr1)) {
2460 /* invalidate code */
2461 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2462 /* set dirty bit */
2463 cpu_physical_memory_set_dirty_flags(
2464 addr1, (0xff & ~CODE_DIRTY_FLAG));
2465 }
2466 }
2467 }
2468 }
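/* Editor's sketch (not in the original file): stl_phys_notdirty() is meant
 * for updates that must not be reported as dirty, e.g. when the target's MMU
 * emulation sets accessed/dirty bits in a page table entry in guest RAM.
 * The PTE address and bit value here are hypothetical.
 */
static void example_set_pte_accessed(hwaddr pte_addr, uint32_t pte)
{
    /* Update the PTE without invalidating TBs or (outside migration)
     * touching the page's dirty flags. */
    stl_phys_notdirty(pte_addr, pte | 0x20 /* hypothetical ACCESSED bit */);
}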
2469
2470 /* warning: addr must be aligned */
2471 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2472 enum device_endian endian)
2473 {
2474 uint8_t *ptr;
2475 MemoryRegion *mr;
2476 hwaddr l = 4;
2477 hwaddr addr1;
2478
2479 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2480 true);
2481 if (l < 4 || !memory_access_is_direct(mr, true)) {
2482 #if defined(TARGET_WORDS_BIGENDIAN)
2483 if (endian == DEVICE_LITTLE_ENDIAN) {
2484 val = bswap32(val);
2485 }
2486 #else
2487 if (endian == DEVICE_BIG_ENDIAN) {
2488 val = bswap32(val);
2489 }
2490 #endif
2491 io_mem_write(mr, addr1, val, 4);
2492 } else {
2493 /* RAM case */
2494 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2495 ptr = qemu_get_ram_ptr(addr1);
2496 switch (endian) {
2497 case DEVICE_LITTLE_ENDIAN:
2498 stl_le_p(ptr, val);
2499 break;
2500 case DEVICE_BIG_ENDIAN:
2501 stl_be_p(ptr, val);
2502 break;
2503 default:
2504 stl_p(ptr, val);
2505 break;
2506 }
2507 invalidate_and_set_dirty(addr1, 4);
2508 }
2509 }
2510
2511 void stl_phys(hwaddr addr, uint32_t val)
2512 {
2513 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2514 }
2515
2516 void stl_le_phys(hwaddr addr, uint32_t val)
2517 {
2518 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2519 }
2520
2521 void stl_be_phys(hwaddr addr, uint32_t val)
2522 {
2523 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2524 }
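/* Editor's sketch (not in the original file): the st*_phys() helpers mirror
 * the loads above and additionally mark written RAM dirty. A device model
 * presenting a big-endian register block could use the _be variants; the
 * addresses and values below are hypothetical.
 */
static void example_write_be_reg(hwaddr reg_addr, uint32_t val)
{
    stl_be_phys(reg_addr, val);   /* 32-bit store, forced big-endian */
    stb_phys(reg_addr + 4, 0x1);  /* single-byte store, no endian issue */
}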
2525
2526 /* XXX: optimize */
2527 void stb_phys(hwaddr addr, uint32_t val)
2528 {
2529 uint8_t v = val;
2530 cpu_physical_memory_write(addr, &v, 1);
2531 }
2532
2533 /* warning: addr must be aligned */
2534 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2535 enum device_endian endian)
2536 {
2537 uint8_t *ptr;
2538 MemoryRegion *mr;
2539 hwaddr l = 2;
2540 hwaddr addr1;
2541
2542 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2543 true);
2544 if (l < 2 || !memory_access_is_direct(mr, true)) {
2545 #if defined(TARGET_WORDS_BIGENDIAN)
2546 if (endian == DEVICE_LITTLE_ENDIAN) {
2547 val = bswap16(val);
2548 }
2549 #else
2550 if (endian == DEVICE_BIG_ENDIAN) {
2551 val = bswap16(val);
2552 }
2553 #endif
2554 io_mem_write(mr, addr1, val, 2);
2555 } else {
2556 /* RAM case */
2557 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2558 ptr = qemu_get_ram_ptr(addr1);
2559 switch (endian) {
2560 case DEVICE_LITTLE_ENDIAN:
2561 stw_le_p(ptr, val);
2562 break;
2563 case DEVICE_BIG_ENDIAN:
2564 stw_be_p(ptr, val);
2565 break;
2566 default:
2567 stw_p(ptr, val);
2568 break;
2569 }
2570 invalidate_and_set_dirty(addr1, 2);
2571 }
2572 }
2573
2574 void stw_phys(hwaddr addr, uint32_t val)
2575 {
2576 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2577 }
2578
2579 void stw_le_phys(hwaddr addr, uint32_t val)
2580 {
2581 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2582 }
2583
2584 void stw_be_phys(hwaddr addr, uint32_t val)
2585 {
2586 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2587 }
2588
2589 /* XXX: optimize */
2590 void stq_phys(hwaddr addr, uint64_t val)
2591 {
2592 val = tswap64(val);
2593 cpu_physical_memory_write(addr, &val, 8);
2594 }
2595
2596 void stq_le_phys(hwaddr addr, uint64_t val)
2597 {
2598 val = cpu_to_le64(val);
2599 cpu_physical_memory_write(addr, &val, 8);
2600 }
2601
2602 void stq_be_phys(hwaddr addr, uint64_t val)
2603 {
2604 val = cpu_to_be64(val);
2605 cpu_physical_memory_write(addr, &val, 8);
2606 }
2607
2608 /* virtual memory access for debug (includes writing to ROM) */
2609 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2610 uint8_t *buf, int len, int is_write)
2611 {
2612 int l;
2613 hwaddr phys_addr;
2614 target_ulong page;
2615
2616 while (len > 0) {
2617 page = addr & TARGET_PAGE_MASK;
2618 phys_addr = cpu_get_phys_page_debug(cpu, page);
2619 /* if no physical page mapped, return an error */
2620 if (phys_addr == -1)
2621 return -1;
2622 l = (page + TARGET_PAGE_SIZE) - addr;
2623 if (l > len)
2624 l = len;
2625 phys_addr += (addr & ~TARGET_PAGE_MASK);
2626 if (is_write)
2627 cpu_physical_memory_write_rom(phys_addr, buf, l);
2628 else
2629 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2630 len -= l;
2631 buf += l;
2632 addr += l;
2633 }
2634 return 0;
2635 }
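/* Editor's sketch (not in the original file): cpu_memory_rw_debug() works on
 * guest-virtual addresses by translating page by page, which is what the gdb
 * stub and monitor rely on. The CPU pointer and virtual address below are
 * hypothetical.
 */
static int example_peek_guest_virt(CPUState *cpu, target_ulong vaddr,
                                   uint8_t *out, int len)
{
    /* Returns -1 if any page in the range has no physical mapping. */
    return cpu_memory_rw_debug(cpu, vaddr, out, len, 0 /* read */);
}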
2636 #endif
2637
2638 #if !defined(CONFIG_USER_ONLY)
2639
2640 /*
2641 * A helper function for the _utterly broken_ virtio device model to find out if
2642 * it's running on a big endian machine. Don't do this at home kids!
2643 */
2644 bool virtio_is_big_endian(void);
2645 bool virtio_is_big_endian(void)
2646 {
2647 #if defined(TARGET_WORDS_BIGENDIAN)
2648 return true;
2649 #else
2650 return false;
2651 #endif
2652 }
2653
2654 #endif
2655
2656 #ifndef CONFIG_USER_ONLY
2657 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2658 {
2659 MemoryRegion *mr;
2660 hwaddr l = 1;
2661
2662 mr = address_space_translate(&address_space_memory,
2663 phys_addr, &phys_addr, &l, false);
2664
2665 return !(memory_region_is_ram(mr) ||
2666 memory_region_is_romd(mr));
2667 }
2668
2669 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2670 {
2671 RAMBlock *block;
2672
2673 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2674 func(block->host, block->offset, block->length, opaque);
2675 }
2676 }
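/* Editor's sketch (not in the original file): qemu_ram_foreach_block() hands
 * each RAMBlock's host pointer, guest ram_addr_t offset and length to the
 * callback. The iterator below just sums the lengths; the RAMBlockIterFunc
 * parameter types are inferred from the call above.
 */
static void example_count_ram(void *host, ram_addr_t offset,
                              ram_addr_t length, void *opaque)
{
    *(ram_addr_t *)opaque += length;
}

static ram_addr_t example_total_ram(void)
{
    ram_addr_t total = 0;
    qemu_ram_foreach_block(example_count_ram, &total);
    return total;
}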
2677 #endif