exec: memory radix tree page level compression
[mirror_qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "hw/xen/xen.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
42 #include <qemu.h>
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
45 #include "trace.h"
46 #endif
47 #include "exec/cpu-all.h"
48
49 #include "exec/cputlb.h"
50 #include "translate-all.h"
51
52 #include "exec/memory-internal.h"
53
54 #include "qemu/range.h"
55
56 //#define DEBUG_SUBPAGE
57
58 #if !defined(CONFIG_USER_ONLY)
59 static int in_migration;
60
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
62
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
65
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
68
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
71
72 #endif
73
74 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
75 /* current CPU in the current thread. It is only valid inside
76 cpu_exec() */
77 DEFINE_TLS(CPUState *, current_cpu);
78 /* 0 = Do not count executed instructions.
79 1 = Precise instruction counting.
80 2 = Adaptive rate instruction counting. */
81 int use_icount;
82
83 #if !defined(CONFIG_USER_ONLY)
84
85 typedef struct PhysPageEntry PhysPageEntry;
86
87 struct PhysPageEntry {
88 /* How many levels (of P_L2_SIZE entries each) to skip to the next node; 0 for a leaf. */
89 uint32_t skip : 6;
90 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
91 uint32_t ptr : 26;
92 };
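/* skip == 0 marks a leaf whose ptr indexes phys_sections; skip >= 1 marks an
 * interior entry whose ptr indexes phys_map_nodes and covers skip radix-tree
 * levels at once (skip becomes > 1 only after phys_page_compact() has merged
 * single-child chains). */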
93
94 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
95
96 #define ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
97
98 /* Size of the L2 (and L3, etc.) page tables. */
99 #define P_L2_BITS 10
100 #define P_L2_SIZE (1 << P_L2_BITS)
101
102 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
103
104 typedef PhysPageEntry Node[P_L2_SIZE];
105
106 struct AddressSpaceDispatch {
107 /* This is a multi-level map on the physical address space.
108 * The bottom level has pointers to MemoryRegionSections.
109 */
110 PhysPageEntry phys_map;
111 Node *nodes;
112 MemoryRegionSection *sections;
113 AddressSpace *as;
114 };
115
116 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
117 typedef struct subpage_t {
118 MemoryRegion iomem;
119 AddressSpace *as;
120 hwaddr base;
121 uint16_t sub_section[TARGET_PAGE_SIZE];
122 } subpage_t;
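/* A subpage_t is installed for any page that is covered by more than one
 * MemoryRegionSection, or by a section that does not start or end on a page
 * boundary: sub_section[] maps each byte offset within the page to the
 * phys_sections index that handles it (see subpage_register() below). */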
123
124 #define PHYS_SECTION_UNASSIGNED 0
125 #define PHYS_SECTION_NOTDIRTY 1
126 #define PHYS_SECTION_ROM 2
127 #define PHYS_SECTION_WATCH 3
128
129 typedef struct PhysPageMap {
130 unsigned sections_nb;
131 unsigned sections_nb_alloc;
132 unsigned nodes_nb;
133 unsigned nodes_nb_alloc;
134 Node *nodes;
135 MemoryRegionSection *sections;
136 } PhysPageMap;
137
138 static PhysPageMap *prev_map;
139 static PhysPageMap next_map;
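/* The dispatch tables are double-buffered: next_map is rebuilt while a memory
 * transaction is in progress (core_begin/mem_begin) and swapped in at commit
 * time; prev_map keeps the old sections and nodes alive until core_commit()
 * frees them, after every AddressSpaceDispatch has switched over. */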
140
141 static void io_mem_init(void);
142 static void memory_map_init(void);
143
144 static MemoryRegion io_mem_watch;
145 #endif
146
147 #if !defined(CONFIG_USER_ONLY)
148
149 static void phys_map_node_reserve(unsigned nodes)
150 {
151 if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
152 next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
153 16);
154 next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
155 next_map.nodes_nb + nodes);
156 next_map.nodes = g_renew(Node, next_map.nodes,
157 next_map.nodes_nb_alloc);
158 }
159 }
160
161 static uint32_t phys_map_node_alloc(void)
162 {
163 unsigned i;
164 uint32_t ret;
165
166 ret = next_map.nodes_nb++;
167 assert(ret != PHYS_MAP_NODE_NIL);
168 assert(ret != next_map.nodes_nb_alloc);
169 for (i = 0; i < P_L2_SIZE; ++i) {
170 next_map.nodes[ret][i].skip = 1;
171 next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
172 }
173 return ret;
174 }
175
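/* Recursively populate the radix tree for [*index, *index + *nb) pages:
 * whenever the remaining range is aligned to and at least as large as this
 * level's step, the whole subtree is covered by a single leaf entry;
 * otherwise recurse one level down. */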
176 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
177 hwaddr *nb, uint16_t leaf,
178 int level)
179 {
180 PhysPageEntry *p;
181 int i;
182 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
183
184 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
185 lp->ptr = phys_map_node_alloc();
186 p = next_map.nodes[lp->ptr];
187 if (level == 0) {
188 for (i = 0; i < P_L2_SIZE; i++) {
189 p[i].skip = 0;
190 p[i].ptr = PHYS_SECTION_UNASSIGNED;
191 }
192 }
193 } else {
194 p = next_map.nodes[lp->ptr];
195 }
196 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
197
198 while (*nb && lp < &p[P_L2_SIZE]) {
199 if ((*index & (step - 1)) == 0 && *nb >= step) {
200 lp->skip = 0;
201 lp->ptr = leaf;
202 *index += step;
203 *nb -= step;
204 } else {
205 phys_page_set_level(lp, index, nb, leaf, level - 1);
206 }
207 ++lp;
208 }
209 }
210
211 static void phys_page_set(AddressSpaceDispatch *d,
212 hwaddr index, hwaddr nb,
213 uint16_t leaf)
214 {
215 /* Wildly overreserve - it doesn't matter much. */
216 phys_map_node_reserve(3 * P_L2_LEVELS);
217
218 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
219 }
220
221 /* Compact a non-leaf page entry: if the entry has exactly one child, update
222 * our entry so lookups can skip that level and go directly to the destination.
223 */
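/* For example, if only one slot of this node is populated and that child
 * itself has skip == 1, the parent entry ends up with skip == 2 and points
 * straight at the grandchild node; the index bits of the skipped level are
 * validated by the final range check in phys_page_find(). */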
224 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
225 {
226 unsigned valid_ptr = P_L2_SIZE;
227 int valid = 0;
228 PhysPageEntry *p;
229 int i;
230
231 if (lp->ptr == PHYS_MAP_NODE_NIL) {
232 return;
233 }
234
235 p = nodes[lp->ptr];
236 for (i = 0; i < P_L2_SIZE; i++) {
237 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
238 continue;
239 }
240
241 valid_ptr = i;
242 valid++;
243 if (p[i].skip) {
244 phys_page_compact(&p[i], nodes, compacted);
245 }
246 }
247
248 /* We can only compress if there's only one child. */
249 if (valid != 1) {
250 return;
251 }
252
253 assert(valid_ptr < P_L2_SIZE);
254
255 /* Don't compress if it won't fit in the # of bits we have. */
256 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
257 return;
258 }
259
260 lp->ptr = p[valid_ptr].ptr;
261 if (!p[valid_ptr].skip) {
262 /* If our only child is a leaf, make this a leaf. */
263 /* By design, we should have made this node a leaf to begin with so we
264 * should never reach here.
265 * But since it's so simple to handle this, let's do it just in case we
266 * change this rule.
267 */
268 lp->skip = 0;
269 } else {
270 lp->skip += p[valid_ptr].skip;
271 }
272 }
273
274 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
275 {
276 DECLARE_BITMAP(compacted, nodes_nb);
277
278 if (d->phys_map.skip) {
279 phys_page_compact(&d->phys_map, d->nodes, compacted);
280 }
281 }
282
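/* Walk the radix tree: each iteration descends lp.skip levels and then uses
 * the P_L2_BITS index bits of the level reached. Index bits belonging to
 * skipped single-child levels are never examined, which is why the range
 * check against the section's bounds below is needed for compressed paths. */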
283 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
284 Node *nodes, MemoryRegionSection *sections)
285 {
286 PhysPageEntry *p;
287 hwaddr index = addr >> TARGET_PAGE_BITS;
288 int i;
289
290 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
291 if (lp.ptr == PHYS_MAP_NODE_NIL) {
292 return &sections[PHYS_SECTION_UNASSIGNED];
293 }
294 p = nodes[lp.ptr];
295 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
296 }
297
298 if (sections[lp.ptr].size.hi ||
299 range_covers_byte(sections[lp.ptr].offset_within_address_space,
300 sections[lp.ptr].size.lo, addr)) {
301 return &sections[lp.ptr];
302 } else {
303 return &sections[PHYS_SECTION_UNASSIGNED];
304 }
305 }
306
307 bool memory_region_is_unassigned(MemoryRegion *mr)
308 {
309 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
310 && mr != &io_mem_watch;
311 }
312
313 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
314 hwaddr addr,
315 bool resolve_subpage)
316 {
317 MemoryRegionSection *section;
318 subpage_t *subpage;
319
320 section = phys_page_find(d->phys_map, addr, d->nodes, d->sections);
321 if (resolve_subpage && section->mr->subpage) {
322 subpage = container_of(section->mr, subpage_t, iomem);
323 section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
324 }
325 return section;
326 }
327
328 static MemoryRegionSection *
329 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
330 hwaddr *plen, bool resolve_subpage)
331 {
332 MemoryRegionSection *section;
333 Int128 diff;
334
335 section = address_space_lookup_region(d, addr, resolve_subpage);
336 /* Compute offset within MemoryRegionSection */
337 addr -= section->offset_within_address_space;
338
339 /* Compute offset within MemoryRegion */
340 *xlat = addr + section->offset_within_region;
341
342 diff = int128_sub(section->mr->size, int128_make64(addr));
343 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
344 return section;
345 }
346
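/* Follow the chain of IOMMUs (if any) starting from as until a terminal
 * MemoryRegion is reached, translating addr at each step; accesses that an
 * IOMMU entry does not permit end up at io_mem_unassigned. */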
347 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
348 hwaddr *xlat, hwaddr *plen,
349 bool is_write)
350 {
351 IOMMUTLBEntry iotlb;
352 MemoryRegionSection *section;
353 MemoryRegion *mr;
354 hwaddr len = *plen;
355
356 for (;;) {
357 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
358 mr = section->mr;
359
360 if (!mr->iommu_ops) {
361 break;
362 }
363
364 iotlb = mr->iommu_ops->translate(mr, addr);
365 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
366 | (addr & iotlb.addr_mask));
367 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
368 if (!(iotlb.perm & (1 << is_write))) {
369 mr = &io_mem_unassigned;
370 break;
371 }
372
373 as = iotlb.target_as;
374 }
375
376 *plen = len;
377 *xlat = addr;
378 return mr;
379 }
380
381 MemoryRegionSection *
382 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
383 hwaddr *plen)
384 {
385 MemoryRegionSection *section;
386 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
387
388 assert(!section->mr->iommu_ops);
389 return section;
390 }
391 #endif
392
393 void cpu_exec_init_all(void)
394 {
395 #if !defined(CONFIG_USER_ONLY)
396 qemu_mutex_init(&ram_list.mutex);
397 memory_map_init();
398 io_mem_init();
399 #endif
400 }
401
402 #if !defined(CONFIG_USER_ONLY)
403
404 static int cpu_common_post_load(void *opaque, int version_id)
405 {
406 CPUState *cpu = opaque;
407
408 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
409 version_id is increased. */
410 cpu->interrupt_request &= ~0x01;
411 tlb_flush(cpu->env_ptr, 1);
412
413 return 0;
414 }
415
416 const VMStateDescription vmstate_cpu_common = {
417 .name = "cpu_common",
418 .version_id = 1,
419 .minimum_version_id = 1,
420 .minimum_version_id_old = 1,
421 .post_load = cpu_common_post_load,
422 .fields = (VMStateField []) {
423 VMSTATE_UINT32(halted, CPUState),
424 VMSTATE_UINT32(interrupt_request, CPUState),
425 VMSTATE_END_OF_LIST()
426 }
427 };
428
429 #endif
430
431 CPUState *qemu_get_cpu(int index)
432 {
433 CPUState *cpu;
434
435 CPU_FOREACH(cpu) {
436 if (cpu->cpu_index == index) {
437 return cpu;
438 }
439 }
440
441 return NULL;
442 }
443
444 void cpu_exec_init(CPUArchState *env)
445 {
446 CPUState *cpu = ENV_GET_CPU(env);
447 CPUClass *cc = CPU_GET_CLASS(cpu);
448 CPUState *some_cpu;
449 int cpu_index;
450
451 #if defined(CONFIG_USER_ONLY)
452 cpu_list_lock();
453 #endif
454 cpu_index = 0;
455 CPU_FOREACH(some_cpu) {
456 cpu_index++;
457 }
458 cpu->cpu_index = cpu_index;
459 cpu->numa_node = 0;
460 QTAILQ_INIT(&env->breakpoints);
461 QTAILQ_INIT(&env->watchpoints);
462 #ifndef CONFIG_USER_ONLY
463 cpu->thread_id = qemu_get_thread_id();
464 #endif
465 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
466 #if defined(CONFIG_USER_ONLY)
467 cpu_list_unlock();
468 #endif
469 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
470 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
471 }
472 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
473 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
474 cpu_save, cpu_load, env);
475 assert(cc->vmsd == NULL);
476 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
477 #endif
478 if (cc->vmsd != NULL) {
479 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
480 }
481 }
482
483 #if defined(TARGET_HAS_ICE)
484 #if defined(CONFIG_USER_ONLY)
485 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
486 {
487 tb_invalidate_phys_page_range(pc, pc + 1, 0);
488 }
489 #else
490 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
491 {
492 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
493 if (phys != -1) {
494 tb_invalidate_phys_addr(phys | (pc & ~TARGET_PAGE_MASK));
495 }
496 }
497 #endif
498 #endif /* TARGET_HAS_ICE */
499
500 #if defined(CONFIG_USER_ONLY)
501 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
502
503 {
504 }
505
506 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
507 int flags, CPUWatchpoint **watchpoint)
508 {
509 return -ENOSYS;
510 }
511 #else
512 /* Add a watchpoint. */
513 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
514 int flags, CPUWatchpoint **watchpoint)
515 {
516 target_ulong len_mask = ~(len - 1);
517 CPUWatchpoint *wp;
518
519 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
520 if ((len & (len - 1)) || (addr & ~len_mask) ||
521 len == 0 || len > TARGET_PAGE_SIZE) {
522 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
523 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
524 return -EINVAL;
525 }
526 wp = g_malloc(sizeof(*wp));
527
528 wp->vaddr = addr;
529 wp->len_mask = len_mask;
530 wp->flags = flags;
531
532 /* keep all GDB-injected watchpoints in front */
533 if (flags & BP_GDB)
534 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
535 else
536 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
537
538 tlb_flush_page(env, addr);
539
540 if (watchpoint)
541 *watchpoint = wp;
542 return 0;
543 }
544
545 /* Remove a specific watchpoint. */
546 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
547 int flags)
548 {
549 target_ulong len_mask = ~(len - 1);
550 CPUWatchpoint *wp;
551
552 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
553 if (addr == wp->vaddr && len_mask == wp->len_mask
554 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
555 cpu_watchpoint_remove_by_ref(env, wp);
556 return 0;
557 }
558 }
559 return -ENOENT;
560 }
561
562 /* Remove a specific watchpoint by reference. */
563 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
564 {
565 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
566
567 tlb_flush_page(env, watchpoint->vaddr);
568
569 g_free(watchpoint);
570 }
571
572 /* Remove all matching watchpoints. */
573 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
574 {
575 CPUWatchpoint *wp, *next;
576
577 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
578 if (wp->flags & mask)
579 cpu_watchpoint_remove_by_ref(env, wp);
580 }
581 }
582 #endif
583
584 /* Add a breakpoint. */
585 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
586 CPUBreakpoint **breakpoint)
587 {
588 #if defined(TARGET_HAS_ICE)
589 CPUBreakpoint *bp;
590
591 bp = g_malloc(sizeof(*bp));
592
593 bp->pc = pc;
594 bp->flags = flags;
595
596 /* keep all GDB-injected breakpoints in front */
597 if (flags & BP_GDB) {
598 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
599 } else {
600 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
601 }
602
603 breakpoint_invalidate(ENV_GET_CPU(env), pc);
604
605 if (breakpoint) {
606 *breakpoint = bp;
607 }
608 return 0;
609 #else
610 return -ENOSYS;
611 #endif
612 }
613
614 /* Remove a specific breakpoint. */
615 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
616 {
617 #if defined(TARGET_HAS_ICE)
618 CPUBreakpoint *bp;
619
620 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
621 if (bp->pc == pc && bp->flags == flags) {
622 cpu_breakpoint_remove_by_ref(env, bp);
623 return 0;
624 }
625 }
626 return -ENOENT;
627 #else
628 return -ENOSYS;
629 #endif
630 }
631
632 /* Remove a specific breakpoint by reference. */
633 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
634 {
635 #if defined(TARGET_HAS_ICE)
636 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
637
638 breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
639
640 g_free(breakpoint);
641 #endif
642 }
643
644 /* Remove all matching breakpoints. */
645 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
646 {
647 #if defined(TARGET_HAS_ICE)
648 CPUBreakpoint *bp, *next;
649
650 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
651 if (bp->flags & mask)
652 cpu_breakpoint_remove_by_ref(env, bp);
653 }
654 #endif
655 }
656
657 /* Enable or disable single-step mode. EXCP_DEBUG is returned by the
658 CPU loop after each instruction. */
659 void cpu_single_step(CPUState *cpu, int enabled)
660 {
661 #if defined(TARGET_HAS_ICE)
662 if (cpu->singlestep_enabled != enabled) {
663 cpu->singlestep_enabled = enabled;
664 if (kvm_enabled()) {
665 kvm_update_guest_debug(cpu, 0);
666 } else {
667 /* must flush all the translated code to avoid inconsistencies */
668 /* XXX: only flush what is necessary */
669 CPUArchState *env = cpu->env_ptr;
670 tb_flush(env);
671 }
672 }
673 #endif
674 }
675
676 void cpu_abort(CPUArchState *env, const char *fmt, ...)
677 {
678 CPUState *cpu = ENV_GET_CPU(env);
679 va_list ap;
680 va_list ap2;
681
682 va_start(ap, fmt);
683 va_copy(ap2, ap);
684 fprintf(stderr, "qemu: fatal: ");
685 vfprintf(stderr, fmt, ap);
686 fprintf(stderr, "\n");
687 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
688 if (qemu_log_enabled()) {
689 qemu_log("qemu: fatal: ");
690 qemu_log_vprintf(fmt, ap2);
691 qemu_log("\n");
692 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
693 qemu_log_flush();
694 qemu_log_close();
695 }
696 va_end(ap2);
697 va_end(ap);
698 #if defined(CONFIG_USER_ONLY)
699 {
700 struct sigaction act;
701 sigfillset(&act.sa_mask);
702 act.sa_handler = SIG_DFL;
703 sigaction(SIGABRT, &act, NULL);
704 }
705 #endif
706 abort();
707 }
708
709 #if !defined(CONFIG_USER_ONLY)
710 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
711 {
712 RAMBlock *block;
713
714 /* The list is protected by the iothread lock here. */
715 block = ram_list.mru_block;
716 if (block && addr - block->offset < block->length) {
717 goto found;
718 }
719 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
720 if (addr - block->offset < block->length) {
721 goto found;
722 }
723 }
724
725 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
726 abort();
727
728 found:
729 ram_list.mru_block = block;
730 return block;
731 }
732
733 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
734 uintptr_t length)
735 {
736 RAMBlock *block;
737 ram_addr_t start1;
738
739 block = qemu_get_ram_block(start);
740 assert(block == qemu_get_ram_block(end - 1));
741 start1 = (uintptr_t)block->host + (start - block->offset);
742 cpu_tlb_reset_dirty_all(start1, length);
743 }
744
745 /* Note: start and end must be within the same ram block. */
746 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
747 int dirty_flags)
748 {
749 uintptr_t length;
750
751 start &= TARGET_PAGE_MASK;
752 end = TARGET_PAGE_ALIGN(end);
753
754 length = end - start;
755 if (length == 0)
756 return;
757 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
758
759 if (tcg_enabled()) {
760 tlb_reset_dirty_range_all(start, end, length);
761 }
762 }
763
764 static int cpu_physical_memory_set_dirty_tracking(int enable)
765 {
766 int ret = 0;
767 in_migration = enable;
768 return ret;
769 }
770
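/* Encode the iotlb value for this mapping: for RAM it is the ram_addr plus
 * the offset, with the NOTDIRTY or ROM section index folded into the low
 * bits; for MMIO it is the section index plus the offset within the region.
 * Pages with a matching watchpoint are redirected to the WATCH section so
 * the access traps. */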
771 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
772 MemoryRegionSection *section,
773 target_ulong vaddr,
774 hwaddr paddr, hwaddr xlat,
775 int prot,
776 target_ulong *address)
777 {
778 hwaddr iotlb;
779 CPUWatchpoint *wp;
780
781 if (memory_region_is_ram(section->mr)) {
782 /* Normal RAM. */
783 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
784 + xlat;
785 if (!section->readonly) {
786 iotlb |= PHYS_SECTION_NOTDIRTY;
787 } else {
788 iotlb |= PHYS_SECTION_ROM;
789 }
790 } else {
791 iotlb = section - address_space_memory.dispatch->sections;
792 iotlb += xlat;
793 }
794
795 /* Make accesses to pages with watchpoints go via the
796 watchpoint trap routines. */
797 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
798 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
799 /* Avoid trapping reads of pages with a write breakpoint. */
800 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
801 iotlb = PHYS_SECTION_WATCH + paddr;
802 *address |= TLB_MMIO;
803 break;
804 }
805 }
806 }
807
808 return iotlb;
809 }
810 #endif /* defined(CONFIG_USER_ONLY) */
811
812 #if !defined(CONFIG_USER_ONLY)
813
814 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
815 uint16_t section);
816 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
817
818 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
819
820 /*
821 * Set a custom physical guest memory allocator.
822 * Accelerators with unusual needs may need this. Hopefully, we can
823 * get rid of it eventually.
824 */
825 void phys_mem_set_alloc(void *(*alloc)(size_t))
826 {
827 phys_mem_alloc = alloc;
828 }
829
830 static uint16_t phys_section_add(MemoryRegionSection *section)
831 {
832 /* The physical section number is ORed with a page-aligned
833 * pointer to produce the iotlb entries. Thus it should
834 * never overflow into the page-aligned value.
835 */
836 assert(next_map.sections_nb < TARGET_PAGE_SIZE);
837
838 if (next_map.sections_nb == next_map.sections_nb_alloc) {
839 next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
840 16);
841 next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
842 next_map.sections_nb_alloc);
843 }
844 next_map.sections[next_map.sections_nb] = *section;
845 memory_region_ref(section->mr);
846 return next_map.sections_nb++;
847 }
848
849 static void phys_section_destroy(MemoryRegion *mr)
850 {
851 memory_region_unref(mr);
852
853 if (mr->subpage) {
854 subpage_t *subpage = container_of(mr, subpage_t, iomem);
855 memory_region_destroy(&subpage->iomem);
856 g_free(subpage);
857 }
858 }
859
860 static void phys_sections_free(PhysPageMap *map)
861 {
862 while (map->sections_nb > 0) {
863 MemoryRegionSection *section = &map->sections[--map->sections_nb];
864 phys_section_destroy(section->mr);
865 }
866 g_free(map->sections);
867 g_free(map->nodes);
868 g_free(map);
869 }
870
871 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
872 {
873 subpage_t *subpage;
874 hwaddr base = section->offset_within_address_space
875 & TARGET_PAGE_MASK;
876 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
877 next_map.nodes, next_map.sections);
878 MemoryRegionSection subsection = {
879 .offset_within_address_space = base,
880 .size = int128_make64(TARGET_PAGE_SIZE),
881 };
882 hwaddr start, end;
883
884 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
885
886 if (!(existing->mr->subpage)) {
887 subpage = subpage_init(d->as, base);
888 subsection.mr = &subpage->iomem;
889 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
890 phys_section_add(&subsection));
891 } else {
892 subpage = container_of(existing->mr, subpage_t, iomem);
893 }
894 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
895 end = start + int128_get64(section->size) - 1;
896 subpage_register(subpage, start, end, phys_section_add(section));
897 }
898
899
900 static void register_multipage(AddressSpaceDispatch *d,
901 MemoryRegionSection *section)
902 {
903 hwaddr start_addr = section->offset_within_address_space;
904 uint16_t section_index = phys_section_add(section);
905 uint64_t num_pages = int128_get64(int128_rshift(section->size,
906 TARGET_PAGE_BITS));
907
908 assert(num_pages);
909 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
910 }
911
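/* Split the incoming section into a leading partial page, a run of whole
 * pages, and a trailing partial page; the partial pages are dispatched via
 * register_subpage() and the whole-page run via register_multipage(). */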
912 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
913 {
914 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
915 AddressSpaceDispatch *d = as->next_dispatch;
916 MemoryRegionSection now = *section, remain = *section;
917 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
918
919 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
920 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
921 - now.offset_within_address_space;
922
923 now.size = int128_min(int128_make64(left), now.size);
924 register_subpage(d, &now);
925 } else {
926 now.size = int128_zero();
927 }
928 while (int128_ne(remain.size, now.size)) {
929 remain.size = int128_sub(remain.size, now.size);
930 remain.offset_within_address_space += int128_get64(now.size);
931 remain.offset_within_region += int128_get64(now.size);
932 now = remain;
933 if (int128_lt(remain.size, page_size)) {
934 register_subpage(d, &now);
935 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
936 now.size = page_size;
937 register_subpage(d, &now);
938 } else {
939 now.size = int128_and(now.size, int128_neg(page_size));
940 register_multipage(d, &now);
941 }
942 }
943 }
944
945 void qemu_flush_coalesced_mmio_buffer(void)
946 {
947 if (kvm_enabled())
948 kvm_flush_coalesced_mmio_buffer();
949 }
950
951 void qemu_mutex_lock_ramlist(void)
952 {
953 qemu_mutex_lock(&ram_list.mutex);
954 }
955
956 void qemu_mutex_unlock_ramlist(void)
957 {
958 qemu_mutex_unlock(&ram_list.mutex);
959 }
960
961 #ifdef __linux__
962
963 #include <sys/vfs.h>
964
965 #define HUGETLBFS_MAGIC 0x958458f6
966
967 static long gethugepagesize(const char *path)
968 {
969 struct statfs fs;
970 int ret;
971
972 do {
973 ret = statfs(path, &fs);
974 } while (ret != 0 && errno == EINTR);
975
976 if (ret != 0) {
977 perror(path);
978 return 0;
979 }
980
981 if (fs.f_type != HUGETLBFS_MAGIC)
982 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
983
984 return fs.f_bsize;
985 }
986
987 static sigjmp_buf sigjump;
988
989 static void sigbus_handler(int signal)
990 {
991 siglongjmp(sigjump, 1);
992 }
993
994 static void *file_ram_alloc(RAMBlock *block,
995 ram_addr_t memory,
996 const char *path)
997 {
998 char *filename;
999 char *sanitized_name;
1000 char *c;
1001 void *area;
1002 int fd;
1003 unsigned long hpagesize;
1004
1005 hpagesize = gethugepagesize(path);
1006 if (!hpagesize) {
1007 return NULL;
1008 }
1009
1010 if (memory < hpagesize) {
1011 return NULL;
1012 }
1013
1014 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1015 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1016 return NULL;
1017 }
1018
1019 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1020 sanitized_name = g_strdup(block->mr->name);
1021 for (c = sanitized_name; *c != '\0'; c++) {
1022 if (*c == '/')
1023 *c = '_';
1024 }
1025
1026 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1027 sanitized_name);
1028 g_free(sanitized_name);
1029
1030 fd = mkstemp(filename);
1031 if (fd < 0) {
1032 perror("unable to create backing store for hugepages");
1033 g_free(filename);
1034 return NULL;
1035 }
1036 unlink(filename);
1037 g_free(filename);
1038
1039 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1040
1041 /*
1042 * ftruncate is not supported by hugetlbfs in older
1043 * hosts, so don't bother bailing out on errors.
1044 * If anything goes wrong with it under other filesystems,
1045 * mmap will fail.
1046 */
1047 if (ftruncate(fd, memory))
1048 perror("ftruncate");
1049
1050 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1051 if (area == MAP_FAILED) {
1052 perror("file_ram_alloc: can't mmap RAM pages");
1053 close(fd);
1054 return (NULL);
1055 }
1056
1057 if (mem_prealloc) {
1058 int ret, i;
1059 struct sigaction act, oldact;
1060 sigset_t set, oldset;
1061
1062 memset(&act, 0, sizeof(act));
1063 act.sa_handler = &sigbus_handler;
1064 act.sa_flags = 0;
1065
1066 ret = sigaction(SIGBUS, &act, &oldact);
1067 if (ret) {
1068 perror("file_ram_alloc: failed to install signal handler");
1069 exit(1);
1070 }
1071
1072 /* unblock SIGBUS */
1073 sigemptyset(&set);
1074 sigaddset(&set, SIGBUS);
1075 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1076
1077 if (sigsetjmp(sigjump, 1)) {
1078 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1079 exit(1);
1080 }
1081
1082 /* MAP_POPULATE silently ignores failures, so touch every hugepage by hand */
1083 for (i = 0; i < (memory/hpagesize); i++) {
1084 memset(area + (hpagesize*i), 0, 1);
1085 }
1086
1087 ret = sigaction(SIGBUS, &oldact, NULL);
1088 if (ret) {
1089 perror("file_ram_alloc: failed to reinstall signal handler");
1090 exit(1);
1091 }
1092
1093 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1094 }
1095
1096 block->fd = fd;
1097 return area;
1098 }
1099 #else
1100 static void *file_ram_alloc(RAMBlock *block,
1101 ram_addr_t memory,
1102 const char *path)
1103 {
1104 fprintf(stderr, "-mem-path not supported on this host\n");
1105 exit(1);
1106 }
1107 #endif
1108
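/* Best-fit search of the ram_addr_t space: choose the smallest gap between
 * existing blocks (or the space after the last block) that can hold a block
 * of the requested size. */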
1109 static ram_addr_t find_ram_offset(ram_addr_t size)
1110 {
1111 RAMBlock *block, *next_block;
1112 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1113
1114 assert(size != 0); /* it would hand out same offset multiple times */
1115
1116 if (QTAILQ_EMPTY(&ram_list.blocks))
1117 return 0;
1118
1119 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1120 ram_addr_t end, next = RAM_ADDR_MAX;
1121
1122 end = block->offset + block->length;
1123
1124 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1125 if (next_block->offset >= end) {
1126 next = MIN(next, next_block->offset);
1127 }
1128 }
1129 if (next - end >= size && next - end < mingap) {
1130 offset = end;
1131 mingap = next - end;
1132 }
1133 }
1134
1135 if (offset == RAM_ADDR_MAX) {
1136 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1137 (uint64_t)size);
1138 abort();
1139 }
1140
1141 return offset;
1142 }
1143
1144 ram_addr_t last_ram_offset(void)
1145 {
1146 RAMBlock *block;
1147 ram_addr_t last = 0;
1148
1149 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1150 last = MAX(last, block->offset + block->length);
1151
1152 return last;
1153 }
1154
1155 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1156 {
1157 int ret;
1158
1159 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1160 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1161 "dump-guest-core", true)) {
1162 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1163 if (ret) {
1164 perror("qemu_madvise");
1165 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1166 "but dump_guest_core=off specified\n");
1167 }
1168 }
1169 }
1170
1171 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1172 {
1173 RAMBlock *new_block, *block;
1174
1175 new_block = NULL;
1176 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1177 if (block->offset == addr) {
1178 new_block = block;
1179 break;
1180 }
1181 }
1182 assert(new_block);
1183 assert(!new_block->idstr[0]);
1184
1185 if (dev) {
1186 char *id = qdev_get_dev_path(dev);
1187 if (id) {
1188 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1189 g_free(id);
1190 }
1191 }
1192 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1193
1194 /* This assumes the iothread lock is taken here too. */
1195 qemu_mutex_lock_ramlist();
1196 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1197 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1198 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1199 new_block->idstr);
1200 abort();
1201 }
1202 }
1203 qemu_mutex_unlock_ramlist();
1204 }
1205
1206 static int memory_try_enable_merging(void *addr, size_t len)
1207 {
1208 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1209 /* disabled by the user */
1210 return 0;
1211 }
1212
1213 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1214 }
1215
1216 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1217 MemoryRegion *mr)
1218 {
1219 RAMBlock *block, *new_block;
1220
1221 size = TARGET_PAGE_ALIGN(size);
1222 new_block = g_malloc0(sizeof(*new_block));
1223 new_block->fd = -1;
1224
1225 /* This assumes the iothread lock is taken here too. */
1226 qemu_mutex_lock_ramlist();
1227 new_block->mr = mr;
1228 new_block->offset = find_ram_offset(size);
1229 if (host) {
1230 new_block->host = host;
1231 new_block->flags |= RAM_PREALLOC_MASK;
1232 } else if (xen_enabled()) {
1233 if (mem_path) {
1234 fprintf(stderr, "-mem-path not supported with Xen\n");
1235 exit(1);
1236 }
1237 xen_ram_alloc(new_block->offset, size, mr);
1238 } else {
1239 if (mem_path) {
1240 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1241 /*
1242 * file_ram_alloc() needs to allocate just like
1243 * phys_mem_alloc, but we haven't bothered to provide
1244 * a hook there.
1245 */
1246 fprintf(stderr,
1247 "-mem-path not supported with this accelerator\n");
1248 exit(1);
1249 }
1250 new_block->host = file_ram_alloc(new_block, size, mem_path);
1251 }
1252 if (!new_block->host) {
1253 new_block->host = phys_mem_alloc(size);
1254 if (!new_block->host) {
1255 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1256 new_block->mr->name, strerror(errno));
1257 exit(1);
1258 }
1259 memory_try_enable_merging(new_block->host, size);
1260 }
1261 }
1262 new_block->length = size;
1263
1264 /* Keep the list sorted from biggest to smallest block. */
1265 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1266 if (block->length < new_block->length) {
1267 break;
1268 }
1269 }
1270 if (block) {
1271 QTAILQ_INSERT_BEFORE(block, new_block, next);
1272 } else {
1273 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1274 }
1275 ram_list.mru_block = NULL;
1276
1277 ram_list.version++;
1278 qemu_mutex_unlock_ramlist();
1279
1280 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1281 last_ram_offset() >> TARGET_PAGE_BITS);
1282 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1283 0, size >> TARGET_PAGE_BITS);
1284 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1285
1286 qemu_ram_setup_dump(new_block->host, size);
1287 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1288 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1289
1290 if (kvm_enabled())
1291 kvm_setup_guest_memory(new_block->host, size);
1292
1293 return new_block->offset;
1294 }
1295
1296 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1297 {
1298 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1299 }
1300
1301 void qemu_ram_free_from_ptr(ram_addr_t addr)
1302 {
1303 RAMBlock *block;
1304
1305 /* This assumes the iothread lock is taken here too. */
1306 qemu_mutex_lock_ramlist();
1307 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1308 if (addr == block->offset) {
1309 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1310 ram_list.mru_block = NULL;
1311 ram_list.version++;
1312 g_free(block);
1313 break;
1314 }
1315 }
1316 qemu_mutex_unlock_ramlist();
1317 }
1318
1319 void qemu_ram_free(ram_addr_t addr)
1320 {
1321 RAMBlock *block;
1322
1323 /* This assumes the iothread lock is taken here too. */
1324 qemu_mutex_lock_ramlist();
1325 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1326 if (addr == block->offset) {
1327 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1328 ram_list.mru_block = NULL;
1329 ram_list.version++;
1330 if (block->flags & RAM_PREALLOC_MASK) {
1331 ;
1332 } else if (xen_enabled()) {
1333 xen_invalidate_map_cache_entry(block->host);
1334 #ifndef _WIN32
1335 } else if (block->fd >= 0) {
1336 munmap(block->host, block->length);
1337 close(block->fd);
1338 #endif
1339 } else {
1340 qemu_anon_ram_free(block->host, block->length);
1341 }
1342 g_free(block);
1343 break;
1344 }
1345 }
1346 qemu_mutex_unlock_ramlist();
1347
1348 }
1349
1350 #ifndef _WIN32
1351 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1352 {
1353 RAMBlock *block;
1354 ram_addr_t offset;
1355 int flags;
1356 void *area, *vaddr;
1357
1358 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1359 offset = addr - block->offset;
1360 if (offset < block->length) {
1361 vaddr = block->host + offset;
1362 if (block->flags & RAM_PREALLOC_MASK) {
1363 ;
1364 } else if (xen_enabled()) {
1365 abort();
1366 } else {
1367 flags = MAP_FIXED;
1368 munmap(vaddr, length);
1369 if (block->fd >= 0) {
1370 #ifdef MAP_POPULATE
1371 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1372 MAP_PRIVATE;
1373 #else
1374 flags |= MAP_PRIVATE;
1375 #endif
1376 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1377 flags, block->fd, offset);
1378 } else {
1379 /*
1380 * Remap needs to match alloc. Accelerators that
1381 * set phys_mem_alloc never remap. If they did,
1382 * we'd need a remap hook here.
1383 */
1384 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1385
1386 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1387 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1388 flags, -1, 0);
1389 }
1390 if (area != vaddr) {
1391 fprintf(stderr, "Could not remap addr: "
1392 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1393 length, addr);
1394 exit(1);
1395 }
1396 memory_try_enable_merging(vaddr, length);
1397 qemu_ram_setup_dump(vaddr, length);
1398 }
1399 return;
1400 }
1401 }
1402 }
1403 #endif /* !_WIN32 */
1404
1405 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1406 With the exception of the softmmu code in this file, this should
1407 only be used for local memory (e.g. video ram) that the device owns,
1408 and knows it isn't going to access beyond the end of the block.
1409
1410 It should not be used for general purpose DMA.
1411 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1412 */
1413 void *qemu_get_ram_ptr(ram_addr_t addr)
1414 {
1415 RAMBlock *block = qemu_get_ram_block(addr);
1416
1417 if (xen_enabled()) {
1418 /* We need to check if the requested address is in RAM
1419 * because we don't want to map the entire memory in QEMU.
1420 * In that case, just map up to the end of the page.
1421 */
1422 if (block->offset == 0) {
1423 return xen_map_cache(addr, 0, 0);
1424 } else if (block->host == NULL) {
1425 block->host =
1426 xen_map_cache(block->offset, block->length, 1);
1427 }
1428 }
1429 return block->host + (addr - block->offset);
1430 }
1431
1432 /* Return a host pointer to guest RAM. Similar to qemu_get_ram_ptr,
1433 * but takes a size argument. */
1434 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1435 {
1436 if (*size == 0) {
1437 return NULL;
1438 }
1439 if (xen_enabled()) {
1440 return xen_map_cache(addr, *size, 1);
1441 } else {
1442 RAMBlock *block;
1443
1444 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1445 if (addr - block->offset < block->length) {
1446 if (addr - block->offset + *size > block->length)
1447 *size = block->length - addr + block->offset;
1448 return block->host + (addr - block->offset);
1449 }
1450 }
1451
1452 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1453 abort();
1454 }
1455 }
1456
1457 /* Some of the softmmu routines need to translate from a host pointer
1458 (typically a TLB entry) back to a ram offset. */
1459 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1460 {
1461 RAMBlock *block;
1462 uint8_t *host = ptr;
1463
1464 if (xen_enabled()) {
1465 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1466 return qemu_get_ram_block(*ram_addr)->mr;
1467 }
1468
1469 block = ram_list.mru_block;
1470 if (block && block->host && host - block->host < block->length) {
1471 goto found;
1472 }
1473
1474 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1475 /* This case happens when the block is not mapped. */
1476 if (block->host == NULL) {
1477 continue;
1478 }
1479 if (host - block->host < block->length) {
1480 goto found;
1481 }
1482 }
1483
1484 return NULL;
1485
1486 found:
1487 *ram_addr = block->offset + (host - block->host);
1488 return block->mr;
1489 }
1490
1491 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1492 uint64_t val, unsigned size)
1493 {
1494 int dirty_flags;
1495 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1496 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1497 tb_invalidate_phys_page_fast(ram_addr, size);
1498 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1499 }
1500 switch (size) {
1501 case 1:
1502 stb_p(qemu_get_ram_ptr(ram_addr), val);
1503 break;
1504 case 2:
1505 stw_p(qemu_get_ram_ptr(ram_addr), val);
1506 break;
1507 case 4:
1508 stl_p(qemu_get_ram_ptr(ram_addr), val);
1509 break;
1510 default:
1511 abort();
1512 }
1513 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1514 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1515 /* we remove the notdirty callback only if the code has been
1516 flushed */
1517 if (dirty_flags == 0xff) {
1518 CPUArchState *env = current_cpu->env_ptr;
1519 tlb_set_dirty(env, env->mem_io_vaddr);
1520 }
1521 }
1522
1523 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1524 unsigned size, bool is_write)
1525 {
1526 return is_write;
1527 }
1528
1529 static const MemoryRegionOps notdirty_mem_ops = {
1530 .write = notdirty_mem_write,
1531 .valid.accepts = notdirty_mem_accepts,
1532 .endianness = DEVICE_NATIVE_ENDIAN,
1533 };
1534
1535 /* Generate a debug exception if a watchpoint has been hit. */
1536 static void check_watchpoint(int offset, int len_mask, int flags)
1537 {
1538 CPUArchState *env = current_cpu->env_ptr;
1539 target_ulong pc, cs_base;
1540 target_ulong vaddr;
1541 CPUWatchpoint *wp;
1542 int cpu_flags;
1543
1544 if (env->watchpoint_hit) {
1545 /* We re-entered the check after replacing the TB. Now raise
1546 * the debug interrupt so that it will trigger after the
1547 * current instruction. */
1548 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1549 return;
1550 }
1551 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1552 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1553 if ((vaddr == (wp->vaddr & len_mask) ||
1554 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1555 wp->flags |= BP_WATCHPOINT_HIT;
1556 if (!env->watchpoint_hit) {
1557 env->watchpoint_hit = wp;
1558 tb_check_watchpoint(env);
1559 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1560 env->exception_index = EXCP_DEBUG;
1561 cpu_loop_exit(env);
1562 } else {
1563 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1564 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1565 cpu_resume_from_signal(env, NULL);
1566 }
1567 }
1568 } else {
1569 wp->flags &= ~BP_WATCHPOINT_HIT;
1570 }
1571 }
1572 }
1573
1574 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1575 so these check for a hit then pass through to the normal out-of-line
1576 phys routines. */
1577 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1578 unsigned size)
1579 {
1580 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1581 switch (size) {
1582 case 1: return ldub_phys(addr);
1583 case 2: return lduw_phys(addr);
1584 case 4: return ldl_phys(addr);
1585 default: abort();
1586 }
1587 }
1588
1589 static void watch_mem_write(void *opaque, hwaddr addr,
1590 uint64_t val, unsigned size)
1591 {
1592 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1593 switch (size) {
1594 case 1:
1595 stb_phys(addr, val);
1596 break;
1597 case 2:
1598 stw_phys(addr, val);
1599 break;
1600 case 4:
1601 stl_phys(addr, val);
1602 break;
1603 default: abort();
1604 }
1605 }
1606
1607 static const MemoryRegionOps watch_mem_ops = {
1608 .read = watch_mem_read,
1609 .write = watch_mem_write,
1610 .endianness = DEVICE_NATIVE_ENDIAN,
1611 };
1612
1613 static uint64_t subpage_read(void *opaque, hwaddr addr,
1614 unsigned len)
1615 {
1616 subpage_t *subpage = opaque;
1617 uint8_t buf[4];
1618
1619 #if defined(DEBUG_SUBPAGE)
1620 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1621 subpage, len, addr);
1622 #endif
1623 address_space_read(subpage->as, addr + subpage->base, buf, len);
1624 switch (len) {
1625 case 1:
1626 return ldub_p(buf);
1627 case 2:
1628 return lduw_p(buf);
1629 case 4:
1630 return ldl_p(buf);
1631 default:
1632 abort();
1633 }
1634 }
1635
1636 static void subpage_write(void *opaque, hwaddr addr,
1637 uint64_t value, unsigned len)
1638 {
1639 subpage_t *subpage = opaque;
1640 uint8_t buf[4];
1641
1642 #if defined(DEBUG_SUBPAGE)
1643 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1644 " value %"PRIx64"\n",
1645 __func__, subpage, len, addr, value);
1646 #endif
1647 switch (len) {
1648 case 1:
1649 stb_p(buf, value);
1650 break;
1651 case 2:
1652 stw_p(buf, value);
1653 break;
1654 case 4:
1655 stl_p(buf, value);
1656 break;
1657 default:
1658 abort();
1659 }
1660 address_space_write(subpage->as, addr + subpage->base, buf, len);
1661 }
1662
1663 static bool subpage_accepts(void *opaque, hwaddr addr,
1664 unsigned len, bool is_write)
1665 {
1666 subpage_t *subpage = opaque;
1667 #if defined(DEBUG_SUBPAGE)
1668 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1669 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1670 #endif
1671
1672 return address_space_access_valid(subpage->as, addr + subpage->base,
1673 len, is_write);
1674 }
1675
1676 static const MemoryRegionOps subpage_ops = {
1677 .read = subpage_read,
1678 .write = subpage_write,
1679 .valid.accepts = subpage_accepts,
1680 .endianness = DEVICE_NATIVE_ENDIAN,
1681 };
1682
1683 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1684 uint16_t section)
1685 {
1686 int idx, eidx;
1687
1688 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1689 return -1;
1690 idx = SUBPAGE_IDX(start);
1691 eidx = SUBPAGE_IDX(end);
1692 #if defined(DEBUG_SUBPAGE)
1693 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1694 __func__, mmio, start, end, idx, eidx, section);
1695 #endif
1696 for (; idx <= eidx; idx++) {
1697 mmio->sub_section[idx] = section;
1698 }
1699
1700 return 0;
1701 }
1702
1703 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1704 {
1705 subpage_t *mmio;
1706
1707 mmio = g_malloc0(sizeof(subpage_t));
1708
1709 mmio->as = as;
1710 mmio->base = base;
1711 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1712 "subpage", TARGET_PAGE_SIZE);
1713 mmio->iomem.subpage = true;
1714 #if defined(DEBUG_SUBPAGE)
1715 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1716 mmio, base, TARGET_PAGE_SIZE);
1717 #endif
1718 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1719
1720 return mmio;
1721 }
1722
1723 static uint16_t dummy_section(MemoryRegion *mr)
1724 {
1725 MemoryRegionSection section = {
1726 .mr = mr,
1727 .offset_within_address_space = 0,
1728 .offset_within_region = 0,
1729 .size = int128_2_64(),
1730 };
1731
1732 return phys_section_add(&section);
1733 }
1734
1735 MemoryRegion *iotlb_to_region(hwaddr index)
1736 {
1737 return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
1738 }
1739
1740 static void io_mem_init(void)
1741 {
1742 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1743 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1744 "unassigned", UINT64_MAX);
1745 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1746 "notdirty", UINT64_MAX);
1747 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1748 "watch", UINT64_MAX);
1749 }
1750
1751 static void mem_begin(MemoryListener *listener)
1752 {
1753 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1754 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1755
1756 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1757 d->as = as;
1758 as->next_dispatch = d;
1759 }
1760
1761 static void mem_commit(MemoryListener *listener)
1762 {
1763 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1764 AddressSpaceDispatch *cur = as->dispatch;
1765 AddressSpaceDispatch *next = as->next_dispatch;
1766
1767 next->nodes = next_map.nodes;
1768 next->sections = next_map.sections;
1769
1770 phys_page_compact_all(next, next_map.nodes_nb);
1771
1772 as->dispatch = next;
1773 g_free(cur);
1774 }
1775
1776 static void core_begin(MemoryListener *listener)
1777 {
1778 uint16_t n;
1779
1780 prev_map = g_new(PhysPageMap, 1);
1781 *prev_map = next_map;
1782
1783 memset(&next_map, 0, sizeof(next_map));
1784 n = dummy_section(&io_mem_unassigned);
1785 assert(n == PHYS_SECTION_UNASSIGNED);
1786 n = dummy_section(&io_mem_notdirty);
1787 assert(n == PHYS_SECTION_NOTDIRTY);
1788 n = dummy_section(&io_mem_rom);
1789 assert(n == PHYS_SECTION_ROM);
1790 n = dummy_section(&io_mem_watch);
1791 assert(n == PHYS_SECTION_WATCH);
1792 }
1793
1794 /* This listener's commit runs after the other AddressSpaceDispatch listeners',
1795 * by which point all AddressSpaceDispatch instances have switched to the next map.
1796 */
1797 static void core_commit(MemoryListener *listener)
1798 {
1799 phys_sections_free(prev_map);
1800 }
1801
1802 static void tcg_commit(MemoryListener *listener)
1803 {
1804 CPUState *cpu;
1805
1806 /* since each CPU stores ram addresses in its TLB cache, we must
1807 reset the modified entries */
1808 /* XXX: slow ! */
1809 CPU_FOREACH(cpu) {
1810 CPUArchState *env = cpu->env_ptr;
1811
1812 tlb_flush(env, 1);
1813 }
1814 }
1815
1816 static void core_log_global_start(MemoryListener *listener)
1817 {
1818 cpu_physical_memory_set_dirty_tracking(1);
1819 }
1820
1821 static void core_log_global_stop(MemoryListener *listener)
1822 {
1823 cpu_physical_memory_set_dirty_tracking(0);
1824 }
1825
1826 static MemoryListener core_memory_listener = {
1827 .begin = core_begin,
1828 .commit = core_commit,
1829 .log_global_start = core_log_global_start,
1830 .log_global_stop = core_log_global_stop,
1831 .priority = 1,
1832 };
1833
1834 static MemoryListener tcg_memory_listener = {
1835 .commit = tcg_commit,
1836 };
1837
1838 void address_space_init_dispatch(AddressSpace *as)
1839 {
1840 as->dispatch = NULL;
1841 as->dispatch_listener = (MemoryListener) {
1842 .begin = mem_begin,
1843 .commit = mem_commit,
1844 .region_add = mem_add,
1845 .region_nop = mem_add,
1846 .priority = 0,
1847 };
1848 memory_listener_register(&as->dispatch_listener, as);
1849 }
1850
1851 void address_space_destroy_dispatch(AddressSpace *as)
1852 {
1853 AddressSpaceDispatch *d = as->dispatch;
1854
1855 memory_listener_unregister(&as->dispatch_listener);
1856 g_free(d);
1857 as->dispatch = NULL;
1858 }
1859
1860 static void memory_map_init(void)
1861 {
1862 system_memory = g_malloc(sizeof(*system_memory));
1863
1864 assert(ADDR_SPACE_BITS <= 64);
1865
1866 memory_region_init(system_memory, NULL, "system",
1867 ADDR_SPACE_BITS == 64 ?
1868 UINT64_MAX : (0x1ULL << ADDR_SPACE_BITS));
1869 address_space_init(&address_space_memory, system_memory, "memory");
1870
1871 system_io = g_malloc(sizeof(*system_io));
1872 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1873 65536);
1874 address_space_init(&address_space_io, system_io, "I/O");
1875
1876 memory_listener_register(&core_memory_listener, &address_space_memory);
1877 if (tcg_enabled()) {
1878 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1879 }
1880 }
1881
1882 MemoryRegion *get_system_memory(void)
1883 {
1884 return system_memory;
1885 }
1886
1887 MemoryRegion *get_system_io(void)
1888 {
1889 return system_io;
1890 }
1891
1892 #endif /* !defined(CONFIG_USER_ONLY) */
1893
1894 /* physical memory access (slow version, mainly for debug) */
1895 #if defined(CONFIG_USER_ONLY)
1896 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1897 uint8_t *buf, int len, int is_write)
1898 {
1899 int l, flags;
1900 target_ulong page;
1901 void * p;
1902
1903 while (len > 0) {
1904 page = addr & TARGET_PAGE_MASK;
1905 l = (page + TARGET_PAGE_SIZE) - addr;
1906 if (l > len)
1907 l = len;
1908 flags = page_get_flags(page);
1909 if (!(flags & PAGE_VALID))
1910 return -1;
1911 if (is_write) {
1912 if (!(flags & PAGE_WRITE))
1913 return -1;
1914 /* XXX: this code should not depend on lock_user */
1915 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1916 return -1;
1917 memcpy(p, buf, l);
1918 unlock_user(p, addr, l);
1919 } else {
1920 if (!(flags & PAGE_READ))
1921 return -1;
1922 /* XXX: this code should not depend on lock_user */
1923 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1924 return -1;
1925 memcpy(buf, p, l);
1926 unlock_user(p, addr, 0);
1927 }
1928 len -= l;
1929 buf += l;
1930 addr += l;
1931 }
1932 return 0;
1933 }
1934
1935 #else
1936
1937 static void invalidate_and_set_dirty(hwaddr addr,
1938 hwaddr length)
1939 {
1940 if (!cpu_physical_memory_is_dirty(addr)) {
1941 /* invalidate code */
1942 tb_invalidate_phys_page_range(addr, addr + length, 0);
1943 /* set dirty bit */
1944 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1945 }
1946 xen_modified_memory(addr, length);
1947 }
1948
1949 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1950 {
1951 if (memory_region_is_ram(mr)) {
1952 return !(is_write && mr->readonly);
1953 }
1954 if (memory_region_is_romd(mr)) {
1955 return !is_write;
1956 }
1957
1958 return false;
1959 }
1960
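/* Clamp an MMIO access to something the region can accept: no larger than
 * the region's max_access_size (4 if unspecified), no larger than the natural
 * alignment of addr unless the region allows unaligned accesses, and always a
 * power of two. For example, an 8-byte request at addr 0x1006 is reduced to a
 * 2-byte access. */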
1961 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1962 {
1963 unsigned access_size_max = mr->ops->valid.max_access_size;
1964
1965 /* Regions are assumed to support 1-4 byte accesses unless
1966 otherwise specified. */
1967 if (access_size_max == 0) {
1968 access_size_max = 4;
1969 }
1970
1971 /* Bound the maximum access by the alignment of the address. */
1972 if (!mr->ops->impl.unaligned) {
1973 unsigned align_size_max = addr & -addr;
1974 if (align_size_max != 0 && align_size_max < access_size_max) {
1975 access_size_max = align_size_max;
1976 }
1977 }
1978
1979 /* Don't attempt accesses larger than the maximum. */
1980 if (l > access_size_max) {
1981 l = access_size_max;
1982 }
1983 if (l & (l - 1)) {
1984 l = 1 << (qemu_fls(l) - 1);
1985 }
1986
1987 return l;
1988 }
1989
1990 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1991 int len, bool is_write)
1992 {
1993 hwaddr l;
1994 uint8_t *ptr;
1995 uint64_t val;
1996 hwaddr addr1;
1997 MemoryRegion *mr;
1998 bool error = false;
1999
2000 while (len > 0) {
2001 l = len;
2002 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2003
2004 if (is_write) {
2005 if (!memory_access_is_direct(mr, is_write)) {
2006 l = memory_access_size(mr, l, addr1);
2007 /* XXX: could force current_cpu to NULL to avoid
2008 potential bugs */
2009 switch (l) {
2010 case 8:
2011 /* 64 bit write access */
2012 val = ldq_p(buf);
2013 error |= io_mem_write(mr, addr1, val, 8);
2014 break;
2015 case 4:
2016 /* 32 bit write access */
2017 val = ldl_p(buf);
2018 error |= io_mem_write(mr, addr1, val, 4);
2019 break;
2020 case 2:
2021 /* 16 bit write access */
2022 val = lduw_p(buf);
2023 error |= io_mem_write(mr, addr1, val, 2);
2024 break;
2025 case 1:
2026 /* 8 bit write access */
2027 val = ldub_p(buf);
2028 error |= io_mem_write(mr, addr1, val, 1);
2029 break;
2030 default:
2031 abort();
2032 }
2033 } else {
2034 addr1 += memory_region_get_ram_addr(mr);
2035 /* RAM case */
2036 ptr = qemu_get_ram_ptr(addr1);
2037 memcpy(ptr, buf, l);
2038 invalidate_and_set_dirty(addr1, l);
2039 }
2040 } else {
2041 if (!memory_access_is_direct(mr, is_write)) {
2042 /* I/O case */
2043 l = memory_access_size(mr, l, addr1);
2044 switch (l) {
2045 case 8:
2046 /* 64 bit read access */
2047 error |= io_mem_read(mr, addr1, &val, 8);
2048 stq_p(buf, val);
2049 break;
2050 case 4:
2051 /* 32 bit read access */
2052 error |= io_mem_read(mr, addr1, &val, 4);
2053 stl_p(buf, val);
2054 break;
2055 case 2:
2056 /* 16 bit read access */
2057 error |= io_mem_read(mr, addr1, &val, 2);
2058 stw_p(buf, val);
2059 break;
2060 case 1:
2061 /* 8 bit read access */
2062 error |= io_mem_read(mr, addr1, &val, 1);
2063 stb_p(buf, val);
2064 break;
2065 default:
2066 abort();
2067 }
2068 } else {
2069 /* RAM case */
2070 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2071 memcpy(buf, ptr, l);
2072 }
2073 }
2074 len -= l;
2075 buf += l;
2076 addr += l;
2077 }
2078
2079 return error;
2080 }
2081
2082 bool address_space_write(AddressSpace *as, hwaddr addr,
2083 const uint8_t *buf, int len)
2084 {
2085 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2086 }
2087
2088 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2089 {
2090 return address_space_rw(as, addr, buf, len, false);
2091 }
2092
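/*
 * Usage sketch (illustrative only): a device model reading a command block
 * from guest memory and writing a response back.  The buffer names and
 * sizes are invented for the example; real device code usually goes through
 * the dma_* wrappers in sysemu/dma.h, which follow the same pattern.
 */
static bool example_dma_roundtrip(AddressSpace *as, hwaddr cmd_addr,
                                  hwaddr resp_addr)
{
    uint8_t cmd[16];
    const uint8_t resp[4] = { 0xde, 0xad, 0xbe, 0xef };
    bool error = false;

    /* both helpers return true if any sub-access failed */
    error |= address_space_read(as, cmd_addr, cmd, sizeof(cmd));
    error |= address_space_write(as, resp_addr, resp, sizeof(resp));
    return !error;
}
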
2093
2094 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2095 int len, int is_write)
2096 {
2097 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2098 }
2099
2100 /* used for ROM loading: can write to both RAM and ROM */
2101 void cpu_physical_memory_write_rom(hwaddr addr,
2102 const uint8_t *buf, int len)
2103 {
2104 hwaddr l;
2105 uint8_t *ptr;
2106 hwaddr addr1;
2107 MemoryRegion *mr;
2108
2109 while (len > 0) {
2110 l = len;
2111 mr = address_space_translate(&address_space_memory,
2112 addr, &addr1, &l, true);
2113
2114 if (!(memory_region_is_ram(mr) ||
2115 memory_region_is_romd(mr))) {
2116 /* do nothing */
2117 } else {
2118 addr1 += memory_region_get_ram_addr(mr);
2119 /* ROM/RAM case */
2120 ptr = qemu_get_ram_ptr(addr1);
2121 memcpy(ptr, buf, l);
2122 invalidate_and_set_dirty(addr1, l);
2123 }
2124 len -= l;
2125 buf += l;
2126 addr += l;
2127 }
2128 }
2129
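/*
 * Usage sketch (illustrative): copying a firmware blob into a ROM region,
 * as the ROM loading code does.  The base address and helper name are
 * invented for the example.
 */
static void example_install_firmware(const uint8_t *blob, int blob_size)
{
    const hwaddr EXAMPLE_FIRMWARE_BASE = 0xfffc0000; /* hypothetical */

    cpu_physical_memory_write_rom(EXAMPLE_FIRMWARE_BASE, blob, blob_size);
}
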
2130 typedef struct {
2131 MemoryRegion *mr;
2132 void *buffer;
2133 hwaddr addr;
2134 hwaddr len;
2135 } BounceBuffer;
2136
2137 static BounceBuffer bounce;
2138
2139 typedef struct MapClient {
2140 void *opaque;
2141 void (*callback)(void *opaque);
2142 QLIST_ENTRY(MapClient) link;
2143 } MapClient;
2144
2145 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2146 = QLIST_HEAD_INITIALIZER(map_client_list);
2147
2148 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2149 {
2150 MapClient *client = g_malloc(sizeof(*client));
2151
2152 client->opaque = opaque;
2153 client->callback = callback;
2154 QLIST_INSERT_HEAD(&map_client_list, client, link);
2155 return client;
2156 }
2157
2158 static void cpu_unregister_map_client(void *_client)
2159 {
2160 MapClient *client = (MapClient *)_client;
2161
2162 QLIST_REMOVE(client, link);
2163 g_free(client);
2164 }
2165
2166 static void cpu_notify_map_clients(void)
2167 {
2168 MapClient *client;
2169
2170 while (!QLIST_EMPTY(&map_client_list)) {
2171 client = QLIST_FIRST(&map_client_list);
2172 client->callback(client->opaque);
2173 cpu_unregister_map_client(client);
2174 }
2175 }
2176
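/*
 * Usage sketch (illustrative): deferring a DMA transfer until a mapping can
 * succeed.  "ExampleDev" and both callbacks are invented names; the
 * registered callback is invoked from cpu_notify_map_clients() once the
 * bounce buffer is released.
 */
typedef struct ExampleDev ExampleDev;

static void example_dev_retry_dma(void *opaque)
{
    ExampleDev *dev = opaque;
    (void)dev;    /* re-issue the address_space_map() that returned NULL */
}

static void example_dev_defer_dma(ExampleDev *dev)
{
    cpu_register_map_client(dev, example_dev_retry_dma);
}
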
2177 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2178 {
2179 MemoryRegion *mr;
2180 hwaddr l, xlat;
2181
2182 while (len > 0) {
2183 l = len;
2184 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2185 if (!memory_access_is_direct(mr, is_write)) {
2186 l = memory_access_size(mr, l, addr);
2187 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2188 return false;
2189 }
2190 }
2191
2192 len -= l;
2193 addr += l;
2194 }
2195 return true;
2196 }
2197
2198 /* Map a physical memory region into a host virtual address.
2199 * May map a subset of the requested range, given by and returned in *plen.
2200 * May return NULL if resources needed to perform the mapping are exhausted.
2201 * Use only for reads OR writes - not for read-modify-write operations.
2202 * Use cpu_register_map_client() to know when retrying the map operation is
2203 * likely to succeed.
2204 */
2205 void *address_space_map(AddressSpace *as,
2206 hwaddr addr,
2207 hwaddr *plen,
2208 bool is_write)
2209 {
2210 hwaddr len = *plen;
2211 hwaddr done = 0;
2212 hwaddr l, xlat, base;
2213 MemoryRegion *mr, *this_mr;
2214 ram_addr_t raddr;
2215
2216 if (len == 0) {
2217 return NULL;
2218 }
2219
2220 l = len;
2221 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2222 if (!memory_access_is_direct(mr, is_write)) {
2223 if (bounce.buffer) {
2224 return NULL;
2225 }
2226 /* Avoid unbounded allocations */
2227 l = MIN(l, TARGET_PAGE_SIZE);
2228 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2229 bounce.addr = addr;
2230 bounce.len = l;
2231
2232 memory_region_ref(mr);
2233 bounce.mr = mr;
2234 if (!is_write) {
2235 address_space_read(as, addr, bounce.buffer, l);
2236 }
2237
2238 *plen = l;
2239 return bounce.buffer;
2240 }
2241
2242 base = xlat;
2243 raddr = memory_region_get_ram_addr(mr);
2244
2245 for (;;) {
2246 len -= l;
2247 addr += l;
2248 done += l;
2249 if (len == 0) {
2250 break;
2251 }
2252
2253 l = len;
2254 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2255 if (this_mr != mr || xlat != base + done) {
2256 break;
2257 }
2258 }
2259
2260 memory_region_ref(mr);
2261 *plen = done;
2262 return qemu_ram_ptr_length(raddr + base, plen);
2263 }
2264
2265 /* Unmaps a memory region previously mapped by address_space_map().
2266 * Will also mark the memory as dirty if is_write == 1. access_len gives
2267 * the amount of memory that was actually read or written by the caller.
2268 */
2269 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2270 int is_write, hwaddr access_len)
2271 {
2272 if (buffer != bounce.buffer) {
2273 MemoryRegion *mr;
2274 ram_addr_t addr1;
2275
2276 mr = qemu_ram_addr_from_host(buffer, &addr1);
2277 assert(mr != NULL);
2278 if (is_write) {
2279 while (access_len) {
2280 unsigned l;
2281 l = TARGET_PAGE_SIZE;
2282 if (l > access_len)
2283 l = access_len;
2284 invalidate_and_set_dirty(addr1, l);
2285 addr1 += l;
2286 access_len -= l;
2287 }
2288 }
2289 if (xen_enabled()) {
2290 xen_invalidate_map_cache_entry(buffer);
2291 }
2292 memory_region_unref(mr);
2293 return;
2294 }
2295 if (is_write) {
2296 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2297 }
2298 qemu_vfree(bounce.buffer);
2299 bounce.buffer = NULL;
2300 memory_region_unref(bounce.mr);
2301 cpu_notify_map_clients();
2302 }
2303
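/*
 * Usage sketch (illustrative): zero-copy read of guest memory through
 * address_space_map()/address_space_unmap().  The helper name is invented;
 * device code normally uses the dma_memory_map()/dma_memory_unmap()
 * wrappers, which follow the same pattern.
 */
static bool example_peek_guest(AddressSpace *as, hwaddr guest_addr,
                               uint8_t *out, hwaddr want)
{
    hwaddr mapped = want;
    void *host = address_space_map(as, guest_addr, &mapped, false);

    if (!host) {
        /* resources exhausted (e.g. the bounce buffer is in use);
         * cpu_register_map_client() can be used to retry later */
        return false;
    }
    /* only 'mapped' bytes are guaranteed to be contiguous; may be < want */
    memcpy(out, host, mapped);
    address_space_unmap(as, host, mapped, false, mapped);
    return mapped == want;
}
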
2304 void *cpu_physical_memory_map(hwaddr addr,
2305 hwaddr *plen,
2306 int is_write)
2307 {
2308 return address_space_map(&address_space_memory, addr, plen, is_write);
2309 }
2310
2311 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2312 int is_write, hwaddr access_len)
2313 {
2314 address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2315 }
2316
2317 /* warning: addr must be aligned */
2318 static inline uint32_t ldl_phys_internal(hwaddr addr,
2319 enum device_endian endian)
2320 {
2321 uint8_t *ptr;
2322 uint64_t val;
2323 MemoryRegion *mr;
2324 hwaddr l = 4;
2325 hwaddr addr1;
2326
2327 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2328 false);
2329 if (l < 4 || !memory_access_is_direct(mr, false)) {
2330 /* I/O case */
2331 io_mem_read(mr, addr1, &val, 4);
2332 #if defined(TARGET_WORDS_BIGENDIAN)
2333 if (endian == DEVICE_LITTLE_ENDIAN) {
2334 val = bswap32(val);
2335 }
2336 #else
2337 if (endian == DEVICE_BIG_ENDIAN) {
2338 val = bswap32(val);
2339 }
2340 #endif
2341 } else {
2342 /* RAM case */
2343 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2344 & TARGET_PAGE_MASK)
2345 + addr1);
2346 switch (endian) {
2347 case DEVICE_LITTLE_ENDIAN:
2348 val = ldl_le_p(ptr);
2349 break;
2350 case DEVICE_BIG_ENDIAN:
2351 val = ldl_be_p(ptr);
2352 break;
2353 default:
2354 val = ldl_p(ptr);
2355 break;
2356 }
2357 }
2358 return val;
2359 }
2360
2361 uint32_t ldl_phys(hwaddr addr)
2362 {
2363 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2364 }
2365
2366 uint32_t ldl_le_phys(hwaddr addr)
2367 {
2368 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2369 }
2370
2371 uint32_t ldl_be_phys(hwaddr addr)
2372 {
2373 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2374 }
2375
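/*
 * Usage sketch (illustrative): reading a 32-bit little-endian field of a
 * hypothetical in-guest descriptor, independent of the target byte order.
 * The field offset and helper name are invented for the example.
 */
static uint32_t example_read_desc_len(hwaddr desc_base)
{
    const hwaddr EXAMPLE_LEN_OFFSET = 4;  /* hypothetical field offset */

    return ldl_le_phys(desc_base + EXAMPLE_LEN_OFFSET);
}
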
2376 /* warning: addr must be aligned */
2377 static inline uint64_t ldq_phys_internal(hwaddr addr,
2378 enum device_endian endian)
2379 {
2380 uint8_t *ptr;
2381 uint64_t val;
2382 MemoryRegion *mr;
2383 hwaddr l = 8;
2384 hwaddr addr1;
2385
2386 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2387 false);
2388 if (l < 8 || !memory_access_is_direct(mr, false)) {
2389 /* I/O case */
2390 io_mem_read(mr, addr1, &val, 8);
2391 #if defined(TARGET_WORDS_BIGENDIAN)
2392 if (endian == DEVICE_LITTLE_ENDIAN) {
2393 val = bswap64(val);
2394 }
2395 #else
2396 if (endian == DEVICE_BIG_ENDIAN) {
2397 val = bswap64(val);
2398 }
2399 #endif
2400 } else {
2401 /* RAM case */
2402 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2403 & TARGET_PAGE_MASK)
2404 + addr1);
2405 switch (endian) {
2406 case DEVICE_LITTLE_ENDIAN:
2407 val = ldq_le_p(ptr);
2408 break;
2409 case DEVICE_BIG_ENDIAN:
2410 val = ldq_be_p(ptr);
2411 break;
2412 default:
2413 val = ldq_p(ptr);
2414 break;
2415 }
2416 }
2417 return val;
2418 }
2419
2420 uint64_t ldq_phys(hwaddr addr)
2421 {
2422 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2423 }
2424
2425 uint64_t ldq_le_phys(hwaddr addr)
2426 {
2427 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2428 }
2429
2430 uint64_t ldq_be_phys(hwaddr addr)
2431 {
2432 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2433 }
2434
2435 /* XXX: optimize */
2436 uint32_t ldub_phys(hwaddr addr)
2437 {
2438 uint8_t val;
2439 cpu_physical_memory_read(addr, &val, 1);
2440 return val;
2441 }
2442
2443 /* warning: addr must be aligned */
2444 static inline uint32_t lduw_phys_internal(hwaddr addr,
2445 enum device_endian endian)
2446 {
2447 uint8_t *ptr;
2448 uint64_t val;
2449 MemoryRegion *mr;
2450 hwaddr l = 2;
2451 hwaddr addr1;
2452
2453 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2454 false);
2455 if (l < 2 || !memory_access_is_direct(mr, false)) {
2456 /* I/O case */
2457 io_mem_read(mr, addr1, &val, 2);
2458 #if defined(TARGET_WORDS_BIGENDIAN)
2459 if (endian == DEVICE_LITTLE_ENDIAN) {
2460 val = bswap16(val);
2461 }
2462 #else
2463 if (endian == DEVICE_BIG_ENDIAN) {
2464 val = bswap16(val);
2465 }
2466 #endif
2467 } else {
2468 /* RAM case */
2469 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2470 & TARGET_PAGE_MASK)
2471 + addr1);
2472 switch (endian) {
2473 case DEVICE_LITTLE_ENDIAN:
2474 val = lduw_le_p(ptr);
2475 break;
2476 case DEVICE_BIG_ENDIAN:
2477 val = lduw_be_p(ptr);
2478 break;
2479 default:
2480 val = lduw_p(ptr);
2481 break;
2482 }
2483 }
2484 return val;
2485 }
2486
2487 uint32_t lduw_phys(hwaddr addr)
2488 {
2489 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2490 }
2491
2492 uint32_t lduw_le_phys(hwaddr addr)
2493 {
2494 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2495 }
2496
2497 uint32_t lduw_be_phys(hwaddr addr)
2498 {
2499 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2500 }
2501
2502 /* warning: addr must be aligned. The RAM page is not marked as dirty
2503 and the code inside is not invalidated. This is useful if the dirty
2504 bits are used to track modified PTEs */
2505 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2506 {
2507 uint8_t *ptr;
2508 MemoryRegion *mr;
2509 hwaddr l = 4;
2510 hwaddr addr1;
2511
2512 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2513 true);
2514 if (l < 4 || !memory_access_is_direct(mr, true)) {
2515 io_mem_write(mr, addr1, val, 4);
2516 } else {
2517 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2518 ptr = qemu_get_ram_ptr(addr1);
2519 stl_p(ptr, val);
2520
2521 if (unlikely(in_migration)) {
2522 if (!cpu_physical_memory_is_dirty(addr1)) {
2523 /* invalidate code */
2524 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2525 /* set dirty bit */
2526 cpu_physical_memory_set_dirty_flags(
2527 addr1, (0xff & ~CODE_DIRTY_FLAG));
2528 }
2529 }
2530 }
2531 }
2532
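/*
 * Usage sketch (illustrative): setting the "accessed" bit of a hypothetical
 * 32-bit page table entry the way target MMU helpers do, without flagging
 * the page dirty or invalidating translated code in it.  The bit mask and
 * helper name are invented for the example.
 */
#define EXAMPLE_PTE_ACCESSED 0x20

static void example_mark_pte_accessed(hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);

    if (!(pte & EXAMPLE_PTE_ACCESSED)) {
        stl_phys_notdirty(pte_addr, pte | EXAMPLE_PTE_ACCESSED);
    }
}
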
2533 /* warning: addr must be aligned */
2534 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2535 enum device_endian endian)
2536 {
2537 uint8_t *ptr;
2538 MemoryRegion *mr;
2539 hwaddr l = 4;
2540 hwaddr addr1;
2541
2542 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2543 true);
2544 if (l < 4 || !memory_access_is_direct(mr, true)) {
2545 #if defined(TARGET_WORDS_BIGENDIAN)
2546 if (endian == DEVICE_LITTLE_ENDIAN) {
2547 val = bswap32(val);
2548 }
2549 #else
2550 if (endian == DEVICE_BIG_ENDIAN) {
2551 val = bswap32(val);
2552 }
2553 #endif
2554 io_mem_write(mr, addr1, val, 4);
2555 } else {
2556 /* RAM case */
2557 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2558 ptr = qemu_get_ram_ptr(addr1);
2559 switch (endian) {
2560 case DEVICE_LITTLE_ENDIAN:
2561 stl_le_p(ptr, val);
2562 break;
2563 case DEVICE_BIG_ENDIAN:
2564 stl_be_p(ptr, val);
2565 break;
2566 default:
2567 stl_p(ptr, val);
2568 break;
2569 }
2570 invalidate_and_set_dirty(addr1, 4);
2571 }
2572 }
2573
2574 void stl_phys(hwaddr addr, uint32_t val)
2575 {
2576 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2577 }
2578
2579 void stl_le_phys(hwaddr addr, uint32_t val)
2580 {
2581 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2582 }
2583
2584 void stl_be_phys(hwaddr addr, uint32_t val)
2585 {
2586 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2587 }
2588
2589 /* XXX: optimize */
2590 void stb_phys(hwaddr addr, uint32_t val)
2591 {
2592 uint8_t v = val;
2593 cpu_physical_memory_write(addr, &v, 1);
2594 }
2595
2596 /* warning: addr must be aligned */
2597 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2598 enum device_endian endian)
2599 {
2600 uint8_t *ptr;
2601 MemoryRegion *mr;
2602 hwaddr l = 2;
2603 hwaddr addr1;
2604
2605 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2606 true);
2607 if (l < 2 || !memory_access_is_direct(mr, true)) {
2608 #if defined(TARGET_WORDS_BIGENDIAN)
2609 if (endian == DEVICE_LITTLE_ENDIAN) {
2610 val = bswap16(val);
2611 }
2612 #else
2613 if (endian == DEVICE_BIG_ENDIAN) {
2614 val = bswap16(val);
2615 }
2616 #endif
2617 io_mem_write(mr, addr1, val, 2);
2618 } else {
2619 /* RAM case */
2620 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2621 ptr = qemu_get_ram_ptr(addr1);
2622 switch (endian) {
2623 case DEVICE_LITTLE_ENDIAN:
2624 stw_le_p(ptr, val);
2625 break;
2626 case DEVICE_BIG_ENDIAN:
2627 stw_be_p(ptr, val);
2628 break;
2629 default:
2630 stw_p(ptr, val);
2631 break;
2632 }
2633 invalidate_and_set_dirty(addr1, 2);
2634 }
2635 }
2636
2637 void stw_phys(hwaddr addr, uint32_t val)
2638 {
2639 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2640 }
2641
2642 void stw_le_phys(hwaddr addr, uint32_t val)
2643 {
2644 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2645 }
2646
2647 void stw_be_phys(hwaddr addr, uint32_t val)
2648 {
2649 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2650 }
2651
2652 /* XXX: optimize */
2653 void stq_phys(hwaddr addr, uint64_t val)
2654 {
2655 val = tswap64(val);
2656 cpu_physical_memory_write(addr, &val, 8);
2657 }
2658
2659 void stq_le_phys(hwaddr addr, uint64_t val)
2660 {
2661 val = cpu_to_le64(val);
2662 cpu_physical_memory_write(addr, &val, 8);
2663 }
2664
2665 void stq_be_phys(hwaddr addr, uint64_t val)
2666 {
2667 val = cpu_to_be64(val);
2668 cpu_physical_memory_write(addr, &val, 8);
2669 }
2670
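/*
 * Usage sketch (illustrative): publishing a 64-bit little-endian status
 * field to the guest, mirroring the load example above.  The helper name is
 * invented; the address is assumed to point at a guest-visible structure.
 */
static void example_write_status(hwaddr status_addr, uint64_t status)
{
    stq_le_phys(status_addr, status);
}
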
2671 /* virtual memory access for debug (includes writing to ROM) */
2672 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2673 uint8_t *buf, int len, int is_write)
2674 {
2675 int l;
2676 hwaddr phys_addr;
2677 target_ulong page;
2678
2679 while (len > 0) {
2680 page = addr & TARGET_PAGE_MASK;
2681 phys_addr = cpu_get_phys_page_debug(cpu, page);
2682 /* if no physical page mapped, return an error */
2683 if (phys_addr == -1)
2684 return -1;
2685 l = (page + TARGET_PAGE_SIZE) - addr;
2686 if (l > len)
2687 l = len;
2688 phys_addr += (addr & ~TARGET_PAGE_MASK);
2689 if (is_write)
2690 cpu_physical_memory_write_rom(phys_addr, buf, l);
2691 else
2692 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2693 len -= l;
2694 buf += l;
2695 addr += l;
2696 }
2697 return 0;
2698 }
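
/*
 * Usage sketch (illustrative): peeking at a guest virtual address the way
 * the gdb stub or the monitor does, letting cpu_get_phys_page_debug()
 * perform the page table walk.  The helper name is invented.
 */
static bool example_peek_vaddr(CPUState *cpu, target_ulong vaddr,
                               uint8_t *out, int size)
{
    return cpu_memory_rw_debug(cpu, vaddr, out, size, 0) == 0;
}
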
2699 #endif
2700
2701 #if !defined(CONFIG_USER_ONLY)
2702
2703 /*
2704 * A helper function for the _utterly broken_ virtio device model to find out if
2705 * it's running on a big endian machine. Don't do this at home kids!
2706 */
2707 bool virtio_is_big_endian(void);
2708 bool virtio_is_big_endian(void)
2709 {
2710 #if defined(TARGET_WORDS_BIGENDIAN)
2711 return true;
2712 #else
2713 return false;
2714 #endif
2715 }
2716
2717 #endif
2718
2719 #ifndef CONFIG_USER_ONLY
2720 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2721 {
2722 MemoryRegion *mr;
2723 hwaddr l = 1;
2724
2725 mr = address_space_translate(&address_space_memory,
2726 phys_addr, &phys_addr, &l, false);
2727
2728 return !(memory_region_is_ram(mr) ||
2729 memory_region_is_romd(mr));
2730 }
2731
2732 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2733 {
2734 RAMBlock *block;
2735
2736 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2737 func(block->host, block->offset, block->length, opaque);
2738 }
2739 }
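
/*
 * Usage sketch (illustrative): summing the size of all RAM blocks with the
 * iterator above.  The helper names are invented; the callback matches the
 * RAMBlockIterFunc signature used by qemu_ram_foreach_block().
 */
static void example_add_block_length(void *host, ram_addr_t offset,
                                     ram_addr_t length, void *opaque)
{
    *(ram_addr_t *)opaque += length;
}

static ram_addr_t example_total_ram(void)
{
    ram_addr_t total = 0;

    qemu_ram_foreach_block(example_add_block_length, &total);
    return total;
}
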
2740 #endif