]> git.proxmox.com Git - mirror_qemu.git/blob - exec.c
memory: cpu_physical_memory_mask_dirty_range() always clears a single flag
[mirror_qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "hw/xen/xen.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
42 #include <qemu.h>
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
45 #include "trace.h"
46 #endif
47 #include "exec/cpu-all.h"
48
49 #include "exec/cputlb.h"
50 #include "translate-all.h"
51
52 #include "exec/memory-internal.h"
53 #include "qemu/cache-utils.h"
54
55 #include "qemu/range.h"
56
57 //#define DEBUG_SUBPAGE
58
59 #if !defined(CONFIG_USER_ONLY)
60 static int in_migration;
61
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
66
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
69
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
72
73 #endif
74
75 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
76 /* current CPU in the current thread. It is only valid inside
77 cpu_exec() */
78 DEFINE_TLS(CPUState *, current_cpu);
79 /* 0 = Do not count executed instructions.
80 1 = Precise instruction counting.
81 2 = Adaptive rate instruction counting. */
82 int use_icount;
83
84 #if !defined(CONFIG_USER_ONLY)
85
86 typedef struct PhysPageEntry PhysPageEntry;
87
/*
 * One slot of the multi-level physical page map, packed into 32 bits:
 * a 6-bit skip count and a 26-bit index.  The all-ones index value is
 * reserved as PHYS_MAP_NODE_NIL.
 */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
94
95 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
96
97 /* Size of the L2 (and L3, etc) page tables. */
98 #define ADDR_SPACE_BITS 64
99
100 #define P_L2_BITS 9
101 #define P_L2_SIZE (1 << P_L2_BITS)
102
103 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
104
105 typedef PhysPageEntry Node[P_L2_SIZE];
106
/*
 * Backing storage for a physical page map: a growable array of radix
 * tree nodes and a growable array of MemoryRegionSections.
 */
typedef struct PhysPageMap {
    /* Number of entries of 'sections' currently in use. */
    unsigned sections_nb;
    /* Number of entries 'sections' has room for. */
    unsigned sections_nb_alloc;
    /* Number of entries of 'nodes' currently in use. */
    unsigned nodes_nb;
    /* Number of entries 'nodes' has room for. */
    unsigned nodes_nb_alloc;
    /* Node array, grown with g_renew() in phys_map_node_reserve(). */
    Node *nodes;
    /* Section array, grown with g_renew() in phys_section_add(). */
    MemoryRegionSection *sections;
} PhysPageMap;
115
/*
 * Lookup state used to turn an address within an AddressSpace into a
 * MemoryRegionSection.
 */
struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    /* Node and section arrays that phys_map indexes into. */
    PhysPageMap map;
    /* Address space handed to subpage_init() when a subpage is created. */
    AddressSpace *as;
};
124
125 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/*
 * Container used when a target page is shared by several sections.
 * The page is mapped to 'iomem'; sub_section[] is indexed by
 * SUBPAGE_IDX(addr) and holds an index into the dispatch's section array.
 */
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;
132
133 #define PHYS_SECTION_UNASSIGNED 0
134 #define PHYS_SECTION_NOTDIRTY 1
135 #define PHYS_SECTION_ROM 2
136 #define PHYS_SECTION_WATCH 3
137
138 static void io_mem_init(void);
139 static void memory_map_init(void);
140
141 static MemoryRegion io_mem_watch;
142 #endif
143
144 #if !defined(CONFIG_USER_ONLY)
145
146 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
147 {
148 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
149 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
151 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
152 }
153 }
154
155 static uint32_t phys_map_node_alloc(PhysPageMap *map)
156 {
157 unsigned i;
158 uint32_t ret;
159
160 ret = map->nodes_nb++;
161 assert(ret != PHYS_MAP_NODE_NIL);
162 assert(ret != map->nodes_nb_alloc);
163 for (i = 0; i < P_L2_SIZE; ++i) {
164 map->nodes[ret][i].skip = 1;
165 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
166 }
167 return ret;
168 }
169
170 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
171 hwaddr *index, hwaddr *nb, uint16_t leaf,
172 int level)
173 {
174 PhysPageEntry *p;
175 int i;
176 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
177
178 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
179 lp->ptr = phys_map_node_alloc(map);
180 p = map->nodes[lp->ptr];
181 if (level == 0) {
182 for (i = 0; i < P_L2_SIZE; i++) {
183 p[i].skip = 0;
184 p[i].ptr = PHYS_SECTION_UNASSIGNED;
185 }
186 }
187 } else {
188 p = map->nodes[lp->ptr];
189 }
190 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
191
192 while (*nb && lp < &p[P_L2_SIZE]) {
193 if ((*index & (step - 1)) == 0 && *nb >= step) {
194 lp->skip = 0;
195 lp->ptr = leaf;
196 *index += step;
197 *nb -= step;
198 } else {
199 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
200 }
201 ++lp;
202 }
203 }
204
205 static void phys_page_set(AddressSpaceDispatch *d,
206 hwaddr index, hwaddr nb,
207 uint16_t leaf)
208 {
209 /* Wildly overreserve - it doesn't matter much. */
210 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
211
212 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
213 }
214
215 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
216 * and update our entry so we can skip it and go directly to the destination.
217 */
218 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
219 {
220 unsigned valid_ptr = P_L2_SIZE;
221 int valid = 0;
222 PhysPageEntry *p;
223 int i;
224
225 if (lp->ptr == PHYS_MAP_NODE_NIL) {
226 return;
227 }
228
229 p = nodes[lp->ptr];
230 for (i = 0; i < P_L2_SIZE; i++) {
231 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
232 continue;
233 }
234
235 valid_ptr = i;
236 valid++;
237 if (p[i].skip) {
238 phys_page_compact(&p[i], nodes, compacted);
239 }
240 }
241
242 /* We can only compress if there's only one child. */
243 if (valid != 1) {
244 return;
245 }
246
247 assert(valid_ptr < P_L2_SIZE);
248
249 /* Don't compress if it won't fit in the # of bits we have. */
250 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
251 return;
252 }
253
254 lp->ptr = p[valid_ptr].ptr;
255 if (!p[valid_ptr].skip) {
256 /* If our only child is a leaf, make this a leaf. */
257 /* By design, we should have made this node a leaf to begin with so we
258 * should never reach here.
259 * But since it's so simple to handle this, let's do it just in case we
260 * change this rule.
261 */
262 lp->skip = 0;
263 } else {
264 lp->skip += p[valid_ptr].skip;
265 }
266 }
267
268 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
269 {
270 DECLARE_BITMAP(compacted, nodes_nb);
271
272 if (d->phys_map.skip) {
273 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
274 }
275 }
276
277 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
278 Node *nodes, MemoryRegionSection *sections)
279 {
280 PhysPageEntry *p;
281 hwaddr index = addr >> TARGET_PAGE_BITS;
282 int i;
283
284 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
285 if (lp.ptr == PHYS_MAP_NODE_NIL) {
286 return &sections[PHYS_SECTION_UNASSIGNED];
287 }
288 p = nodes[lp.ptr];
289 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
290 }
291
292 if (sections[lp.ptr].size.hi ||
293 range_covers_byte(sections[lp.ptr].offset_within_address_space,
294 sections[lp.ptr].size.lo, addr)) {
295 return &sections[lp.ptr];
296 } else {
297 return &sections[PHYS_SECTION_UNASSIGNED];
298 }
299 }
300
301 bool memory_region_is_unassigned(MemoryRegion *mr)
302 {
303 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
304 && mr != &io_mem_watch;
305 }
306
307 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
308 hwaddr addr,
309 bool resolve_subpage)
310 {
311 MemoryRegionSection *section;
312 subpage_t *subpage;
313
314 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
315 if (resolve_subpage && section->mr->subpage) {
316 subpage = container_of(section->mr, subpage_t, iomem);
317 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
318 }
319 return section;
320 }
321
322 static MemoryRegionSection *
323 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
324 hwaddr *plen, bool resolve_subpage)
325 {
326 MemoryRegionSection *section;
327 Int128 diff;
328
329 section = address_space_lookup_region(d, addr, resolve_subpage);
330 /* Compute offset within MemoryRegionSection */
331 addr -= section->offset_within_address_space;
332
333 /* Compute offset within MemoryRegion */
334 *xlat = addr + section->offset_within_region;
335
336 diff = int128_sub(section->mr->size, int128_make64(addr));
337 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
338 return section;
339 }
340
341 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
342 hwaddr *xlat, hwaddr *plen,
343 bool is_write)
344 {
345 IOMMUTLBEntry iotlb;
346 MemoryRegionSection *section;
347 MemoryRegion *mr;
348 hwaddr len = *plen;
349
350 for (;;) {
351 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
352 mr = section->mr;
353
354 if (!mr->iommu_ops) {
355 break;
356 }
357
358 iotlb = mr->iommu_ops->translate(mr, addr);
359 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
360 | (addr & iotlb.addr_mask));
361 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
362 if (!(iotlb.perm & (1 << is_write))) {
363 mr = &io_mem_unassigned;
364 break;
365 }
366
367 as = iotlb.target_as;
368 }
369
370 *plen = len;
371 *xlat = addr;
372 return mr;
373 }
374
375 MemoryRegionSection *
376 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
377 hwaddr *plen)
378 {
379 MemoryRegionSection *section;
380 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
381
382 assert(!section->mr->iommu_ops);
383 return section;
384 }
385 #endif
386
387 void cpu_exec_init_all(void)
388 {
389 #if !defined(CONFIG_USER_ONLY)
390 qemu_mutex_init(&ram_list.mutex);
391 memory_map_init();
392 io_mem_init();
393 #endif
394 }
395
396 #if !defined(CONFIG_USER_ONLY)
397
398 static int cpu_common_post_load(void *opaque, int version_id)
399 {
400 CPUState *cpu = opaque;
401
402 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
403 version_id is increased. */
404 cpu->interrupt_request &= ~0x01;
405 tlb_flush(cpu->env_ptr, 1);
406
407 return 0;
408 }
409
/*
 * VMState description named "cpu_common": covers the 'halted' and
 * 'interrupt_request' members of CPUState and runs
 * cpu_common_post_load() after they have been loaded.
 */
const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
422
423 #endif
424
425 CPUState *qemu_get_cpu(int index)
426 {
427 CPUState *cpu;
428
429 CPU_FOREACH(cpu) {
430 if (cpu->cpu_index == index) {
431 return cpu;
432 }
433 }
434
435 return NULL;
436 }
437
438 void cpu_exec_init(CPUArchState *env)
439 {
440 CPUState *cpu = ENV_GET_CPU(env);
441 CPUClass *cc = CPU_GET_CLASS(cpu);
442 CPUState *some_cpu;
443 int cpu_index;
444
445 #if defined(CONFIG_USER_ONLY)
446 cpu_list_lock();
447 #endif
448 cpu_index = 0;
449 CPU_FOREACH(some_cpu) {
450 cpu_index++;
451 }
452 cpu->cpu_index = cpu_index;
453 cpu->numa_node = 0;
454 QTAILQ_INIT(&env->breakpoints);
455 QTAILQ_INIT(&env->watchpoints);
456 #ifndef CONFIG_USER_ONLY
457 cpu->thread_id = qemu_get_thread_id();
458 #endif
459 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
460 #if defined(CONFIG_USER_ONLY)
461 cpu_list_unlock();
462 #endif
463 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
464 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
465 }
466 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
467 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
468 cpu_save, cpu_load, env);
469 assert(cc->vmsd == NULL);
470 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
471 #endif
472 if (cc->vmsd != NULL) {
473 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
474 }
475 }
476
477 #if defined(TARGET_HAS_ICE)
478 #if defined(CONFIG_USER_ONLY)
479 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
480 {
481 tb_invalidate_phys_page_range(pc, pc + 1, 0);
482 }
483 #else
484 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
485 {
486 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
487 if (phys != -1) {
488 tb_invalidate_phys_addr(phys | (pc & ~TARGET_PAGE_MASK));
489 }
490 }
491 #endif
492 #endif /* TARGET_HAS_ICE */
493
494 #if defined(CONFIG_USER_ONLY)
495 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
496
497 {
498 }
499
500 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
501 int flags, CPUWatchpoint **watchpoint)
502 {
503 return -ENOSYS;
504 }
505 #else
506 /* Add a watchpoint. */
507 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
508 int flags, CPUWatchpoint **watchpoint)
509 {
510 target_ulong len_mask = ~(len - 1);
511 CPUWatchpoint *wp;
512
513 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
514 if ((len & (len - 1)) || (addr & ~len_mask) ||
515 len == 0 || len > TARGET_PAGE_SIZE) {
516 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
517 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
518 return -EINVAL;
519 }
520 wp = g_malloc(sizeof(*wp));
521
522 wp->vaddr = addr;
523 wp->len_mask = len_mask;
524 wp->flags = flags;
525
526 /* keep all GDB-injected watchpoints in front */
527 if (flags & BP_GDB)
528 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
529 else
530 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
531
532 tlb_flush_page(env, addr);
533
534 if (watchpoint)
535 *watchpoint = wp;
536 return 0;
537 }
538
539 /* Remove a specific watchpoint. */
540 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
541 int flags)
542 {
543 target_ulong len_mask = ~(len - 1);
544 CPUWatchpoint *wp;
545
546 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
547 if (addr == wp->vaddr && len_mask == wp->len_mask
548 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
549 cpu_watchpoint_remove_by_ref(env, wp);
550 return 0;
551 }
552 }
553 return -ENOENT;
554 }
555
556 /* Remove a specific watchpoint by reference. */
557 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
558 {
559 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
560
561 tlb_flush_page(env, watchpoint->vaddr);
562
563 g_free(watchpoint);
564 }
565
566 /* Remove all matching watchpoints. */
567 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
568 {
569 CPUWatchpoint *wp, *next;
570
571 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
572 if (wp->flags & mask)
573 cpu_watchpoint_remove_by_ref(env, wp);
574 }
575 }
576 #endif
577
578 /* Add a breakpoint. */
579 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
580 CPUBreakpoint **breakpoint)
581 {
582 #if defined(TARGET_HAS_ICE)
583 CPUBreakpoint *bp;
584
585 bp = g_malloc(sizeof(*bp));
586
587 bp->pc = pc;
588 bp->flags = flags;
589
590 /* keep all GDB-injected breakpoints in front */
591 if (flags & BP_GDB) {
592 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
593 } else {
594 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
595 }
596
597 breakpoint_invalidate(ENV_GET_CPU(env), pc);
598
599 if (breakpoint) {
600 *breakpoint = bp;
601 }
602 return 0;
603 #else
604 return -ENOSYS;
605 #endif
606 }
607
608 /* Remove a specific breakpoint. */
609 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
610 {
611 #if defined(TARGET_HAS_ICE)
612 CPUBreakpoint *bp;
613
614 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
615 if (bp->pc == pc && bp->flags == flags) {
616 cpu_breakpoint_remove_by_ref(env, bp);
617 return 0;
618 }
619 }
620 return -ENOENT;
621 #else
622 return -ENOSYS;
623 #endif
624 }
625
626 /* Remove a specific breakpoint by reference. */
627 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
628 {
629 #if defined(TARGET_HAS_ICE)
630 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
631
632 breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
633
634 g_free(breakpoint);
635 #endif
636 }
637
638 /* Remove all matching breakpoints. */
639 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
640 {
641 #if defined(TARGET_HAS_ICE)
642 CPUBreakpoint *bp, *next;
643
644 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
645 if (bp->flags & mask)
646 cpu_breakpoint_remove_by_ref(env, bp);
647 }
648 #endif
649 }
650
651 /* enable or disable single step mode. EXCP_DEBUG is returned by the
652 CPU loop after each instruction */
653 void cpu_single_step(CPUState *cpu, int enabled)
654 {
655 #if defined(TARGET_HAS_ICE)
656 if (cpu->singlestep_enabled != enabled) {
657 cpu->singlestep_enabled = enabled;
658 if (kvm_enabled()) {
659 kvm_update_guest_debug(cpu, 0);
660 } else {
661 /* must flush all the translated code to avoid inconsistencies */
662 /* XXX: only flush what is necessary */
663 CPUArchState *env = cpu->env_ptr;
664 tb_flush(env);
665 }
666 }
667 #endif
668 }
669
670 void cpu_abort(CPUArchState *env, const char *fmt, ...)
671 {
672 CPUState *cpu = ENV_GET_CPU(env);
673 va_list ap;
674 va_list ap2;
675
676 va_start(ap, fmt);
677 va_copy(ap2, ap);
678 fprintf(stderr, "qemu: fatal: ");
679 vfprintf(stderr, fmt, ap);
680 fprintf(stderr, "\n");
681 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
682 if (qemu_log_enabled()) {
683 qemu_log("qemu: fatal: ");
684 qemu_log_vprintf(fmt, ap2);
685 qemu_log("\n");
686 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
687 qemu_log_flush();
688 qemu_log_close();
689 }
690 va_end(ap2);
691 va_end(ap);
692 #if defined(CONFIG_USER_ONLY)
693 {
694 struct sigaction act;
695 sigfillset(&act.sa_mask);
696 act.sa_handler = SIG_DFL;
697 sigaction(SIGABRT, &act, NULL);
698 }
699 #endif
700 abort();
701 }
702
703 #if !defined(CONFIG_USER_ONLY)
704 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
705 {
706 RAMBlock *block;
707
708 /* The list is protected by the iothread lock here. */
709 block = ram_list.mru_block;
710 if (block && addr - block->offset < block->length) {
711 goto found;
712 }
713 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
714 if (addr - block->offset < block->length) {
715 goto found;
716 }
717 }
718
719 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
720 abort();
721
722 found:
723 ram_list.mru_block = block;
724 return block;
725 }
726
727 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
728 uintptr_t length)
729 {
730 RAMBlock *block;
731 ram_addr_t start1;
732
733 block = qemu_get_ram_block(start);
734 assert(block == qemu_get_ram_block(end - 1));
735 start1 = (uintptr_t)block->host + (start - block->offset);
736 cpu_tlb_reset_dirty_all(start1, length);
737 }
738
739 /* Note: start and end must be within the same ram block. */
740 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
741 unsigned client)
742 {
743 uintptr_t length;
744
745 start &= TARGET_PAGE_MASK;
746 end = TARGET_PAGE_ALIGN(end);
747
748 length = end - start;
749 if (length == 0)
750 return;
751 cpu_physical_memory_mask_dirty_range(start, length, client);
752
753 if (tcg_enabled()) {
754 tlb_reset_dirty_range_all(start, end, length);
755 }
756 }
757
758 static int cpu_physical_memory_set_dirty_tracking(int enable)
759 {
760 int ret = 0;
761 in_migration = enable;
762 return ret;
763 }
764
765 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
766 MemoryRegionSection *section,
767 target_ulong vaddr,
768 hwaddr paddr, hwaddr xlat,
769 int prot,
770 target_ulong *address)
771 {
772 hwaddr iotlb;
773 CPUWatchpoint *wp;
774
775 if (memory_region_is_ram(section->mr)) {
776 /* Normal RAM. */
777 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
778 + xlat;
779 if (!section->readonly) {
780 iotlb |= PHYS_SECTION_NOTDIRTY;
781 } else {
782 iotlb |= PHYS_SECTION_ROM;
783 }
784 } else {
785 iotlb = section - address_space_memory.dispatch->map.sections;
786 iotlb += xlat;
787 }
788
789 /* Make accesses to pages with watchpoints go via the
790 watchpoint trap routines. */
791 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
792 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
793 /* Avoid trapping reads of pages with a write breakpoint. */
794 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
795 iotlb = PHYS_SECTION_WATCH + paddr;
796 *address |= TLB_MMIO;
797 break;
798 }
799 }
800 }
801
802 return iotlb;
803 }
804 #endif /* defined(CONFIG_USER_ONLY) */
805
806 #if !defined(CONFIG_USER_ONLY)
807
808 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
809 uint16_t section);
810 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
811
812 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
813
814 /*
815 * Set a custom physical guest memory alloator.
816 * Accelerators with unusual needs may need this. Hopefully, we can
817 * get rid of it eventually.
818 */
819 void phys_mem_set_alloc(void *(*alloc)(size_t))
820 {
821 phys_mem_alloc = alloc;
822 }
823
824 static uint16_t phys_section_add(PhysPageMap *map,
825 MemoryRegionSection *section)
826 {
827 /* The physical section number is ORed with a page-aligned
828 * pointer to produce the iotlb entries. Thus it should
829 * never overflow into the page-aligned value.
830 */
831 assert(map->sections_nb < TARGET_PAGE_SIZE);
832
833 if (map->sections_nb == map->sections_nb_alloc) {
834 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
835 map->sections = g_renew(MemoryRegionSection, map->sections,
836 map->sections_nb_alloc);
837 }
838 map->sections[map->sections_nb] = *section;
839 memory_region_ref(section->mr);
840 return map->sections_nb++;
841 }
842
843 static void phys_section_destroy(MemoryRegion *mr)
844 {
845 memory_region_unref(mr);
846
847 if (mr->subpage) {
848 subpage_t *subpage = container_of(mr, subpage_t, iomem);
849 memory_region_destroy(&subpage->iomem);
850 g_free(subpage);
851 }
852 }
853
854 static void phys_sections_free(PhysPageMap *map)
855 {
856 while (map->sections_nb > 0) {
857 MemoryRegionSection *section = &map->sections[--map->sections_nb];
858 phys_section_destroy(section->mr);
859 }
860 g_free(map->sections);
861 g_free(map->nodes);
862 }
863
864 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
865 {
866 subpage_t *subpage;
867 hwaddr base = section->offset_within_address_space
868 & TARGET_PAGE_MASK;
869 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
870 d->map.nodes, d->map.sections);
871 MemoryRegionSection subsection = {
872 .offset_within_address_space = base,
873 .size = int128_make64(TARGET_PAGE_SIZE),
874 };
875 hwaddr start, end;
876
877 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
878
879 if (!(existing->mr->subpage)) {
880 subpage = subpage_init(d->as, base);
881 subsection.mr = &subpage->iomem;
882 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
883 phys_section_add(&d->map, &subsection));
884 } else {
885 subpage = container_of(existing->mr, subpage_t, iomem);
886 }
887 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
888 end = start + int128_get64(section->size) - 1;
889 subpage_register(subpage, start, end,
890 phys_section_add(&d->map, section));
891 }
892
893
894 static void register_multipage(AddressSpaceDispatch *d,
895 MemoryRegionSection *section)
896 {
897 hwaddr start_addr = section->offset_within_address_space;
898 uint16_t section_index = phys_section_add(&d->map, section);
899 uint64_t num_pages = int128_get64(int128_rshift(section->size,
900 TARGET_PAGE_BITS));
901
902 assert(num_pages);
903 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
904 }
905
906 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
907 {
908 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
909 AddressSpaceDispatch *d = as->next_dispatch;
910 MemoryRegionSection now = *section, remain = *section;
911 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
912
913 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
914 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
915 - now.offset_within_address_space;
916
917 now.size = int128_min(int128_make64(left), now.size);
918 register_subpage(d, &now);
919 } else {
920 now.size = int128_zero();
921 }
922 while (int128_ne(remain.size, now.size)) {
923 remain.size = int128_sub(remain.size, now.size);
924 remain.offset_within_address_space += int128_get64(now.size);
925 remain.offset_within_region += int128_get64(now.size);
926 now = remain;
927 if (int128_lt(remain.size, page_size)) {
928 register_subpage(d, &now);
929 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
930 now.size = page_size;
931 register_subpage(d, &now);
932 } else {
933 now.size = int128_and(now.size, int128_neg(page_size));
934 register_multipage(d, &now);
935 }
936 }
937 }
938
/* Flush the KVM coalesced MMIO buffer; a no-op when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
944
945 void qemu_mutex_lock_ramlist(void)
946 {
947 qemu_mutex_lock(&ram_list.mutex);
948 }
949
950 void qemu_mutex_unlock_ramlist(void)
951 {
952 qemu_mutex_unlock(&ram_list.mutex);
953 }
954
955 #ifdef __linux__
956
957 #include <sys/vfs.h>
958
959 #define HUGETLBFS_MAGIC 0x958458f6
960
961 static long gethugepagesize(const char *path)
962 {
963 struct statfs fs;
964 int ret;
965
966 do {
967 ret = statfs(path, &fs);
968 } while (ret != 0 && errno == EINTR);
969
970 if (ret != 0) {
971 perror(path);
972 return 0;
973 }
974
975 if (fs.f_type != HUGETLBFS_MAGIC)
976 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
977
978 return fs.f_bsize;
979 }
980
981 static sigjmp_buf sigjump;
982
983 static void sigbus_handler(int signal)
984 {
985 siglongjmp(sigjump, 1);
986 }
987
988 static void *file_ram_alloc(RAMBlock *block,
989 ram_addr_t memory,
990 const char *path)
991 {
992 char *filename;
993 char *sanitized_name;
994 char *c;
995 void *area;
996 int fd;
997 unsigned long hpagesize;
998
999 hpagesize = gethugepagesize(path);
1000 if (!hpagesize) {
1001 return NULL;
1002 }
1003
1004 if (memory < hpagesize) {
1005 return NULL;
1006 }
1007
1008 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1009 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1010 return NULL;
1011 }
1012
1013 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1014 sanitized_name = g_strdup(block->mr->name);
1015 for (c = sanitized_name; *c != '\0'; c++) {
1016 if (*c == '/')
1017 *c = '_';
1018 }
1019
1020 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1021 sanitized_name);
1022 g_free(sanitized_name);
1023
1024 fd = mkstemp(filename);
1025 if (fd < 0) {
1026 perror("unable to create backing store for hugepages");
1027 g_free(filename);
1028 return NULL;
1029 }
1030 unlink(filename);
1031 g_free(filename);
1032
1033 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1034
1035 /*
1036 * ftruncate is not supported by hugetlbfs in older
1037 * hosts, so don't bother bailing out on errors.
1038 * If anything goes wrong with it under other filesystems,
1039 * mmap will fail.
1040 */
1041 if (ftruncate(fd, memory))
1042 perror("ftruncate");
1043
1044 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1045 if (area == MAP_FAILED) {
1046 perror("file_ram_alloc: can't mmap RAM pages");
1047 close(fd);
1048 return (NULL);
1049 }
1050
1051 if (mem_prealloc) {
1052 int ret, i;
1053 struct sigaction act, oldact;
1054 sigset_t set, oldset;
1055
1056 memset(&act, 0, sizeof(act));
1057 act.sa_handler = &sigbus_handler;
1058 act.sa_flags = 0;
1059
1060 ret = sigaction(SIGBUS, &act, &oldact);
1061 if (ret) {
1062 perror("file_ram_alloc: failed to install signal handler");
1063 exit(1);
1064 }
1065
1066 /* unblock SIGBUS */
1067 sigemptyset(&set);
1068 sigaddset(&set, SIGBUS);
1069 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1070
1071 if (sigsetjmp(sigjump, 1)) {
1072 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1073 exit(1);
1074 }
1075
1076 /* MAP_POPULATE silently ignores failures */
1077 for (i = 0; i < (memory/hpagesize)-1; i++) {
1078 memset(area + (hpagesize*i), 0, 1);
1079 }
1080
1081 ret = sigaction(SIGBUS, &oldact, NULL);
1082 if (ret) {
1083 perror("file_ram_alloc: failed to reinstall signal handler");
1084 exit(1);
1085 }
1086
1087 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1088 }
1089
1090 block->fd = fd;
1091 return area;
1092 }
1093 #else
1094 static void *file_ram_alloc(RAMBlock *block,
1095 ram_addr_t memory,
1096 const char *path)
1097 {
1098 fprintf(stderr, "-mem-path not supported on this host\n");
1099 exit(1);
1100 }
1101 #endif
1102
1103 static ram_addr_t find_ram_offset(ram_addr_t size)
1104 {
1105 RAMBlock *block, *next_block;
1106 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1107
1108 assert(size != 0); /* it would hand out same offset multiple times */
1109
1110 if (QTAILQ_EMPTY(&ram_list.blocks))
1111 return 0;
1112
1113 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1114 ram_addr_t end, next = RAM_ADDR_MAX;
1115
1116 end = block->offset + block->length;
1117
1118 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1119 if (next_block->offset >= end) {
1120 next = MIN(next, next_block->offset);
1121 }
1122 }
1123 if (next - end >= size && next - end < mingap) {
1124 offset = end;
1125 mingap = next - end;
1126 }
1127 }
1128
1129 if (offset == RAM_ADDR_MAX) {
1130 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1131 (uint64_t)size);
1132 abort();
1133 }
1134
1135 return offset;
1136 }
1137
1138 ram_addr_t last_ram_offset(void)
1139 {
1140 RAMBlock *block;
1141 ram_addr_t last = 0;
1142
1143 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1144 last = MAX(last, block->offset + block->length);
1145
1146 return last;
1147 }
1148
1149 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1150 {
1151 int ret;
1152
1153 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1154 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1155 "dump-guest-core", true)) {
1156 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1157 if (ret) {
1158 perror("qemu_madvise");
1159 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1160 "but dump_guest_core=off specified\n");
1161 }
1162 }
1163 }
1164
1165 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1166 {
1167 RAMBlock *new_block, *block;
1168
1169 new_block = NULL;
1170 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1171 if (block->offset == addr) {
1172 new_block = block;
1173 break;
1174 }
1175 }
1176 assert(new_block);
1177 assert(!new_block->idstr[0]);
1178
1179 if (dev) {
1180 char *id = qdev_get_dev_path(dev);
1181 if (id) {
1182 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1183 g_free(id);
1184 }
1185 }
1186 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1187
1188 /* This assumes the iothread lock is taken here too. */
1189 qemu_mutex_lock_ramlist();
1190 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1191 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1192 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1193 new_block->idstr);
1194 abort();
1195 }
1196 }
1197 qemu_mutex_unlock_ramlist();
1198 }
1199
1200 static int memory_try_enable_merging(void *addr, size_t len)
1201 {
1202 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1203 /* disabled by the user */
1204 return 0;
1205 }
1206
1207 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1208 }
1209
1210 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1211 MemoryRegion *mr)
1212 {
1213 RAMBlock *block, *new_block;
1214
1215 size = TARGET_PAGE_ALIGN(size);
1216 new_block = g_malloc0(sizeof(*new_block));
1217 new_block->fd = -1;
1218
1219 /* This assumes the iothread lock is taken here too. */
1220 qemu_mutex_lock_ramlist();
1221 new_block->mr = mr;
1222 new_block->offset = find_ram_offset(size);
1223 if (host) {
1224 new_block->host = host;
1225 new_block->flags |= RAM_PREALLOC_MASK;
1226 } else if (xen_enabled()) {
1227 if (mem_path) {
1228 fprintf(stderr, "-mem-path not supported with Xen\n");
1229 exit(1);
1230 }
1231 xen_ram_alloc(new_block->offset, size, mr);
1232 } else {
1233 if (mem_path) {
1234 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1235 /*
1236 * file_ram_alloc() needs to allocate just like
1237 * phys_mem_alloc, but we haven't bothered to provide
1238 * a hook there.
1239 */
1240 fprintf(stderr,
1241 "-mem-path not supported with this accelerator\n");
1242 exit(1);
1243 }
1244 new_block->host = file_ram_alloc(new_block, size, mem_path);
1245 }
1246 if (!new_block->host) {
1247 new_block->host = phys_mem_alloc(size);
1248 if (!new_block->host) {
1249 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1250 new_block->mr->name, strerror(errno));
1251 exit(1);
1252 }
1253 memory_try_enable_merging(new_block->host, size);
1254 }
1255 }
1256 new_block->length = size;
1257
1258 /* Keep the list sorted from biggest to smallest block. */
1259 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1260 if (block->length < new_block->length) {
1261 break;
1262 }
1263 }
1264 if (block) {
1265 QTAILQ_INSERT_BEFORE(block, new_block, next);
1266 } else {
1267 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1268 }
1269 ram_list.mru_block = NULL;
1270
1271 ram_list.version++;
1272 qemu_mutex_unlock_ramlist();
1273
1274 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1275 last_ram_offset() >> TARGET_PAGE_BITS);
1276 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1277 0, size >> TARGET_PAGE_BITS);
1278 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1279
1280 qemu_ram_setup_dump(new_block->host, size);
1281 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1282 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1283
1284 if (kvm_enabled())
1285 kvm_setup_guest_memory(new_block->host, size);
1286
1287 return new_block->offset;
1288 }
1289
1290 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1291 {
1292 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1293 }
1294
1295 void qemu_ram_free_from_ptr(ram_addr_t addr)
1296 {
1297 RAMBlock *block;
1298
1299 /* This assumes the iothread lock is taken here too. */
1300 qemu_mutex_lock_ramlist();
1301 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1302 if (addr == block->offset) {
1303 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1304 ram_list.mru_block = NULL;
1305 ram_list.version++;
1306 g_free(block);
1307 break;
1308 }
1309 }
1310 qemu_mutex_unlock_ramlist();
1311 }
1312
1313 void qemu_ram_free(ram_addr_t addr)
1314 {
1315 RAMBlock *block;
1316
1317 /* This assumes the iothread lock is taken here too. */
1318 qemu_mutex_lock_ramlist();
1319 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1320 if (addr == block->offset) {
1321 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1322 ram_list.mru_block = NULL;
1323 ram_list.version++;
1324 if (block->flags & RAM_PREALLOC_MASK) {
1325 ;
1326 } else if (xen_enabled()) {
1327 xen_invalidate_map_cache_entry(block->host);
1328 #ifndef _WIN32
1329 } else if (block->fd >= 0) {
1330 munmap(block->host, block->length);
1331 close(block->fd);
1332 #endif
1333 } else {
1334 qemu_anon_ram_free(block->host, block->length);
1335 }
1336 g_free(block);
1337 break;
1338 }
1339 }
1340 qemu_mutex_unlock_ramlist();
1341
1342 }
1343
1344 #ifndef _WIN32
1345 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1346 {
1347 RAMBlock *block;
1348 ram_addr_t offset;
1349 int flags;
1350 void *area, *vaddr;
1351
1352 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1353 offset = addr - block->offset;
1354 if (offset < block->length) {
1355 vaddr = block->host + offset;
1356 if (block->flags & RAM_PREALLOC_MASK) {
1357 ;
1358 } else if (xen_enabled()) {
1359 abort();
1360 } else {
1361 flags = MAP_FIXED;
1362 munmap(vaddr, length);
1363 if (block->fd >= 0) {
1364 #ifdef MAP_POPULATE
1365 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1366 MAP_PRIVATE;
1367 #else
1368 flags |= MAP_PRIVATE;
1369 #endif
1370 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1371 flags, block->fd, offset);
1372 } else {
1373 /*
1374 * Remap needs to match alloc. Accelerators that
1375 * set phys_mem_alloc never remap. If they did,
1376 * we'd need a remap hook here.
1377 */
1378 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1379
1380 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1381 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1382 flags, -1, 0);
1383 }
1384 if (area != vaddr) {
1385 fprintf(stderr, "Could not remap addr: "
1386 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1387 length, addr);
1388 exit(1);
1389 }
1390 memory_try_enable_merging(vaddr, length);
1391 qemu_ram_setup_dump(vaddr, length);
1392 }
1393 return;
1394 }
1395 }
1396 }
1397 #endif /* !_WIN32 */
1398
1399 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1400 With the exception of the softmmu code in this file, this should
1401 only be used for local memory (e.g. video ram) that the device owns,
1402 and knows it isn't going to access beyond the end of the block.
1403
1404 It should not be used for general purpose DMA.
1405 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1406 */
1407 void *qemu_get_ram_ptr(ram_addr_t addr)
1408 {
1409 RAMBlock *block = qemu_get_ram_block(addr);
1410
1411 if (xen_enabled()) {
1412 /* We need to check if the requested address is in the RAM
1413 * because we don't want to map the entire memory in QEMU.
1414 * In that case just map until the end of the page.
1415 */
1416 if (block->offset == 0) {
1417 return xen_map_cache(addr, 0, 0);
1418 } else if (block->host == NULL) {
1419 block->host =
1420 xen_map_cache(block->offset, block->length, 1);
1421 }
1422 }
1423 return block->host + (addr - block->offset);
1424 }
1425
1426 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1427 * but takes a size argument */
1428 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1429 {
1430 if (*size == 0) {
1431 return NULL;
1432 }
1433 if (xen_enabled()) {
1434 return xen_map_cache(addr, *size, 1);
1435 } else {
1436 RAMBlock *block;
1437
1438 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1439 if (addr - block->offset < block->length) {
1440 if (addr - block->offset + *size > block->length)
1441 *size = block->length - addr + block->offset;
1442 return block->host + (addr - block->offset);
1443 }
1444 }
1445
1446 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1447 abort();
1448 }
1449 }
1450
1451 /* Some of the softmmu routines need to translate from a host pointer
1452 (typically a TLB entry) back to a ram offset. */
1453 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1454 {
1455 RAMBlock *block;
1456 uint8_t *host = ptr;
1457
1458 if (xen_enabled()) {
1459 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1460 return qemu_get_ram_block(*ram_addr)->mr;
1461 }
1462
1463 block = ram_list.mru_block;
1464 if (block && block->host && host - block->host < block->length) {
1465 goto found;
1466 }
1467
1468 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1469 /* This case append when the block is not mapped. */
1470 if (block->host == NULL) {
1471 continue;
1472 }
1473 if (host - block->host < block->length) {
1474 goto found;
1475 }
1476 }
1477
1478 return NULL;
1479
1480 found:
1481 *ram_addr = block->offset + (host - block->host);
1482 return block->mr;
1483 }
1484
1485 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1486 uint64_t val, unsigned size)
1487 {
1488 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1489 tb_invalidate_phys_page_fast(ram_addr, size);
1490 }
1491 switch (size) {
1492 case 1:
1493 stb_p(qemu_get_ram_ptr(ram_addr), val);
1494 break;
1495 case 2:
1496 stw_p(qemu_get_ram_ptr(ram_addr), val);
1497 break;
1498 case 4:
1499 stl_p(qemu_get_ram_ptr(ram_addr), val);
1500 break;
1501 default:
1502 abort();
1503 }
1504 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1505 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1506 /* we remove the notdirty callback only if the code has been
1507 flushed */
1508 if (cpu_physical_memory_is_dirty(ram_addr)) {
1509 CPUArchState *env = current_cpu->env_ptr;
1510 tlb_set_dirty(env, env->mem_io_vaddr);
1511 }
1512 }
1513
1514 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1515 unsigned size, bool is_write)
1516 {
1517 return is_write;
1518 }
1519
1520 static const MemoryRegionOps notdirty_mem_ops = {
1521 .write = notdirty_mem_write,
1522 .valid.accepts = notdirty_mem_accepts,
1523 .endianness = DEVICE_NATIVE_ENDIAN,
1524 };
1525
1526 /* Generate a debug exception if a watchpoint has been hit. */
1527 static void check_watchpoint(int offset, int len_mask, int flags)
1528 {
1529 CPUArchState *env = current_cpu->env_ptr;
1530 target_ulong pc, cs_base;
1531 target_ulong vaddr;
1532 CPUWatchpoint *wp;
1533 int cpu_flags;
1534
1535 if (env->watchpoint_hit) {
1536 /* We re-entered the check after replacing the TB. Now raise
1537 * the debug interrupt so that is will trigger after the
1538 * current instruction. */
1539 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1540 return;
1541 }
1542 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1543 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1544 if ((vaddr == (wp->vaddr & len_mask) ||
1545 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1546 wp->flags |= BP_WATCHPOINT_HIT;
1547 if (!env->watchpoint_hit) {
1548 env->watchpoint_hit = wp;
1549 tb_check_watchpoint(env);
1550 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1551 env->exception_index = EXCP_DEBUG;
1552 cpu_loop_exit(env);
1553 } else {
1554 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1555 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1556 cpu_resume_from_signal(env, NULL);
1557 }
1558 }
1559 } else {
1560 wp->flags &= ~BP_WATCHPOINT_HIT;
1561 }
1562 }
1563 }
1564
1565 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1566 so these check for a hit then pass through to the normal out-of-line
1567 phys routines. */
1568 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1569 unsigned size)
1570 {
1571 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1572 switch (size) {
1573 case 1: return ldub_phys(addr);
1574 case 2: return lduw_phys(addr);
1575 case 4: return ldl_phys(addr);
1576 default: abort();
1577 }
1578 }
1579
1580 static void watch_mem_write(void *opaque, hwaddr addr,
1581 uint64_t val, unsigned size)
1582 {
1583 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1584 switch (size) {
1585 case 1:
1586 stb_phys(addr, val);
1587 break;
1588 case 2:
1589 stw_phys(addr, val);
1590 break;
1591 case 4:
1592 stl_phys(addr, val);
1593 break;
1594 default: abort();
1595 }
1596 }
1597
1598 static const MemoryRegionOps watch_mem_ops = {
1599 .read = watch_mem_read,
1600 .write = watch_mem_write,
1601 .endianness = DEVICE_NATIVE_ENDIAN,
1602 };
1603
1604 static uint64_t subpage_read(void *opaque, hwaddr addr,
1605 unsigned len)
1606 {
1607 subpage_t *subpage = opaque;
1608 uint8_t buf[4];
1609
1610 #if defined(DEBUG_SUBPAGE)
1611 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1612 subpage, len, addr);
1613 #endif
1614 address_space_read(subpage->as, addr + subpage->base, buf, len);
1615 switch (len) {
1616 case 1:
1617 return ldub_p(buf);
1618 case 2:
1619 return lduw_p(buf);
1620 case 4:
1621 return ldl_p(buf);
1622 default:
1623 abort();
1624 }
1625 }
1626
1627 static void subpage_write(void *opaque, hwaddr addr,
1628 uint64_t value, unsigned len)
1629 {
1630 subpage_t *subpage = opaque;
1631 uint8_t buf[4];
1632
1633 #if defined(DEBUG_SUBPAGE)
1634 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1635 " value %"PRIx64"\n",
1636 __func__, subpage, len, addr, value);
1637 #endif
1638 switch (len) {
1639 case 1:
1640 stb_p(buf, value);
1641 break;
1642 case 2:
1643 stw_p(buf, value);
1644 break;
1645 case 4:
1646 stl_p(buf, value);
1647 break;
1648 default:
1649 abort();
1650 }
1651 address_space_write(subpage->as, addr + subpage->base, buf, len);
1652 }
1653
1654 static bool subpage_accepts(void *opaque, hwaddr addr,
1655 unsigned len, bool is_write)
1656 {
1657 subpage_t *subpage = opaque;
1658 #if defined(DEBUG_SUBPAGE)
1659 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1660 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1661 #endif
1662
1663 return address_space_access_valid(subpage->as, addr + subpage->base,
1664 len, is_write);
1665 }
1666
1667 static const MemoryRegionOps subpage_ops = {
1668 .read = subpage_read,
1669 .write = subpage_write,
1670 .valid.accepts = subpage_accepts,
1671 .endianness = DEVICE_NATIVE_ENDIAN,
1672 };
1673
1674 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1675 uint16_t section)
1676 {
1677 int idx, eidx;
1678
1679 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1680 return -1;
1681 idx = SUBPAGE_IDX(start);
1682 eidx = SUBPAGE_IDX(end);
1683 #if defined(DEBUG_SUBPAGE)
1684 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1685 __func__, mmio, start, end, idx, eidx, section);
1686 #endif
1687 for (; idx <= eidx; idx++) {
1688 mmio->sub_section[idx] = section;
1689 }
1690
1691 return 0;
1692 }
1693
1694 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1695 {
1696 subpage_t *mmio;
1697
1698 mmio = g_malloc0(sizeof(subpage_t));
1699
1700 mmio->as = as;
1701 mmio->base = base;
1702 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1703 "subpage", TARGET_PAGE_SIZE);
1704 mmio->iomem.subpage = true;
1705 #if defined(DEBUG_SUBPAGE)
1706 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1707 mmio, base, TARGET_PAGE_SIZE);
1708 #endif
1709 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1710
1711 return mmio;
1712 }
1713
1714 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1715 {
1716 MemoryRegionSection section = {
1717 .mr = mr,
1718 .offset_within_address_space = 0,
1719 .offset_within_region = 0,
1720 .size = int128_2_64(),
1721 };
1722
1723 return phys_section_add(map, &section);
1724 }
1725
1726 MemoryRegion *iotlb_to_region(hwaddr index)
1727 {
1728 return address_space_memory.dispatch->map.sections[
1729 index & ~TARGET_PAGE_MASK].mr;
1730 }
1731
1732 static void io_mem_init(void)
1733 {
1734 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1735 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1736 "unassigned", UINT64_MAX);
1737 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1738 "notdirty", UINT64_MAX);
1739 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1740 "watch", UINT64_MAX);
1741 }
1742
1743 static void mem_begin(MemoryListener *listener)
1744 {
1745 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1746 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1747 uint16_t n;
1748
1749 n = dummy_section(&d->map, &io_mem_unassigned);
1750 assert(n == PHYS_SECTION_UNASSIGNED);
1751 n = dummy_section(&d->map, &io_mem_notdirty);
1752 assert(n == PHYS_SECTION_NOTDIRTY);
1753 n = dummy_section(&d->map, &io_mem_rom);
1754 assert(n == PHYS_SECTION_ROM);
1755 n = dummy_section(&d->map, &io_mem_watch);
1756 assert(n == PHYS_SECTION_WATCH);
1757
1758 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1759 d->as = as;
1760 as->next_dispatch = d;
1761 }
1762
1763 static void mem_commit(MemoryListener *listener)
1764 {
1765 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1766 AddressSpaceDispatch *cur = as->dispatch;
1767 AddressSpaceDispatch *next = as->next_dispatch;
1768
1769 phys_page_compact_all(next, next->map.nodes_nb);
1770
1771 as->dispatch = next;
1772
1773 if (cur) {
1774 phys_sections_free(&cur->map);
1775 g_free(cur);
1776 }
1777 }
1778
1779 static void tcg_commit(MemoryListener *listener)
1780 {
1781 CPUState *cpu;
1782
1783 /* since each CPU stores ram addresses in its TLB cache, we must
1784 reset the modified entries */
1785 /* XXX: slow ! */
1786 CPU_FOREACH(cpu) {
1787 CPUArchState *env = cpu->env_ptr;
1788
1789 tlb_flush(env, 1);
1790 }
1791 }
1792
1793 static void core_log_global_start(MemoryListener *listener)
1794 {
1795 cpu_physical_memory_set_dirty_tracking(1);
1796 }
1797
1798 static void core_log_global_stop(MemoryListener *listener)
1799 {
1800 cpu_physical_memory_set_dirty_tracking(0);
1801 }
1802
1803 static MemoryListener core_memory_listener = {
1804 .log_global_start = core_log_global_start,
1805 .log_global_stop = core_log_global_stop,
1806 .priority = 1,
1807 };
1808
1809 static MemoryListener tcg_memory_listener = {
1810 .commit = tcg_commit,
1811 };
1812
1813 void address_space_init_dispatch(AddressSpace *as)
1814 {
1815 as->dispatch = NULL;
1816 as->dispatch_listener = (MemoryListener) {
1817 .begin = mem_begin,
1818 .commit = mem_commit,
1819 .region_add = mem_add,
1820 .region_nop = mem_add,
1821 .priority = 0,
1822 };
1823 memory_listener_register(&as->dispatch_listener, as);
1824 }
1825
1826 void address_space_destroy_dispatch(AddressSpace *as)
1827 {
1828 AddressSpaceDispatch *d = as->dispatch;
1829
1830 memory_listener_unregister(&as->dispatch_listener);
1831 g_free(d);
1832 as->dispatch = NULL;
1833 }
1834
1835 static void memory_map_init(void)
1836 {
1837 system_memory = g_malloc(sizeof(*system_memory));
1838
1839 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1840 address_space_init(&address_space_memory, system_memory, "memory");
1841
1842 system_io = g_malloc(sizeof(*system_io));
1843 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1844 65536);
1845 address_space_init(&address_space_io, system_io, "I/O");
1846
1847 memory_listener_register(&core_memory_listener, &address_space_memory);
1848 if (tcg_enabled()) {
1849 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1850 }
1851 }
1852
1853 MemoryRegion *get_system_memory(void)
1854 {
1855 return system_memory;
1856 }
1857
1858 MemoryRegion *get_system_io(void)
1859 {
1860 return system_io;
1861 }
1862
1863 #endif /* !defined(CONFIG_USER_ONLY) */
1864
1865 /* physical memory access (slow version, mainly for debug) */
1866 #if defined(CONFIG_USER_ONLY)
1867 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1868 uint8_t *buf, int len, int is_write)
1869 {
1870 int l, flags;
1871 target_ulong page;
1872 void * p;
1873
1874 while (len > 0) {
1875 page = addr & TARGET_PAGE_MASK;
1876 l = (page + TARGET_PAGE_SIZE) - addr;
1877 if (l > len)
1878 l = len;
1879 flags = page_get_flags(page);
1880 if (!(flags & PAGE_VALID))
1881 return -1;
1882 if (is_write) {
1883 if (!(flags & PAGE_WRITE))
1884 return -1;
1885 /* XXX: this code should not depend on lock_user */
1886 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1887 return -1;
1888 memcpy(p, buf, l);
1889 unlock_user(p, addr, l);
1890 } else {
1891 if (!(flags & PAGE_READ))
1892 return -1;
1893 /* XXX: this code should not depend on lock_user */
1894 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1895 return -1;
1896 memcpy(buf, p, l);
1897 unlock_user(p, addr, 0);
1898 }
1899 len -= l;
1900 buf += l;
1901 addr += l;
1902 }
1903 return 0;
1904 }
1905
1906 #else
1907
1908 static void invalidate_and_set_dirty(hwaddr addr,
1909 hwaddr length)
1910 {
1911 if (!cpu_physical_memory_is_dirty(addr)) {
1912 /* invalidate code */
1913 tb_invalidate_phys_page_range(addr, addr + length, 0);
1914 /* set dirty bit */
1915 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1916 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1917 }
1918 xen_modified_memory(addr, length);
1919 }
1920
1921 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1922 {
1923 if (memory_region_is_ram(mr)) {
1924 return !(is_write && mr->readonly);
1925 }
1926 if (memory_region_is_romd(mr)) {
1927 return !is_write;
1928 }
1929
1930 return false;
1931 }
1932
1933 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1934 {
1935 unsigned access_size_max = mr->ops->valid.max_access_size;
1936
1937 /* Regions are assumed to support 1-4 byte accesses unless
1938 otherwise specified. */
1939 if (access_size_max == 0) {
1940 access_size_max = 4;
1941 }
1942
1943 /* Bound the maximum access by the alignment of the address. */
1944 if (!mr->ops->impl.unaligned) {
1945 unsigned align_size_max = addr & -addr;
1946 if (align_size_max != 0 && align_size_max < access_size_max) {
1947 access_size_max = align_size_max;
1948 }
1949 }
1950
1951 /* Don't attempt accesses larger than the maximum. */
1952 if (l > access_size_max) {
1953 l = access_size_max;
1954 }
1955 if (l & (l - 1)) {
1956 l = 1 << (qemu_fls(l) - 1);
1957 }
1958
1959 return l;
1960 }
1961
1962 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1963 int len, bool is_write)
1964 {
1965 hwaddr l;
1966 uint8_t *ptr;
1967 uint64_t val;
1968 hwaddr addr1;
1969 MemoryRegion *mr;
1970 bool error = false;
1971
1972 while (len > 0) {
1973 l = len;
1974 mr = address_space_translate(as, addr, &addr1, &l, is_write);
1975
1976 if (is_write) {
1977 if (!memory_access_is_direct(mr, is_write)) {
1978 l = memory_access_size(mr, l, addr1);
1979 /* XXX: could force current_cpu to NULL to avoid
1980 potential bugs */
1981 switch (l) {
1982 case 8:
1983 /* 64 bit write access */
1984 val = ldq_p(buf);
1985 error |= io_mem_write(mr, addr1, val, 8);
1986 break;
1987 case 4:
1988 /* 32 bit write access */
1989 val = ldl_p(buf);
1990 error |= io_mem_write(mr, addr1, val, 4);
1991 break;
1992 case 2:
1993 /* 16 bit write access */
1994 val = lduw_p(buf);
1995 error |= io_mem_write(mr, addr1, val, 2);
1996 break;
1997 case 1:
1998 /* 8 bit write access */
1999 val = ldub_p(buf);
2000 error |= io_mem_write(mr, addr1, val, 1);
2001 break;
2002 default:
2003 abort();
2004 }
2005 } else {
2006 addr1 += memory_region_get_ram_addr(mr);
2007 /* RAM case */
2008 ptr = qemu_get_ram_ptr(addr1);
2009 memcpy(ptr, buf, l);
2010 invalidate_and_set_dirty(addr1, l);
2011 }
2012 } else {
2013 if (!memory_access_is_direct(mr, is_write)) {
2014 /* I/O case */
2015 l = memory_access_size(mr, l, addr1);
2016 switch (l) {
2017 case 8:
2018 /* 64 bit read access */
2019 error |= io_mem_read(mr, addr1, &val, 8);
2020 stq_p(buf, val);
2021 break;
2022 case 4:
2023 /* 32 bit read access */
2024 error |= io_mem_read(mr, addr1, &val, 4);
2025 stl_p(buf, val);
2026 break;
2027 case 2:
2028 /* 16 bit read access */
2029 error |= io_mem_read(mr, addr1, &val, 2);
2030 stw_p(buf, val);
2031 break;
2032 case 1:
2033 /* 8 bit read access */
2034 error |= io_mem_read(mr, addr1, &val, 1);
2035 stb_p(buf, val);
2036 break;
2037 default:
2038 abort();
2039 }
2040 } else {
2041 /* RAM case */
2042 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2043 memcpy(buf, ptr, l);
2044 }
2045 }
2046 len -= l;
2047 buf += l;
2048 addr += l;
2049 }
2050
2051 return error;
2052 }
2053
2054 bool address_space_write(AddressSpace *as, hwaddr addr,
2055 const uint8_t *buf, int len)
2056 {
2057 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2058 }
2059
2060 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2061 {
2062 return address_space_rw(as, addr, buf, len, false);
2063 }
2064
2065
2066 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2067 int len, int is_write)
2068 {
2069 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2070 }
2071
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA = 0,     /* copy the buffer into RAM/ROM and mark it dirty */
    FLUSH_CACHE = 1,    /* flush the host instruction cache for the range */
};
2076
2077 static inline void cpu_physical_memory_write_rom_internal(
2078 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2079 {
2080 hwaddr l;
2081 uint8_t *ptr;
2082 hwaddr addr1;
2083 MemoryRegion *mr;
2084
2085 while (len > 0) {
2086 l = len;
2087 mr = address_space_translate(&address_space_memory,
2088 addr, &addr1, &l, true);
2089
2090 if (!(memory_region_is_ram(mr) ||
2091 memory_region_is_romd(mr))) {
2092 /* do nothing */
2093 } else {
2094 addr1 += memory_region_get_ram_addr(mr);
2095 /* ROM/RAM case */
2096 ptr = qemu_get_ram_ptr(addr1);
2097 switch (type) {
2098 case WRITE_DATA:
2099 memcpy(ptr, buf, l);
2100 invalidate_and_set_dirty(addr1, l);
2101 break;
2102 case FLUSH_CACHE:
2103 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2104 break;
2105 }
2106 }
2107 len -= l;
2108 buf += l;
2109 addr += l;
2110 }
2111 }
2112
2113 /* used for ROM loading : can write in RAM and ROM */
2114 void cpu_physical_memory_write_rom(hwaddr addr,
2115 const uint8_t *buf, int len)
2116 {
2117 cpu_physical_memory_write_rom_internal(addr, buf, len, WRITE_DATA);
2118 }
2119
2120 void cpu_flush_icache_range(hwaddr start, int len)
2121 {
2122 /*
2123 * This function should do the same thing as an icache flush that was
2124 * triggered from within the guest. For TCG we are always cache coherent,
2125 * so there is no need to flush anything. For KVM / Xen we need to flush
2126 * the host's instruction cache at least.
2127 */
2128 if (tcg_enabled()) {
2129 return;
2130 }
2131
2132 cpu_physical_memory_write_rom_internal(start, NULL, len, FLUSH_CACHE);
2133 }
2134
2135 typedef struct {
2136 MemoryRegion *mr;
2137 void *buffer;
2138 hwaddr addr;
2139 hwaddr len;
2140 } BounceBuffer;
2141
2142 static BounceBuffer bounce;
2143
2144 typedef struct MapClient {
2145 void *opaque;
2146 void (*callback)(void *opaque);
2147 QLIST_ENTRY(MapClient) link;
2148 } MapClient;
2149
2150 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2151 = QLIST_HEAD_INITIALIZER(map_client_list);
2152
2153 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2154 {
2155 MapClient *client = g_malloc(sizeof(*client));
2156
2157 client->opaque = opaque;
2158 client->callback = callback;
2159 QLIST_INSERT_HEAD(&map_client_list, client, link);
2160 return client;
2161 }
2162
2163 static void cpu_unregister_map_client(void *_client)
2164 {
2165 MapClient *client = (MapClient *)_client;
2166
2167 QLIST_REMOVE(client, link);
2168 g_free(client);
2169 }
2170
2171 static void cpu_notify_map_clients(void)
2172 {
2173 MapClient *client;
2174
2175 while (!QLIST_EMPTY(&map_client_list)) {
2176 client = QLIST_FIRST(&map_client_list);
2177 client->callback(client->opaque);
2178 cpu_unregister_map_client(client);
2179 }
2180 }
2181
2182 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2183 {
2184 MemoryRegion *mr;
2185 hwaddr l, xlat;
2186
2187 while (len > 0) {
2188 l = len;
2189 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2190 if (!memory_access_is_direct(mr, is_write)) {
2191 l = memory_access_size(mr, l, addr);
2192 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2193 return false;
2194 }
2195 }
2196
2197 len -= l;
2198 addr += l;
2199 }
2200 return true;
2201 }
2202
2203 /* Map a physical memory region into a host virtual address.
2204 * May map a subset of the requested range, given by and returned in *plen.
2205 * May return NULL if resources needed to perform the mapping are exhausted.
2206 * Use only for reads OR writes - not for read-modify-write operations.
2207 * Use cpu_register_map_client() to know when retrying the map operation is
2208 * likely to succeed.
2209 */
2210 void *address_space_map(AddressSpace *as,
2211 hwaddr addr,
2212 hwaddr *plen,
2213 bool is_write)
2214 {
2215 hwaddr len = *plen;
2216 hwaddr done = 0;
2217 hwaddr l, xlat, base;
2218 MemoryRegion *mr, *this_mr;
2219 ram_addr_t raddr;
2220
2221 if (len == 0) {
2222 return NULL;
2223 }
2224
2225 l = len;
2226 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2227 if (!memory_access_is_direct(mr, is_write)) {
2228 if (bounce.buffer) {
2229 return NULL;
2230 }
2231 /* Avoid unbounded allocations */
2232 l = MIN(l, TARGET_PAGE_SIZE);
2233 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2234 bounce.addr = addr;
2235 bounce.len = l;
2236
2237 memory_region_ref(mr);
2238 bounce.mr = mr;
2239 if (!is_write) {
2240 address_space_read(as, addr, bounce.buffer, l);
2241 }
2242
2243 *plen = l;
2244 return bounce.buffer;
2245 }
2246
2247 base = xlat;
2248 raddr = memory_region_get_ram_addr(mr);
2249
2250 for (;;) {
2251 len -= l;
2252 addr += l;
2253 done += l;
2254 if (len == 0) {
2255 break;
2256 }
2257
2258 l = len;
2259 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2260 if (this_mr != mr || xlat != base + done) {
2261 break;
2262 }
2263 }
2264
2265 memory_region_ref(mr);
2266 *plen = done;
2267 return qemu_ram_ptr_length(raddr + base, plen);
2268 }
2269
2270 /* Unmaps a memory region previously mapped by address_space_map().
2271 * Will also mark the memory as dirty if is_write == 1. access_len gives
2272 * the amount of memory that was actually read or written by the caller.
2273 */
2274 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2275 int is_write, hwaddr access_len)
2276 {
2277 if (buffer != bounce.buffer) {
2278 MemoryRegion *mr;
2279 ram_addr_t addr1;
2280
2281 mr = qemu_ram_addr_from_host(buffer, &addr1);
2282 assert(mr != NULL);
2283 if (is_write) {
2284 while (access_len) {
2285 unsigned l;
2286 l = TARGET_PAGE_SIZE;
2287 if (l > access_len)
2288 l = access_len;
2289 invalidate_and_set_dirty(addr1, l);
2290 addr1 += l;
2291 access_len -= l;
2292 }
2293 }
2294 if (xen_enabled()) {
2295 xen_invalidate_map_cache_entry(buffer);
2296 }
2297 memory_region_unref(mr);
2298 return;
2299 }
2300 if (is_write) {
2301 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2302 }
2303 qemu_vfree(bounce.buffer);
2304 bounce.buffer = NULL;
2305 memory_region_unref(bounce.mr);
2306 cpu_notify_map_clients();
2307 }
2308
2309 void *cpu_physical_memory_map(hwaddr addr,
2310 hwaddr *plen,
2311 int is_write)
2312 {
2313 return address_space_map(&address_space_memory, addr, plen, is_write);
2314 }
2315
2316 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2317 int is_write, hwaddr access_len)
2318 {
2319 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2320 }
2321
2322 /* warning: addr must be aligned */
2323 static inline uint32_t ldl_phys_internal(hwaddr addr,
2324 enum device_endian endian)
2325 {
2326 uint8_t *ptr;
2327 uint64_t val;
2328 MemoryRegion *mr;
2329 hwaddr l = 4;
2330 hwaddr addr1;
2331
2332 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2333 false);
2334 if (l < 4 || !memory_access_is_direct(mr, false)) {
2335 /* I/O case */
2336 io_mem_read(mr, addr1, &val, 4);
2337 #if defined(TARGET_WORDS_BIGENDIAN)
2338 if (endian == DEVICE_LITTLE_ENDIAN) {
2339 val = bswap32(val);
2340 }
2341 #else
2342 if (endian == DEVICE_BIG_ENDIAN) {
2343 val = bswap32(val);
2344 }
2345 #endif
2346 } else {
2347 /* RAM case */
2348 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2349 & TARGET_PAGE_MASK)
2350 + addr1);
2351 switch (endian) {
2352 case DEVICE_LITTLE_ENDIAN:
2353 val = ldl_le_p(ptr);
2354 break;
2355 case DEVICE_BIG_ENDIAN:
2356 val = ldl_be_p(ptr);
2357 break;
2358 default:
2359 val = ldl_p(ptr);
2360 break;
2361 }
2362 }
2363 return val;
2364 }
2365
2366 uint32_t ldl_phys(hwaddr addr)
2367 {
2368 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2369 }
2370
2371 uint32_t ldl_le_phys(hwaddr addr)
2372 {
2373 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2374 }
2375
2376 uint32_t ldl_be_phys(hwaddr addr)
2377 {
2378 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2379 }
2380
2381 /* warning: addr must be aligned */
2382 static inline uint64_t ldq_phys_internal(hwaddr addr,
2383 enum device_endian endian)
2384 {
2385 uint8_t *ptr;
2386 uint64_t val;
2387 MemoryRegion *mr;
2388 hwaddr l = 8;
2389 hwaddr addr1;
2390
2391 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2392 false);
2393 if (l < 8 || !memory_access_is_direct(mr, false)) {
2394 /* I/O case */
2395 io_mem_read(mr, addr1, &val, 8);
2396 #if defined(TARGET_WORDS_BIGENDIAN)
2397 if (endian == DEVICE_LITTLE_ENDIAN) {
2398 val = bswap64(val);
2399 }
2400 #else
2401 if (endian == DEVICE_BIG_ENDIAN) {
2402 val = bswap64(val);
2403 }
2404 #endif
2405 } else {
2406 /* RAM case */
2407 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2408 & TARGET_PAGE_MASK)
2409 + addr1);
2410 switch (endian) {
2411 case DEVICE_LITTLE_ENDIAN:
2412 val = ldq_le_p(ptr);
2413 break;
2414 case DEVICE_BIG_ENDIAN:
2415 val = ldq_be_p(ptr);
2416 break;
2417 default:
2418 val = ldq_p(ptr);
2419 break;
2420 }
2421 }
2422 return val;
2423 }
2424
2425 uint64_t ldq_phys(hwaddr addr)
2426 {
2427 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2428 }
2429
2430 uint64_t ldq_le_phys(hwaddr addr)
2431 {
2432 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2433 }
2434
2435 uint64_t ldq_be_phys(hwaddr addr)
2436 {
2437 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2438 }
2439
2440 /* XXX: optimize */
2441 uint32_t ldub_phys(hwaddr addr)
2442 {
2443 uint8_t val;
2444 cpu_physical_memory_read(addr, &val, 1);
2445 return val;
2446 }
2447
2448 /* warning: addr must be aligned */
2449 static inline uint32_t lduw_phys_internal(hwaddr addr,
2450 enum device_endian endian)
2451 {
2452 uint8_t *ptr;
2453 uint64_t val;
2454 MemoryRegion *mr;
2455 hwaddr l = 2;
2456 hwaddr addr1;
2457
2458 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2459 false);
2460 if (l < 2 || !memory_access_is_direct(mr, false)) {
2461 /* I/O case */
2462 io_mem_read(mr, addr1, &val, 2);
2463 #if defined(TARGET_WORDS_BIGENDIAN)
2464 if (endian == DEVICE_LITTLE_ENDIAN) {
2465 val = bswap16(val);
2466 }
2467 #else
2468 if (endian == DEVICE_BIG_ENDIAN) {
2469 val = bswap16(val);
2470 }
2471 #endif
2472 } else {
2473 /* RAM case */
2474 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2475 & TARGET_PAGE_MASK)
2476 + addr1);
2477 switch (endian) {
2478 case DEVICE_LITTLE_ENDIAN:
2479 val = lduw_le_p(ptr);
2480 break;
2481 case DEVICE_BIG_ENDIAN:
2482 val = lduw_be_p(ptr);
2483 break;
2484 default:
2485 val = lduw_p(ptr);
2486 break;
2487 }
2488 }
2489 return val;
2490 }
2491
2492 uint32_t lduw_phys(hwaddr addr)
2493 {
2494 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2495 }
2496
2497 uint32_t lduw_le_phys(hwaddr addr)
2498 {
2499 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2500 }
2501
2502 uint32_t lduw_be_phys(hwaddr addr)
2503 {
2504 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2505 }
2506
2507 /* warning: addr must be aligned. The ram page is not masked as dirty
2508 and the code inside is not invalidated. It is useful if the dirty
2509 bits are used to track modified PTEs */
2510 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2511 {
2512 uint8_t *ptr;
2513 MemoryRegion *mr;
2514 hwaddr l = 4;
2515 hwaddr addr1;
2516
2517 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2518 true);
2519 if (l < 4 || !memory_access_is_direct(mr, true)) {
2520 io_mem_write(mr, addr1, val, 4);
2521 } else {
2522 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2523 ptr = qemu_get_ram_ptr(addr1);
2524 stl_p(ptr, val);
2525
2526 if (unlikely(in_migration)) {
2527 if (!cpu_physical_memory_is_dirty(addr1)) {
2528 /* invalidate code */
2529 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2530 /* set dirty bit */
2531 cpu_physical_memory_set_dirty_flag(addr1,
2532 DIRTY_MEMORY_MIGRATION);
2533 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2534 }
2535 }
2536 }
2537 }
2538
2539 /* warning: addr must be aligned */
2540 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2541 enum device_endian endian)
2542 {
2543 uint8_t *ptr;
2544 MemoryRegion *mr;
2545 hwaddr l = 4;
2546 hwaddr addr1;
2547
2548 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2549 true);
2550 if (l < 4 || !memory_access_is_direct(mr, true)) {
2551 #if defined(TARGET_WORDS_BIGENDIAN)
2552 if (endian == DEVICE_LITTLE_ENDIAN) {
2553 val = bswap32(val);
2554 }
2555 #else
2556 if (endian == DEVICE_BIG_ENDIAN) {
2557 val = bswap32(val);
2558 }
2559 #endif
2560 io_mem_write(mr, addr1, val, 4);
2561 } else {
2562 /* RAM case */
2563 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2564 ptr = qemu_get_ram_ptr(addr1);
2565 switch (endian) {
2566 case DEVICE_LITTLE_ENDIAN:
2567 stl_le_p(ptr, val);
2568 break;
2569 case DEVICE_BIG_ENDIAN:
2570 stl_be_p(ptr, val);
2571 break;
2572 default:
2573 stl_p(ptr, val);
2574 break;
2575 }
2576 invalidate_and_set_dirty(addr1, 4);
2577 }
2578 }
2579
2580 void stl_phys(hwaddr addr, uint32_t val)
2581 {
2582 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2583 }
2584
2585 void stl_le_phys(hwaddr addr, uint32_t val)
2586 {
2587 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2588 }
2589
2590 void stl_be_phys(hwaddr addr, uint32_t val)
2591 {
2592 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2593 }
2594
2595 /* XXX: optimize */
2596 void stb_phys(hwaddr addr, uint32_t val)
2597 {
2598 uint8_t v = val;
2599 cpu_physical_memory_write(addr, &v, 1);
2600 }
2601
2602 /* warning: addr must be aligned */
2603 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2604 enum device_endian endian)
2605 {
2606 uint8_t *ptr;
2607 MemoryRegion *mr;
2608 hwaddr l = 2;
2609 hwaddr addr1;
2610
2611 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2612 true);
2613 if (l < 2 || !memory_access_is_direct(mr, true)) {
2614 #if defined(TARGET_WORDS_BIGENDIAN)
2615 if (endian == DEVICE_LITTLE_ENDIAN) {
2616 val = bswap16(val);
2617 }
2618 #else
2619 if (endian == DEVICE_BIG_ENDIAN) {
2620 val = bswap16(val);
2621 }
2622 #endif
2623 io_mem_write(mr, addr1, val, 2);
2624 } else {
2625 /* RAM case */
2626 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2627 ptr = qemu_get_ram_ptr(addr1);
2628 switch (endian) {
2629 case DEVICE_LITTLE_ENDIAN:
2630 stw_le_p(ptr, val);
2631 break;
2632 case DEVICE_BIG_ENDIAN:
2633 stw_be_p(ptr, val);
2634 break;
2635 default:
2636 stw_p(ptr, val);
2637 break;
2638 }
2639 invalidate_and_set_dirty(addr1, 2);
2640 }
2641 }
2642
2643 void stw_phys(hwaddr addr, uint32_t val)
2644 {
2645 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2646 }
2647
2648 void stw_le_phys(hwaddr addr, uint32_t val)
2649 {
2650 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2651 }
2652
2653 void stw_be_phys(hwaddr addr, uint32_t val)
2654 {
2655 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2656 }
2657
2658 /* XXX: optimize */
2659 void stq_phys(hwaddr addr, uint64_t val)
2660 {
2661 val = tswap64(val);
2662 cpu_physical_memory_write(addr, &val, 8);
2663 }
2664
2665 void stq_le_phys(hwaddr addr, uint64_t val)
2666 {
2667 val = cpu_to_le64(val);
2668 cpu_physical_memory_write(addr, &val, 8);
2669 }
2670
2671 void stq_be_phys(hwaddr addr, uint64_t val)
2672 {
2673 val = cpu_to_be64(val);
2674 cpu_physical_memory_write(addr, &val, 8);
2675 }
2676
2677 /* virtual memory access for debug (includes writing to ROM) */
2678 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2679 uint8_t *buf, int len, int is_write)
2680 {
2681 int l;
2682 hwaddr phys_addr;
2683 target_ulong page;
2684
2685 while (len > 0) {
2686 page = addr & TARGET_PAGE_MASK;
2687 phys_addr = cpu_get_phys_page_debug(cpu, page);
2688 /* if no physical page mapped, return an error */
2689 if (phys_addr == -1)
2690 return -1;
2691 l = (page + TARGET_PAGE_SIZE) - addr;
2692 if (l > len)
2693 l = len;
2694 phys_addr += (addr & ~TARGET_PAGE_MASK);
2695 if (is_write)
2696 cpu_physical_memory_write_rom(phys_addr, buf, l);
2697 else
2698 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2699 len -= l;
2700 buf += l;
2701 addr += l;
2702 }
2703 return 0;
2704 }
2705 #endif
2706
2707 #if !defined(CONFIG_USER_ONLY)
2708
/*
 * Helper for the _utterly broken_ virtio device model: reports whether this
 * build targets a big endian machine, i.e. whether TARGET_WORDS_BIGENDIAN
 * is defined. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool target_big_endian = true;
#else
    const bool target_big_endian = false;
#endif

    return target_big_endian;
}
2722
2723 #endif
2724
2725 #ifndef CONFIG_USER_ONLY
2726 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2727 {
2728 MemoryRegion*mr;
2729 hwaddr l = 1;
2730
2731 mr = address_space_translate(&address_space_memory,
2732 phys_addr, &phys_addr, &l, false);
2733
2734 return !(memory_region_is_ram(mr) ||
2735 memory_region_is_romd(mr));
2736 }
2737
2738 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2739 {
2740 RAMBlock *block;
2741
2742 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2743 func(block->host, block->offset, block->length, opaque);
2744 }
2745 }
2746 #endif