]> git.proxmox.com Git - mirror_qemu.git/blob - exec.c
dataplane: fix implicit IOThread refcount
[mirror_qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
54
55 #include "qemu/range.h"
56
57 //#define DEBUG_SUBPAGE
58
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
61
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
66
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
69
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
72
73 #endif
74
75 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
76 /* current CPU in the current thread. It is only valid inside
77 cpu_exec() */
78 DEFINE_TLS(CPUState *, current_cpu);
79 /* 0 = Do not count executed instructions.
80 1 = Precise instruction counting.
81 2 = Adaptive rate instruction counting. */
82 int use_icount;
83
84 #if !defined(CONFIG_USER_ONLY)
85
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the multi-level physical page map, packed into 32 bits.
 * A freshly allocated node has every slot set to skip = 1 and
 * ptr = PHYS_MAP_NODE_NIL (see phys_map_node_alloc()).
 */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
94
95 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
96
97 /* Size of the L2 (and L3, etc) page tables. */
98 #define ADDR_SPACE_BITS 64
99
100 #define P_L2_BITS 9
101 #define P_L2_SIZE (1 << P_L2_BITS)
102
103 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
104
105 typedef PhysPageEntry Node[P_L2_SIZE];
106
/* Growable storage behind a dispatch map: an array of nodes and an array of
 * sections, each tracked by a "used" count and an "allocated" count.
 */
typedef struct PhysPageMap {
    unsigned sections_nb;        /* entries of 'sections' in use */
    unsigned sections_nb_alloc;  /* entries of 'sections' allocated */
    unsigned nodes_nb;           /* entries of 'nodes' in use */
    unsigned nodes_nb_alloc;     /* entries of 'nodes' allocated */
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;
115
struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;     /* root entry of the map */
    PhysPageMap map;            /* node/section arrays the entries index into */
    AddressSpace *as;           /* handed to subpage_init() when a subpage is created */
};
124
125 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/* State of a page that is shared by several sections.  sub_section[] holds
 * one section index per byte offset within the page and is indexed with
 * SUBPAGE_IDX(addr).
 */
typedef struct subpage_t {
    MemoryRegion iomem;     /* embedded region; container_of() maps it back to the subpage */
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;
132
133 #define PHYS_SECTION_UNASSIGNED 0
134 #define PHYS_SECTION_NOTDIRTY 1
135 #define PHYS_SECTION_ROM 2
136 #define PHYS_SECTION_WATCH 3
137
138 static void io_mem_init(void);
139 static void memory_map_init(void);
140 static void tcg_commit(MemoryListener *listener);
141
142 static MemoryRegion io_mem_watch;
143 #endif
144
145 #if !defined(CONFIG_USER_ONLY)
146
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
148 {
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
153 }
154 }
155
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
157 {
158 unsigned i;
159 uint32_t ret;
160
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
167 }
168 return ret;
169 }
170
171 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172 hwaddr *index, hwaddr *nb, uint16_t leaf,
173 int level)
174 {
175 PhysPageEntry *p;
176 int i;
177 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
178
179 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180 lp->ptr = phys_map_node_alloc(map);
181 p = map->nodes[lp->ptr];
182 if (level == 0) {
183 for (i = 0; i < P_L2_SIZE; i++) {
184 p[i].skip = 0;
185 p[i].ptr = PHYS_SECTION_UNASSIGNED;
186 }
187 }
188 } else {
189 p = map->nodes[lp->ptr];
190 }
191 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
192
193 while (*nb && lp < &p[P_L2_SIZE]) {
194 if ((*index & (step - 1)) == 0 && *nb >= step) {
195 lp->skip = 0;
196 lp->ptr = leaf;
197 *index += step;
198 *nb -= step;
199 } else {
200 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
201 }
202 ++lp;
203 }
204 }
205
206 static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
208 uint16_t leaf)
209 {
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
212
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
214 }
215
216 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
217 * and update our entry so we can skip it and go directly to the destination.
218 */
219 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
220 {
221 unsigned valid_ptr = P_L2_SIZE;
222 int valid = 0;
223 PhysPageEntry *p;
224 int i;
225
226 if (lp->ptr == PHYS_MAP_NODE_NIL) {
227 return;
228 }
229
230 p = nodes[lp->ptr];
231 for (i = 0; i < P_L2_SIZE; i++) {
232 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
233 continue;
234 }
235
236 valid_ptr = i;
237 valid++;
238 if (p[i].skip) {
239 phys_page_compact(&p[i], nodes, compacted);
240 }
241 }
242
243 /* We can only compress if there's only one child. */
244 if (valid != 1) {
245 return;
246 }
247
248 assert(valid_ptr < P_L2_SIZE);
249
250 /* Don't compress if it won't fit in the # of bits we have. */
251 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
252 return;
253 }
254
255 lp->ptr = p[valid_ptr].ptr;
256 if (!p[valid_ptr].skip) {
257 /* If our only child is a leaf, make this a leaf. */
258 /* By design, we should have made this node a leaf to begin with so we
259 * should never reach here.
260 * But since it's so simple to handle this, let's do it just in case we
261 * change this rule.
262 */
263 lp->skip = 0;
264 } else {
265 lp->skip += p[valid_ptr].skip;
266 }
267 }
268
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
270 {
271 DECLARE_BITMAP(compacted, nodes_nb);
272
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
275 }
276 }
277
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
280 {
281 PhysPageEntry *p;
282 hwaddr index = addr >> TARGET_PAGE_BITS;
283 int i;
284
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return &sections[PHYS_SECTION_UNASSIGNED];
288 }
289 p = nodes[lp.ptr];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
291 }
292
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return &sections[lp.ptr];
297 } else {
298 return &sections[PHYS_SECTION_UNASSIGNED];
299 }
300 }
301
302 bool memory_region_is_unassigned(MemoryRegion *mr)
303 {
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
306 }
307
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
309 hwaddr addr,
310 bool resolve_subpage)
311 {
312 MemoryRegionSection *section;
313 subpage_t *subpage;
314
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
319 }
320 return section;
321 }
322
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
326 {
327 MemoryRegionSection *section;
328 Int128 diff;
329
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
333
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
336
337 diff = int128_sub(section->mr->size, int128_make64(addr));
338 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
339 return section;
340 }
341
342 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
343 {
344 if (memory_region_is_ram(mr)) {
345 return !(is_write && mr->readonly);
346 }
347 if (memory_region_is_romd(mr)) {
348 return !is_write;
349 }
350
351 return false;
352 }
353
354 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
355 hwaddr *xlat, hwaddr *plen,
356 bool is_write)
357 {
358 IOMMUTLBEntry iotlb;
359 MemoryRegionSection *section;
360 MemoryRegion *mr;
361 hwaddr len = *plen;
362
363 for (;;) {
364 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
365 mr = section->mr;
366
367 if (!mr->iommu_ops) {
368 break;
369 }
370
371 iotlb = mr->iommu_ops->translate(mr, addr);
372 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
373 | (addr & iotlb.addr_mask));
374 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
375 if (!(iotlb.perm & (1 << is_write))) {
376 mr = &io_mem_unassigned;
377 break;
378 }
379
380 as = iotlb.target_as;
381 }
382
383 if (memory_access_is_direct(mr, is_write)) {
384 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
385 len = MIN(page, len);
386 }
387
388 *plen = len;
389 *xlat = addr;
390 return mr;
391 }
392
393 MemoryRegionSection *
394 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
395 hwaddr *plen)
396 {
397 MemoryRegionSection *section;
398 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
399
400 assert(!section->mr->iommu_ops);
401 return section;
402 }
403 #endif
404
405 void cpu_exec_init_all(void)
406 {
407 #if !defined(CONFIG_USER_ONLY)
408 qemu_mutex_init(&ram_list.mutex);
409 memory_map_init();
410 io_mem_init();
411 #endif
412 }
413
414 #if !defined(CONFIG_USER_ONLY)
415
416 static int cpu_common_post_load(void *opaque, int version_id)
417 {
418 CPUState *cpu = opaque;
419
420 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
421 version_id is increased. */
422 cpu->interrupt_request &= ~0x01;
423 tlb_flush(cpu->env_ptr, 1);
424
425 return 0;
426 }
427
/* VMState description named "cpu_common": carries the 'halted' and
 * 'interrupt_request' fields of CPUState and runs cpu_common_post_load()
 * after loading.
 */
const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
440
441 #endif
442
443 CPUState *qemu_get_cpu(int index)
444 {
445 CPUState *cpu;
446
447 CPU_FOREACH(cpu) {
448 if (cpu->cpu_index == index) {
449 return cpu;
450 }
451 }
452
453 return NULL;
454 }
455
456 #if !defined(CONFIG_USER_ONLY)
457 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
458 {
459 /* We only support one address space per cpu at the moment. */
460 assert(cpu->as == as);
461
462 if (cpu->tcg_as_listener) {
463 memory_listener_unregister(cpu->tcg_as_listener);
464 } else {
465 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
466 }
467 cpu->tcg_as_listener->commit = tcg_commit;
468 memory_listener_register(cpu->tcg_as_listener, as);
469 }
470 #endif
471
472 void cpu_exec_init(CPUArchState *env)
473 {
474 CPUState *cpu = ENV_GET_CPU(env);
475 CPUClass *cc = CPU_GET_CLASS(cpu);
476 CPUState *some_cpu;
477 int cpu_index;
478
479 #if defined(CONFIG_USER_ONLY)
480 cpu_list_lock();
481 #endif
482 cpu_index = 0;
483 CPU_FOREACH(some_cpu) {
484 cpu_index++;
485 }
486 cpu->cpu_index = cpu_index;
487 cpu->numa_node = 0;
488 QTAILQ_INIT(&cpu->breakpoints);
489 QTAILQ_INIT(&cpu->watchpoints);
490 #ifndef CONFIG_USER_ONLY
491 cpu->as = &address_space_memory;
492 cpu->thread_id = qemu_get_thread_id();
493 #endif
494 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
495 #if defined(CONFIG_USER_ONLY)
496 cpu_list_unlock();
497 #endif
498 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
499 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
500 }
501 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
502 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
503 cpu_save, cpu_load, env);
504 assert(cc->vmsd == NULL);
505 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
506 #endif
507 if (cc->vmsd != NULL) {
508 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
509 }
510 }
511
512 #if defined(TARGET_HAS_ICE)
513 #if defined(CONFIG_USER_ONLY)
514 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
515 {
516 tb_invalidate_phys_page_range(pc, pc + 1, 0);
517 }
518 #else
519 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
520 {
521 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
522 if (phys != -1) {
523 tb_invalidate_phys_addr(cpu->as,
524 phys | (pc & ~TARGET_PAGE_MASK));
525 }
526 }
527 #endif
528 #endif /* TARGET_HAS_ICE */
529
530 #if defined(CONFIG_USER_ONLY)
531 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
532
533 {
534 }
535
536 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
537 int flags, CPUWatchpoint **watchpoint)
538 {
539 return -ENOSYS;
540 }
541 #else
542 /* Add a watchpoint. */
543 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
544 int flags, CPUWatchpoint **watchpoint)
545 {
546 vaddr len_mask = ~(len - 1);
547 CPUWatchpoint *wp;
548
549 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
550 if ((len & (len - 1)) || (addr & ~len_mask) ||
551 len == 0 || len > TARGET_PAGE_SIZE) {
552 error_report("tried to set invalid watchpoint at %"
553 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
554 return -EINVAL;
555 }
556 wp = g_malloc(sizeof(*wp));
557
558 wp->vaddr = addr;
559 wp->len_mask = len_mask;
560 wp->flags = flags;
561
562 /* keep all GDB-injected watchpoints in front */
563 if (flags & BP_GDB) {
564 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
565 } else {
566 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
567 }
568
569 tlb_flush_page(cpu, addr);
570
571 if (watchpoint)
572 *watchpoint = wp;
573 return 0;
574 }
575
576 /* Remove a specific watchpoint. */
577 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
578 int flags)
579 {
580 vaddr len_mask = ~(len - 1);
581 CPUWatchpoint *wp;
582
583 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
584 if (addr == wp->vaddr && len_mask == wp->len_mask
585 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
586 cpu_watchpoint_remove_by_ref(cpu, wp);
587 return 0;
588 }
589 }
590 return -ENOENT;
591 }
592
593 /* Remove a specific watchpoint by reference. */
594 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
595 {
596 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
597
598 tlb_flush_page(cpu, watchpoint->vaddr);
599
600 g_free(watchpoint);
601 }
602
603 /* Remove all matching watchpoints. */
604 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
605 {
606 CPUWatchpoint *wp, *next;
607
608 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
609 if (wp->flags & mask) {
610 cpu_watchpoint_remove_by_ref(cpu, wp);
611 }
612 }
613 }
614 #endif
615
616 /* Add a breakpoint. */
617 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
618 CPUBreakpoint **breakpoint)
619 {
620 #if defined(TARGET_HAS_ICE)
621 CPUBreakpoint *bp;
622
623 bp = g_malloc(sizeof(*bp));
624
625 bp->pc = pc;
626 bp->flags = flags;
627
628 /* keep all GDB-injected breakpoints in front */
629 if (flags & BP_GDB) {
630 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
631 } else {
632 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
633 }
634
635 breakpoint_invalidate(cpu, pc);
636
637 if (breakpoint) {
638 *breakpoint = bp;
639 }
640 return 0;
641 #else
642 return -ENOSYS;
643 #endif
644 }
645
646 /* Remove a specific breakpoint. */
647 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
648 {
649 #if defined(TARGET_HAS_ICE)
650 CPUBreakpoint *bp;
651
652 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
653 if (bp->pc == pc && bp->flags == flags) {
654 cpu_breakpoint_remove_by_ref(cpu, bp);
655 return 0;
656 }
657 }
658 return -ENOENT;
659 #else
660 return -ENOSYS;
661 #endif
662 }
663
664 /* Remove a specific breakpoint by reference. */
665 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
666 {
667 #if defined(TARGET_HAS_ICE)
668 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
669
670 breakpoint_invalidate(cpu, breakpoint->pc);
671
672 g_free(breakpoint);
673 #endif
674 }
675
676 /* Remove all matching breakpoints. */
677 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
678 {
679 #if defined(TARGET_HAS_ICE)
680 CPUBreakpoint *bp, *next;
681
682 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
683 if (bp->flags & mask) {
684 cpu_breakpoint_remove_by_ref(cpu, bp);
685 }
686 }
687 #endif
688 }
689
690 /* enable or disable single step mode. EXCP_DEBUG is returned by the
691 CPU loop after each instruction */
692 void cpu_single_step(CPUState *cpu, int enabled)
693 {
694 #if defined(TARGET_HAS_ICE)
695 if (cpu->singlestep_enabled != enabled) {
696 cpu->singlestep_enabled = enabled;
697 if (kvm_enabled()) {
698 kvm_update_guest_debug(cpu, 0);
699 } else {
700 /* must flush all the translated code to avoid inconsistencies */
701 /* XXX: only flush what is necessary */
702 CPUArchState *env = cpu->env_ptr;
703 tb_flush(env);
704 }
705 }
706 #endif
707 }
708
709 void cpu_abort(CPUState *cpu, const char *fmt, ...)
710 {
711 va_list ap;
712 va_list ap2;
713
714 va_start(ap, fmt);
715 va_copy(ap2, ap);
716 fprintf(stderr, "qemu: fatal: ");
717 vfprintf(stderr, fmt, ap);
718 fprintf(stderr, "\n");
719 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
720 if (qemu_log_enabled()) {
721 qemu_log("qemu: fatal: ");
722 qemu_log_vprintf(fmt, ap2);
723 qemu_log("\n");
724 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
725 qemu_log_flush();
726 qemu_log_close();
727 }
728 va_end(ap2);
729 va_end(ap);
730 #if defined(CONFIG_USER_ONLY)
731 {
732 struct sigaction act;
733 sigfillset(&act.sa_mask);
734 act.sa_handler = SIG_DFL;
735 sigaction(SIGABRT, &act, NULL);
736 }
737 #endif
738 abort();
739 }
740
741 #if !defined(CONFIG_USER_ONLY)
742 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
743 {
744 RAMBlock *block;
745
746 /* The list is protected by the iothread lock here. */
747 block = ram_list.mru_block;
748 if (block && addr - block->offset < block->length) {
749 goto found;
750 }
751 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
752 if (addr - block->offset < block->length) {
753 goto found;
754 }
755 }
756
757 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
758 abort();
759
760 found:
761 ram_list.mru_block = block;
762 return block;
763 }
764
765 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
766 {
767 ram_addr_t start1;
768 RAMBlock *block;
769 ram_addr_t end;
770
771 end = TARGET_PAGE_ALIGN(start + length);
772 start &= TARGET_PAGE_MASK;
773
774 block = qemu_get_ram_block(start);
775 assert(block == qemu_get_ram_block(end - 1));
776 start1 = (uintptr_t)block->host + (start - block->offset);
777 cpu_tlb_reset_dirty_all(start1, length);
778 }
779
780 /* Note: start and end must be within the same ram block. */
781 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
782 unsigned client)
783 {
784 if (length == 0)
785 return;
786 cpu_physical_memory_clear_dirty_range(start, length, client);
787
788 if (tcg_enabled()) {
789 tlb_reset_dirty_range_all(start, length);
790 }
791 }
792
793 static void cpu_physical_memory_set_dirty_tracking(bool enable)
794 {
795 in_migration = enable;
796 }
797
798 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
799 MemoryRegionSection *section,
800 target_ulong vaddr,
801 hwaddr paddr, hwaddr xlat,
802 int prot,
803 target_ulong *address)
804 {
805 hwaddr iotlb;
806 CPUWatchpoint *wp;
807
808 if (memory_region_is_ram(section->mr)) {
809 /* Normal RAM. */
810 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
811 + xlat;
812 if (!section->readonly) {
813 iotlb |= PHYS_SECTION_NOTDIRTY;
814 } else {
815 iotlb |= PHYS_SECTION_ROM;
816 }
817 } else {
818 iotlb = section - section->address_space->dispatch->map.sections;
819 iotlb += xlat;
820 }
821
822 /* Make accesses to pages with watchpoints go via the
823 watchpoint trap routines. */
824 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
825 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
826 /* Avoid trapping reads of pages with a write breakpoint. */
827 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
828 iotlb = PHYS_SECTION_WATCH + paddr;
829 *address |= TLB_MMIO;
830 break;
831 }
832 }
833 }
834
835 return iotlb;
836 }
#endif /* !defined(CONFIG_USER_ONLY) */
838
839 #if !defined(CONFIG_USER_ONLY)
840
841 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
842 uint16_t section);
843 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
844
845 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
846
847 /*
848 * Set a custom physical guest memory alloator.
849 * Accelerators with unusual needs may need this. Hopefully, we can
850 * get rid of it eventually.
851 */
852 void phys_mem_set_alloc(void *(*alloc)(size_t))
853 {
854 phys_mem_alloc = alloc;
855 }
856
857 static uint16_t phys_section_add(PhysPageMap *map,
858 MemoryRegionSection *section)
859 {
860 /* The physical section number is ORed with a page-aligned
861 * pointer to produce the iotlb entries. Thus it should
862 * never overflow into the page-aligned value.
863 */
864 assert(map->sections_nb < TARGET_PAGE_SIZE);
865
866 if (map->sections_nb == map->sections_nb_alloc) {
867 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
868 map->sections = g_renew(MemoryRegionSection, map->sections,
869 map->sections_nb_alloc);
870 }
871 map->sections[map->sections_nb] = *section;
872 memory_region_ref(section->mr);
873 return map->sections_nb++;
874 }
875
876 static void phys_section_destroy(MemoryRegion *mr)
877 {
878 memory_region_unref(mr);
879
880 if (mr->subpage) {
881 subpage_t *subpage = container_of(mr, subpage_t, iomem);
882 memory_region_destroy(&subpage->iomem);
883 g_free(subpage);
884 }
885 }
886
887 static void phys_sections_free(PhysPageMap *map)
888 {
889 while (map->sections_nb > 0) {
890 MemoryRegionSection *section = &map->sections[--map->sections_nb];
891 phys_section_destroy(section->mr);
892 }
893 g_free(map->sections);
894 g_free(map->nodes);
895 }
896
897 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
898 {
899 subpage_t *subpage;
900 hwaddr base = section->offset_within_address_space
901 & TARGET_PAGE_MASK;
902 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
903 d->map.nodes, d->map.sections);
904 MemoryRegionSection subsection = {
905 .offset_within_address_space = base,
906 .size = int128_make64(TARGET_PAGE_SIZE),
907 };
908 hwaddr start, end;
909
910 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
911
912 if (!(existing->mr->subpage)) {
913 subpage = subpage_init(d->as, base);
914 subsection.address_space = d->as;
915 subsection.mr = &subpage->iomem;
916 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
917 phys_section_add(&d->map, &subsection));
918 } else {
919 subpage = container_of(existing->mr, subpage_t, iomem);
920 }
921 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
922 end = start + int128_get64(section->size) - 1;
923 subpage_register(subpage, start, end,
924 phys_section_add(&d->map, section));
925 }
926
927
928 static void register_multipage(AddressSpaceDispatch *d,
929 MemoryRegionSection *section)
930 {
931 hwaddr start_addr = section->offset_within_address_space;
932 uint16_t section_index = phys_section_add(&d->map, section);
933 uint64_t num_pages = int128_get64(int128_rshift(section->size,
934 TARGET_PAGE_BITS));
935
936 assert(num_pages);
937 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
938 }
939
940 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
941 {
942 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
943 AddressSpaceDispatch *d = as->next_dispatch;
944 MemoryRegionSection now = *section, remain = *section;
945 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
946
947 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
948 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
949 - now.offset_within_address_space;
950
951 now.size = int128_min(int128_make64(left), now.size);
952 register_subpage(d, &now);
953 } else {
954 now.size = int128_zero();
955 }
956 while (int128_ne(remain.size, now.size)) {
957 remain.size = int128_sub(remain.size, now.size);
958 remain.offset_within_address_space += int128_get64(now.size);
959 remain.offset_within_region += int128_get64(now.size);
960 now = remain;
961 if (int128_lt(remain.size, page_size)) {
962 register_subpage(d, &now);
963 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
964 now.size = page_size;
965 register_subpage(d, &now);
966 } else {
967 now.size = int128_and(now.size, int128_neg(page_size));
968 register_multipage(d, &now);
969 }
970 }
971 }
972
/* Flush the KVM coalesced MMIO buffer; does nothing unless KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
978
979 void qemu_mutex_lock_ramlist(void)
980 {
981 qemu_mutex_lock(&ram_list.mutex);
982 }
983
984 void qemu_mutex_unlock_ramlist(void)
985 {
986 qemu_mutex_unlock(&ram_list.mutex);
987 }
988
989 #ifdef __linux__
990
991 #include <sys/vfs.h>
992
#define HUGETLBFS_MAGIC       0x958458f6

/* Return the block size reported by statfs() for @path, or 0 (after
 * printing the error) when statfs() fails.  Interrupted calls are retried.
 * A warning is printed when the filesystem type is not HUGETLBFS_MAGIC; the
 * block size is returned regardless.
 */
static long gethugepagesize(const char *path)
{
    struct statfs fs;

    for (;;) {
        if (statfs(path, &fs) == 0) {
            break;
        }
        if (errno != EINTR) {
            perror(path);
            return 0;
        }
    }

    if (fs.f_type != HUGETLBFS_MAGIC) {
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
    }

    return fs.f_bsize;
}
1014
1015 static sigjmp_buf sigjump;
1016
1017 static void sigbus_handler(int signal)
1018 {
1019 siglongjmp(sigjump, 1);
1020 }
1021
1022 static void *file_ram_alloc(RAMBlock *block,
1023 ram_addr_t memory,
1024 const char *path)
1025 {
1026 char *filename;
1027 char *sanitized_name;
1028 char *c;
1029 void *area;
1030 int fd;
1031 unsigned long hpagesize;
1032
1033 hpagesize = gethugepagesize(path);
1034 if (!hpagesize) {
1035 goto error;
1036 }
1037
1038 if (memory < hpagesize) {
1039 return NULL;
1040 }
1041
1042 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1043 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1044 goto error;
1045 }
1046
1047 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1048 sanitized_name = g_strdup(block->mr->name);
1049 for (c = sanitized_name; *c != '\0'; c++) {
1050 if (*c == '/')
1051 *c = '_';
1052 }
1053
1054 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1055 sanitized_name);
1056 g_free(sanitized_name);
1057
1058 fd = mkstemp(filename);
1059 if (fd < 0) {
1060 perror("unable to create backing store for hugepages");
1061 g_free(filename);
1062 goto error;
1063 }
1064 unlink(filename);
1065 g_free(filename);
1066
1067 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1068
1069 /*
1070 * ftruncate is not supported by hugetlbfs in older
1071 * hosts, so don't bother bailing out on errors.
1072 * If anything goes wrong with it under other filesystems,
1073 * mmap will fail.
1074 */
1075 if (ftruncate(fd, memory))
1076 perror("ftruncate");
1077
1078 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1079 if (area == MAP_FAILED) {
1080 perror("file_ram_alloc: can't mmap RAM pages");
1081 close(fd);
1082 goto error;
1083 }
1084
1085 if (mem_prealloc) {
1086 int ret, i;
1087 struct sigaction act, oldact;
1088 sigset_t set, oldset;
1089
1090 memset(&act, 0, sizeof(act));
1091 act.sa_handler = &sigbus_handler;
1092 act.sa_flags = 0;
1093
1094 ret = sigaction(SIGBUS, &act, &oldact);
1095 if (ret) {
1096 perror("file_ram_alloc: failed to install signal handler");
1097 exit(1);
1098 }
1099
1100 /* unblock SIGBUS */
1101 sigemptyset(&set);
1102 sigaddset(&set, SIGBUS);
1103 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1104
1105 if (sigsetjmp(sigjump, 1)) {
1106 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1107 exit(1);
1108 }
1109
1110 /* MAP_POPULATE silently ignores failures */
1111 for (i = 0; i < (memory/hpagesize); i++) {
1112 memset(area + (hpagesize*i), 0, 1);
1113 }
1114
1115 ret = sigaction(SIGBUS, &oldact, NULL);
1116 if (ret) {
1117 perror("file_ram_alloc: failed to reinstall signal handler");
1118 exit(1);
1119 }
1120
1121 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1122 }
1123
1124 block->fd = fd;
1125 return area;
1126
1127 error:
1128 if (mem_prealloc) {
1129 exit(1);
1130 }
1131 return NULL;
1132 }
1133 #else
1134 static void *file_ram_alloc(RAMBlock *block,
1135 ram_addr_t memory,
1136 const char *path)
1137 {
1138 fprintf(stderr, "-mem-path not supported on this host\n");
1139 exit(1);
1140 }
1141 #endif
1142
1143 static ram_addr_t find_ram_offset(ram_addr_t size)
1144 {
1145 RAMBlock *block, *next_block;
1146 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1147
1148 assert(size != 0); /* it would hand out same offset multiple times */
1149
1150 if (QTAILQ_EMPTY(&ram_list.blocks))
1151 return 0;
1152
1153 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1154 ram_addr_t end, next = RAM_ADDR_MAX;
1155
1156 end = block->offset + block->length;
1157
1158 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1159 if (next_block->offset >= end) {
1160 next = MIN(next, next_block->offset);
1161 }
1162 }
1163 if (next - end >= size && next - end < mingap) {
1164 offset = end;
1165 mingap = next - end;
1166 }
1167 }
1168
1169 if (offset == RAM_ADDR_MAX) {
1170 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1171 (uint64_t)size);
1172 abort();
1173 }
1174
1175 return offset;
1176 }
1177
1178 ram_addr_t last_ram_offset(void)
1179 {
1180 RAMBlock *block;
1181 ram_addr_t last = 0;
1182
1183 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1184 last = MAX(last, block->offset + block->length);
1185
1186 return last;
1187 }
1188
1189 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1190 {
1191 int ret;
1192
1193 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1194 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1195 "dump-guest-core", true)) {
1196 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1197 if (ret) {
1198 perror("qemu_madvise");
1199 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1200 "but dump_guest_core=off specified\n");
1201 }
1202 }
1203 }
1204
1205 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1206 {
1207 RAMBlock *new_block, *block;
1208
1209 new_block = NULL;
1210 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1211 if (block->offset == addr) {
1212 new_block = block;
1213 break;
1214 }
1215 }
1216 assert(new_block);
1217 assert(!new_block->idstr[0]);
1218
1219 if (dev) {
1220 char *id = qdev_get_dev_path(dev);
1221 if (id) {
1222 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1223 g_free(id);
1224 }
1225 }
1226 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1227
1228 /* This assumes the iothread lock is taken here too. */
1229 qemu_mutex_lock_ramlist();
1230 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1231 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1232 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1233 new_block->idstr);
1234 abort();
1235 }
1236 }
1237 qemu_mutex_unlock_ramlist();
1238 }
1239
1240 static int memory_try_enable_merging(void *addr, size_t len)
1241 {
1242 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1243 /* disabled by the user */
1244 return 0;
1245 }
1246
1247 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1248 }
1249
1250 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1251 MemoryRegion *mr)
1252 {
1253 RAMBlock *block, *new_block;
1254 ram_addr_t old_ram_size, new_ram_size;
1255
1256 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1257
1258 size = TARGET_PAGE_ALIGN(size);
1259 new_block = g_malloc0(sizeof(*new_block));
1260 new_block->fd = -1;
1261
1262 /* This assumes the iothread lock is taken here too. */
1263 qemu_mutex_lock_ramlist();
1264 new_block->mr = mr;
1265 new_block->offset = find_ram_offset(size);
1266 if (host) {
1267 new_block->host = host;
1268 new_block->flags |= RAM_PREALLOC_MASK;
1269 } else if (xen_enabled()) {
1270 if (mem_path) {
1271 fprintf(stderr, "-mem-path not supported with Xen\n");
1272 exit(1);
1273 }
1274 xen_ram_alloc(new_block->offset, size, mr);
1275 } else {
1276 if (mem_path) {
1277 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1278 /*
1279 * file_ram_alloc() needs to allocate just like
1280 * phys_mem_alloc, but we haven't bothered to provide
1281 * a hook there.
1282 */
1283 fprintf(stderr,
1284 "-mem-path not supported with this accelerator\n");
1285 exit(1);
1286 }
1287 new_block->host = file_ram_alloc(new_block, size, mem_path);
1288 }
1289 if (!new_block->host) {
1290 new_block->host = phys_mem_alloc(size);
1291 if (!new_block->host) {
1292 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1293 new_block->mr->name, strerror(errno));
1294 exit(1);
1295 }
1296 memory_try_enable_merging(new_block->host, size);
1297 }
1298 }
1299 new_block->length = size;
1300
1301 /* Keep the list sorted from biggest to smallest block. */
1302 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1303 if (block->length < new_block->length) {
1304 break;
1305 }
1306 }
1307 if (block) {
1308 QTAILQ_INSERT_BEFORE(block, new_block, next);
1309 } else {
1310 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1311 }
1312 ram_list.mru_block = NULL;
1313
1314 ram_list.version++;
1315 qemu_mutex_unlock_ramlist();
1316
1317 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1318
1319 if (new_ram_size > old_ram_size) {
1320 int i;
1321 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1322 ram_list.dirty_memory[i] =
1323 bitmap_zero_extend(ram_list.dirty_memory[i],
1324 old_ram_size, new_ram_size);
1325 }
1326 }
1327 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1328
1329 qemu_ram_setup_dump(new_block->host, size);
1330 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1331 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1332
1333 if (kvm_enabled())
1334 kvm_setup_guest_memory(new_block->host, size);
1335
1336 return new_block->offset;
1337 }
1338
1339 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1340 {
1341 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1342 }
1343
1344 void qemu_ram_free_from_ptr(ram_addr_t addr)
1345 {
1346 RAMBlock *block;
1347
1348 /* This assumes the iothread lock is taken here too. */
1349 qemu_mutex_lock_ramlist();
1350 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1351 if (addr == block->offset) {
1352 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1353 ram_list.mru_block = NULL;
1354 ram_list.version++;
1355 g_free(block);
1356 break;
1357 }
1358 }
1359 qemu_mutex_unlock_ramlist();
1360 }
1361
1362 void qemu_ram_free(ram_addr_t addr)
1363 {
1364 RAMBlock *block;
1365
1366 /* This assumes the iothread lock is taken here too. */
1367 qemu_mutex_lock_ramlist();
1368 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1369 if (addr == block->offset) {
1370 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1371 ram_list.mru_block = NULL;
1372 ram_list.version++;
1373 if (block->flags & RAM_PREALLOC_MASK) {
1374 ;
1375 } else if (xen_enabled()) {
1376 xen_invalidate_map_cache_entry(block->host);
1377 #ifndef _WIN32
1378 } else if (block->fd >= 0) {
1379 munmap(block->host, block->length);
1380 close(block->fd);
1381 #endif
1382 } else {
1383 qemu_anon_ram_free(block->host, block->length);
1384 }
1385 g_free(block);
1386 break;
1387 }
1388 }
1389 qemu_mutex_unlock_ramlist();
1390
1391 }
1392
1393 #ifndef _WIN32
1394 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1395 {
1396 RAMBlock *block;
1397 ram_addr_t offset;
1398 int flags;
1399 void *area, *vaddr;
1400
1401 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1402 offset = addr - block->offset;
1403 if (offset < block->length) {
1404 vaddr = block->host + offset;
1405 if (block->flags & RAM_PREALLOC_MASK) {
1406 ;
1407 } else if (xen_enabled()) {
1408 abort();
1409 } else {
1410 flags = MAP_FIXED;
1411 munmap(vaddr, length);
1412 if (block->fd >= 0) {
1413 #ifdef MAP_POPULATE
1414 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1415 MAP_PRIVATE;
1416 #else
1417 flags |= MAP_PRIVATE;
1418 #endif
1419 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1420 flags, block->fd, offset);
1421 } else {
1422 /*
1423 * Remap needs to match alloc. Accelerators that
1424 * set phys_mem_alloc never remap. If they did,
1425 * we'd need a remap hook here.
1426 */
1427 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1428
1429 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1430 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1431 flags, -1, 0);
1432 }
1433 if (area != vaddr) {
1434 fprintf(stderr, "Could not remap addr: "
1435 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1436 length, addr);
1437 exit(1);
1438 }
1439 memory_try_enable_merging(vaddr, length);
1440 qemu_ram_setup_dump(vaddr, length);
1441 }
1442 return;
1443 }
1444 }
1445 }
1446 #endif /* !_WIN32 */
1447
1448 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1449 With the exception of the softmmu code in this file, this should
1450 only be used for local memory (e.g. video ram) that the device owns,
1451 and knows it isn't going to access beyond the end of the block.
1452
1453 It should not be used for general purpose DMA.
1454 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1455 */
1456 void *qemu_get_ram_ptr(ram_addr_t addr)
1457 {
1458 RAMBlock *block = qemu_get_ram_block(addr);
1459
1460 if (xen_enabled()) {
1461 /* We need to check if the requested address is in the RAM
1462 * because we don't want to map the entire memory in QEMU.
1463 * In that case just map until the end of the page.
1464 */
1465 if (block->offset == 0) {
1466 return xen_map_cache(addr, 0, 0);
1467 } else if (block->host == NULL) {
1468 block->host =
1469 xen_map_cache(block->offset, block->length, 1);
1470 }
1471 }
1472 return block->host + (addr - block->offset);
1473 }
1474
1475 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1476 * but takes a size argument */
1477 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1478 {
1479 if (*size == 0) {
1480 return NULL;
1481 }
1482 if (xen_enabled()) {
1483 return xen_map_cache(addr, *size, 1);
1484 } else {
1485 RAMBlock *block;
1486
1487 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1488 if (addr - block->offset < block->length) {
1489 if (addr - block->offset + *size > block->length)
1490 *size = block->length - addr + block->offset;
1491 return block->host + (addr - block->offset);
1492 }
1493 }
1494
1495 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1496 abort();
1497 }
1498 }
1499
1500 /* Some of the softmmu routines need to translate from a host pointer
1501 (typically a TLB entry) back to a ram offset. */
1502 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1503 {
1504 RAMBlock *block;
1505 uint8_t *host = ptr;
1506
1507 if (xen_enabled()) {
1508 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1509 return qemu_get_ram_block(*ram_addr)->mr;
1510 }
1511
1512 block = ram_list.mru_block;
1513 if (block && block->host && host - block->host < block->length) {
1514 goto found;
1515 }
1516
1517 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1518 /* This case append when the block is not mapped. */
1519 if (block->host == NULL) {
1520 continue;
1521 }
1522 if (host - block->host < block->length) {
1523 goto found;
1524 }
1525 }
1526
1527 return NULL;
1528
1529 found:
1530 *ram_addr = block->offset + (host - block->host);
1531 return block->mr;
1532 }
1533
1534 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1535 uint64_t val, unsigned size)
1536 {
1537 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1538 tb_invalidate_phys_page_fast(ram_addr, size);
1539 }
1540 switch (size) {
1541 case 1:
1542 stb_p(qemu_get_ram_ptr(ram_addr), val);
1543 break;
1544 case 2:
1545 stw_p(qemu_get_ram_ptr(ram_addr), val);
1546 break;
1547 case 4:
1548 stl_p(qemu_get_ram_ptr(ram_addr), val);
1549 break;
1550 default:
1551 abort();
1552 }
1553 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1554 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1555 /* we remove the notdirty callback only if the code has been
1556 flushed */
1557 if (!cpu_physical_memory_is_clean(ram_addr)) {
1558 CPUArchState *env = current_cpu->env_ptr;
1559 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1560 }
1561 }
1562
1563 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1564 unsigned size, bool is_write)
1565 {
1566 return is_write;
1567 }
1568
1569 static const MemoryRegionOps notdirty_mem_ops = {
1570 .write = notdirty_mem_write,
1571 .valid.accepts = notdirty_mem_accepts,
1572 .endianness = DEVICE_NATIVE_ENDIAN,
1573 };
1574
1575 /* Generate a debug exception if a watchpoint has been hit. */
1576 static void check_watchpoint(int offset, int len_mask, int flags)
1577 {
1578 CPUState *cpu = current_cpu;
1579 CPUArchState *env = cpu->env_ptr;
1580 target_ulong pc, cs_base;
1581 target_ulong vaddr;
1582 CPUWatchpoint *wp;
1583 int cpu_flags;
1584
1585 if (cpu->watchpoint_hit) {
1586 /* We re-entered the check after replacing the TB. Now raise
1587 * the debug interrupt so that is will trigger after the
1588 * current instruction. */
1589 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1590 return;
1591 }
1592 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1593 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1594 if ((vaddr == (wp->vaddr & len_mask) ||
1595 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1596 wp->flags |= BP_WATCHPOINT_HIT;
1597 if (!cpu->watchpoint_hit) {
1598 cpu->watchpoint_hit = wp;
1599 tb_check_watchpoint(cpu);
1600 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1601 cpu->exception_index = EXCP_DEBUG;
1602 cpu_loop_exit(cpu);
1603 } else {
1604 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1605 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1606 cpu_resume_from_signal(cpu, NULL);
1607 }
1608 }
1609 } else {
1610 wp->flags &= ~BP_WATCHPOINT_HIT;
1611 }
1612 }
1613 }
1614
1615 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1616 so these check for a hit then pass through to the normal out-of-line
1617 phys routines. */
1618 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1619 unsigned size)
1620 {
1621 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1622 switch (size) {
1623 case 1: return ldub_phys(&address_space_memory, addr);
1624 case 2: return lduw_phys(&address_space_memory, addr);
1625 case 4: return ldl_phys(&address_space_memory, addr);
1626 default: abort();
1627 }
1628 }
1629
1630 static void watch_mem_write(void *opaque, hwaddr addr,
1631 uint64_t val, unsigned size)
1632 {
1633 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1634 switch (size) {
1635 case 1:
1636 stb_phys(&address_space_memory, addr, val);
1637 break;
1638 case 2:
1639 stw_phys(&address_space_memory, addr, val);
1640 break;
1641 case 4:
1642 stl_phys(&address_space_memory, addr, val);
1643 break;
1644 default: abort();
1645 }
1646 }
1647
1648 static const MemoryRegionOps watch_mem_ops = {
1649 .read = watch_mem_read,
1650 .write = watch_mem_write,
1651 .endianness = DEVICE_NATIVE_ENDIAN,
1652 };
1653
1654 static uint64_t subpage_read(void *opaque, hwaddr addr,
1655 unsigned len)
1656 {
1657 subpage_t *subpage = opaque;
1658 uint8_t buf[4];
1659
1660 #if defined(DEBUG_SUBPAGE)
1661 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1662 subpage, len, addr);
1663 #endif
1664 address_space_read(subpage->as, addr + subpage->base, buf, len);
1665 switch (len) {
1666 case 1:
1667 return ldub_p(buf);
1668 case 2:
1669 return lduw_p(buf);
1670 case 4:
1671 return ldl_p(buf);
1672 default:
1673 abort();
1674 }
1675 }
1676
1677 static void subpage_write(void *opaque, hwaddr addr,
1678 uint64_t value, unsigned len)
1679 {
1680 subpage_t *subpage = opaque;
1681 uint8_t buf[4];
1682
1683 #if defined(DEBUG_SUBPAGE)
1684 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1685 " value %"PRIx64"\n",
1686 __func__, subpage, len, addr, value);
1687 #endif
1688 switch (len) {
1689 case 1:
1690 stb_p(buf, value);
1691 break;
1692 case 2:
1693 stw_p(buf, value);
1694 break;
1695 case 4:
1696 stl_p(buf, value);
1697 break;
1698 default:
1699 abort();
1700 }
1701 address_space_write(subpage->as, addr + subpage->base, buf, len);
1702 }
1703
1704 static bool subpage_accepts(void *opaque, hwaddr addr,
1705 unsigned len, bool is_write)
1706 {
1707 subpage_t *subpage = opaque;
1708 #if defined(DEBUG_SUBPAGE)
1709 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1710 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1711 #endif
1712
1713 return address_space_access_valid(subpage->as, addr + subpage->base,
1714 len, is_write);
1715 }
1716
1717 static const MemoryRegionOps subpage_ops = {
1718 .read = subpage_read,
1719 .write = subpage_write,
1720 .valid.accepts = subpage_accepts,
1721 .endianness = DEVICE_NATIVE_ENDIAN,
1722 };
1723
1724 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1725 uint16_t section)
1726 {
1727 int idx, eidx;
1728
1729 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1730 return -1;
1731 idx = SUBPAGE_IDX(start);
1732 eidx = SUBPAGE_IDX(end);
1733 #if defined(DEBUG_SUBPAGE)
1734 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1735 __func__, mmio, start, end, idx, eidx, section);
1736 #endif
1737 for (; idx <= eidx; idx++) {
1738 mmio->sub_section[idx] = section;
1739 }
1740
1741 return 0;
1742 }
1743
1744 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1745 {
1746 subpage_t *mmio;
1747
1748 mmio = g_malloc0(sizeof(subpage_t));
1749
1750 mmio->as = as;
1751 mmio->base = base;
1752 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1753 "subpage", TARGET_PAGE_SIZE);
1754 mmio->iomem.subpage = true;
1755 #if defined(DEBUG_SUBPAGE)
1756 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1757 mmio, base, TARGET_PAGE_SIZE);
1758 #endif
1759 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1760
1761 return mmio;
1762 }
1763
1764 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1765 {
1766 MemoryRegionSection section = {
1767 .address_space = &address_space_memory,
1768 .mr = mr,
1769 .offset_within_address_space = 0,
1770 .offset_within_region = 0,
1771 .size = int128_2_64(),
1772 };
1773
1774 return phys_section_add(map, &section);
1775 }
1776
1777 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1778 {
1779 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1780 }
1781
1782 static void io_mem_init(void)
1783 {
1784 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1785 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1786 "unassigned", UINT64_MAX);
1787 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1788 "notdirty", UINT64_MAX);
1789 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1790 "watch", UINT64_MAX);
1791 }
1792
1793 static void mem_begin(MemoryListener *listener)
1794 {
1795 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1796 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1797 uint16_t n;
1798
1799 n = dummy_section(&d->map, &io_mem_unassigned);
1800 assert(n == PHYS_SECTION_UNASSIGNED);
1801 n = dummy_section(&d->map, &io_mem_notdirty);
1802 assert(n == PHYS_SECTION_NOTDIRTY);
1803 n = dummy_section(&d->map, &io_mem_rom);
1804 assert(n == PHYS_SECTION_ROM);
1805 n = dummy_section(&d->map, &io_mem_watch);
1806 assert(n == PHYS_SECTION_WATCH);
1807
1808 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1809 d->as = as;
1810 as->next_dispatch = d;
1811 }
1812
1813 static void mem_commit(MemoryListener *listener)
1814 {
1815 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1816 AddressSpaceDispatch *cur = as->dispatch;
1817 AddressSpaceDispatch *next = as->next_dispatch;
1818
1819 phys_page_compact_all(next, next->map.nodes_nb);
1820
1821 as->dispatch = next;
1822
1823 if (cur) {
1824 phys_sections_free(&cur->map);
1825 g_free(cur);
1826 }
1827 }
1828
1829 static void tcg_commit(MemoryListener *listener)
1830 {
1831 CPUState *cpu;
1832
1833 /* since each CPU stores ram addresses in its TLB cache, we must
1834 reset the modified entries */
1835 /* XXX: slow ! */
1836 CPU_FOREACH(cpu) {
1837 /* FIXME: Disentangle the cpu.h circular files deps so we can
1838 directly get the right CPU from listener. */
1839 if (cpu->tcg_as_listener != listener) {
1840 continue;
1841 }
1842 tlb_flush(cpu, 1);
1843 }
1844 }
1845
1846 static void core_log_global_start(MemoryListener *listener)
1847 {
1848 cpu_physical_memory_set_dirty_tracking(true);
1849 }
1850
1851 static void core_log_global_stop(MemoryListener *listener)
1852 {
1853 cpu_physical_memory_set_dirty_tracking(false);
1854 }
1855
1856 static MemoryListener core_memory_listener = {
1857 .log_global_start = core_log_global_start,
1858 .log_global_stop = core_log_global_stop,
1859 .priority = 1,
1860 };
1861
1862 void address_space_init_dispatch(AddressSpace *as)
1863 {
1864 as->dispatch = NULL;
1865 as->dispatch_listener = (MemoryListener) {
1866 .begin = mem_begin,
1867 .commit = mem_commit,
1868 .region_add = mem_add,
1869 .region_nop = mem_add,
1870 .priority = 0,
1871 };
1872 memory_listener_register(&as->dispatch_listener, as);
1873 }
1874
1875 void address_space_destroy_dispatch(AddressSpace *as)
1876 {
1877 AddressSpaceDispatch *d = as->dispatch;
1878
1879 memory_listener_unregister(&as->dispatch_listener);
1880 g_free(d);
1881 as->dispatch = NULL;
1882 }
1883
1884 static void memory_map_init(void)
1885 {
1886 system_memory = g_malloc(sizeof(*system_memory));
1887
1888 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1889 address_space_init(&address_space_memory, system_memory, "memory");
1890
1891 system_io = g_malloc(sizeof(*system_io));
1892 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1893 65536);
1894 address_space_init(&address_space_io, system_io, "I/O");
1895
1896 memory_listener_register(&core_memory_listener, &address_space_memory);
1897 }
1898
1899 MemoryRegion *get_system_memory(void)
1900 {
1901 return system_memory;
1902 }
1903
1904 MemoryRegion *get_system_io(void)
1905 {
1906 return system_io;
1907 }
1908
1909 #endif /* !defined(CONFIG_USER_ONLY) */
1910
1911 /* physical memory access (slow version, mainly for debug) */
1912 #if defined(CONFIG_USER_ONLY)
1913 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1914 uint8_t *buf, int len, int is_write)
1915 {
1916 int l, flags;
1917 target_ulong page;
1918 void * p;
1919
1920 while (len > 0) {
1921 page = addr & TARGET_PAGE_MASK;
1922 l = (page + TARGET_PAGE_SIZE) - addr;
1923 if (l > len)
1924 l = len;
1925 flags = page_get_flags(page);
1926 if (!(flags & PAGE_VALID))
1927 return -1;
1928 if (is_write) {
1929 if (!(flags & PAGE_WRITE))
1930 return -1;
1931 /* XXX: this code should not depend on lock_user */
1932 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1933 return -1;
1934 memcpy(p, buf, l);
1935 unlock_user(p, addr, l);
1936 } else {
1937 if (!(flags & PAGE_READ))
1938 return -1;
1939 /* XXX: this code should not depend on lock_user */
1940 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1941 return -1;
1942 memcpy(buf, p, l);
1943 unlock_user(p, addr, 0);
1944 }
1945 len -= l;
1946 buf += l;
1947 addr += l;
1948 }
1949 return 0;
1950 }
1951
1952 #else
1953
1954 static void invalidate_and_set_dirty(hwaddr addr,
1955 hwaddr length)
1956 {
1957 if (cpu_physical_memory_is_clean(addr)) {
1958 /* invalidate code */
1959 tb_invalidate_phys_page_range(addr, addr + length, 0);
1960 /* set dirty bit */
1961 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1962 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1963 }
1964 xen_modified_memory(addr, length);
1965 }
1966
1967 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1968 {
1969 unsigned access_size_max = mr->ops->valid.max_access_size;
1970
1971 /* Regions are assumed to support 1-4 byte accesses unless
1972 otherwise specified. */
1973 if (access_size_max == 0) {
1974 access_size_max = 4;
1975 }
1976
1977 /* Bound the maximum access by the alignment of the address. */
1978 if (!mr->ops->impl.unaligned) {
1979 unsigned align_size_max = addr & -addr;
1980 if (align_size_max != 0 && align_size_max < access_size_max) {
1981 access_size_max = align_size_max;
1982 }
1983 }
1984
1985 /* Don't attempt accesses larger than the maximum. */
1986 if (l > access_size_max) {
1987 l = access_size_max;
1988 }
1989 if (l & (l - 1)) {
1990 l = 1 << (qemu_fls(l) - 1);
1991 }
1992
1993 return l;
1994 }
1995
1996 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1997 int len, bool is_write)
1998 {
1999 hwaddr l;
2000 uint8_t *ptr;
2001 uint64_t val;
2002 hwaddr addr1;
2003 MemoryRegion *mr;
2004 bool error = false;
2005
2006 while (len > 0) {
2007 l = len;
2008 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2009
2010 if (is_write) {
2011 if (!memory_access_is_direct(mr, is_write)) {
2012 l = memory_access_size(mr, l, addr1);
2013 /* XXX: could force current_cpu to NULL to avoid
2014 potential bugs */
2015 switch (l) {
2016 case 8:
2017 /* 64 bit write access */
2018 val = ldq_p(buf);
2019 error |= io_mem_write(mr, addr1, val, 8);
2020 break;
2021 case 4:
2022 /* 32 bit write access */
2023 val = ldl_p(buf);
2024 error |= io_mem_write(mr, addr1, val, 4);
2025 break;
2026 case 2:
2027 /* 16 bit write access */
2028 val = lduw_p(buf);
2029 error |= io_mem_write(mr, addr1, val, 2);
2030 break;
2031 case 1:
2032 /* 8 bit write access */
2033 val = ldub_p(buf);
2034 error |= io_mem_write(mr, addr1, val, 1);
2035 break;
2036 default:
2037 abort();
2038 }
2039 } else {
2040 addr1 += memory_region_get_ram_addr(mr);
2041 /* RAM case */
2042 ptr = qemu_get_ram_ptr(addr1);
2043 memcpy(ptr, buf, l);
2044 invalidate_and_set_dirty(addr1, l);
2045 }
2046 } else {
2047 if (!memory_access_is_direct(mr, is_write)) {
2048 /* I/O case */
2049 l = memory_access_size(mr, l, addr1);
2050 switch (l) {
2051 case 8:
2052 /* 64 bit read access */
2053 error |= io_mem_read(mr, addr1, &val, 8);
2054 stq_p(buf, val);
2055 break;
2056 case 4:
2057 /* 32 bit read access */
2058 error |= io_mem_read(mr, addr1, &val, 4);
2059 stl_p(buf, val);
2060 break;
2061 case 2:
2062 /* 16 bit read access */
2063 error |= io_mem_read(mr, addr1, &val, 2);
2064 stw_p(buf, val);
2065 break;
2066 case 1:
2067 /* 8 bit read access */
2068 error |= io_mem_read(mr, addr1, &val, 1);
2069 stb_p(buf, val);
2070 break;
2071 default:
2072 abort();
2073 }
2074 } else {
2075 /* RAM case */
2076 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2077 memcpy(buf, ptr, l);
2078 }
2079 }
2080 len -= l;
2081 buf += l;
2082 addr += l;
2083 }
2084
2085 return error;
2086 }
2087
2088 bool address_space_write(AddressSpace *as, hwaddr addr,
2089 const uint8_t *buf, int len)
2090 {
2091 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2092 }
2093
2094 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2095 {
2096 return address_space_rw(as, addr, buf, len, false);
2097 }
2098
2099
2100 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2101 int len, int is_write)
2102 {
2103 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2104 }
2105
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA = 0,     /* copy the buffer into ROM/RAM */
    FLUSH_CACHE = 1,    /* flush the host icache for the range */
};
2110
2111 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2112 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2113 {
2114 hwaddr l;
2115 uint8_t *ptr;
2116 hwaddr addr1;
2117 MemoryRegion *mr;
2118
2119 while (len > 0) {
2120 l = len;
2121 mr = address_space_translate(as, addr, &addr1, &l, true);
2122
2123 if (!(memory_region_is_ram(mr) ||
2124 memory_region_is_romd(mr))) {
2125 /* do nothing */
2126 } else {
2127 addr1 += memory_region_get_ram_addr(mr);
2128 /* ROM/RAM case */
2129 ptr = qemu_get_ram_ptr(addr1);
2130 switch (type) {
2131 case WRITE_DATA:
2132 memcpy(ptr, buf, l);
2133 invalidate_and_set_dirty(addr1, l);
2134 break;
2135 case FLUSH_CACHE:
2136 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2137 break;
2138 }
2139 }
2140 len -= l;
2141 buf += l;
2142 addr += l;
2143 }
2144 }
2145
2146 /* used for ROM loading : can write in RAM and ROM */
2147 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2148 const uint8_t *buf, int len)
2149 {
2150 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2151 }
2152
2153 void cpu_flush_icache_range(hwaddr start, int len)
2154 {
2155 /*
2156 * This function should do the same thing as an icache flush that was
2157 * triggered from within the guest. For TCG we are always cache coherent,
2158 * so there is no need to flush anything. For KVM / Xen we need to flush
2159 * the host's instruction cache at least.
2160 */
2161 if (tcg_enabled()) {
2162 return;
2163 }
2164
2165 cpu_physical_memory_write_rom_internal(&address_space_memory,
2166 start, NULL, len, FLUSH_CACHE);
2167 }
2168
2169 typedef struct {
2170 MemoryRegion *mr;
2171 void *buffer;
2172 hwaddr addr;
2173 hwaddr len;
2174 } BounceBuffer;
2175
2176 static BounceBuffer bounce;
2177
2178 typedef struct MapClient {
2179 void *opaque;
2180 void (*callback)(void *opaque);
2181 QLIST_ENTRY(MapClient) link;
2182 } MapClient;
2183
2184 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2185 = QLIST_HEAD_INITIALIZER(map_client_list);
2186
2187 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2188 {
2189 MapClient *client = g_malloc(sizeof(*client));
2190
2191 client->opaque = opaque;
2192 client->callback = callback;
2193 QLIST_INSERT_HEAD(&map_client_list, client, link);
2194 return client;
2195 }
2196
2197 static void cpu_unregister_map_client(void *_client)
2198 {
2199 MapClient *client = (MapClient *)_client;
2200
2201 QLIST_REMOVE(client, link);
2202 g_free(client);
2203 }
2204
2205 static void cpu_notify_map_clients(void)
2206 {
2207 MapClient *client;
2208
2209 while (!QLIST_EMPTY(&map_client_list)) {
2210 client = QLIST_FIRST(&map_client_list);
2211 client->callback(client->opaque);
2212 cpu_unregister_map_client(client);
2213 }
2214 }
2215
2216 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2217 {
2218 MemoryRegion *mr;
2219 hwaddr l, xlat;
2220
2221 while (len > 0) {
2222 l = len;
2223 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2224 if (!memory_access_is_direct(mr, is_write)) {
2225 l = memory_access_size(mr, l, addr);
2226 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2227 return false;
2228 }
2229 }
2230
2231 len -= l;
2232 addr += l;
2233 }
2234 return true;
2235 }
2236
2237 /* Map a physical memory region into a host virtual address.
2238 * May map a subset of the requested range, given by and returned in *plen.
2239 * May return NULL if resources needed to perform the mapping are exhausted.
2240 * Use only for reads OR writes - not for read-modify-write operations.
2241 * Use cpu_register_map_client() to know when retrying the map operation is
2242 * likely to succeed.
2243 */
2244 void *address_space_map(AddressSpace *as,
2245 hwaddr addr,
2246 hwaddr *plen,
2247 bool is_write)
2248 {
2249 hwaddr len = *plen;
2250 hwaddr done = 0;
2251 hwaddr l, xlat, base;
2252 MemoryRegion *mr, *this_mr;
2253 ram_addr_t raddr;
2254
2255 if (len == 0) {
2256 return NULL;
2257 }
2258
2259 l = len;
2260 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2261 if (!memory_access_is_direct(mr, is_write)) {
2262 if (bounce.buffer) {
2263 return NULL;
2264 }
2265 /* Avoid unbounded allocations */
2266 l = MIN(l, TARGET_PAGE_SIZE);
2267 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2268 bounce.addr = addr;
2269 bounce.len = l;
2270
2271 memory_region_ref(mr);
2272 bounce.mr = mr;
2273 if (!is_write) {
2274 address_space_read(as, addr, bounce.buffer, l);
2275 }
2276
2277 *plen = l;
2278 return bounce.buffer;
2279 }
2280
2281 base = xlat;
2282 raddr = memory_region_get_ram_addr(mr);
2283
2284 for (;;) {
2285 len -= l;
2286 addr += l;
2287 done += l;
2288 if (len == 0) {
2289 break;
2290 }
2291
2292 l = len;
2293 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2294 if (this_mr != mr || xlat != base + done) {
2295 break;
2296 }
2297 }
2298
2299 memory_region_ref(mr);
2300 *plen = done;
2301 return qemu_ram_ptr_length(raddr + base, plen);
2302 }
2303
2304 /* Unmaps a memory region previously mapped by address_space_map().
2305 * Will also mark the memory as dirty if is_write == 1. access_len gives
2306 * the amount of memory that was actually read or written by the caller.
2307 */
2308 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2309 int is_write, hwaddr access_len)
2310 {
2311 if (buffer != bounce.buffer) {
2312 MemoryRegion *mr;
2313 ram_addr_t addr1;
2314
2315 mr = qemu_ram_addr_from_host(buffer, &addr1);
2316 assert(mr != NULL);
2317 if (is_write) {
2318 while (access_len) {
2319 unsigned l;
2320 l = TARGET_PAGE_SIZE;
2321 if (l > access_len)
2322 l = access_len;
2323 invalidate_and_set_dirty(addr1, l);
2324 addr1 += l;
2325 access_len -= l;
2326 }
2327 }
2328 if (xen_enabled()) {
2329 xen_invalidate_map_cache_entry(buffer);
2330 }
2331 memory_region_unref(mr);
2332 return;
2333 }
2334 if (is_write) {
2335 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2336 }
2337 qemu_vfree(bounce.buffer);
2338 bounce.buffer = NULL;
2339 memory_region_unref(bounce.mr);
2340 cpu_notify_map_clients();
2341 }
2342
2343 void *cpu_physical_memory_map(hwaddr addr,
2344 hwaddr *plen,
2345 int is_write)
2346 {
2347 return address_space_map(&address_space_memory, addr, plen, is_write);
2348 }
2349
2350 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2351 int is_write, hwaddr access_len)
2352 {
2353 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2354 }
2355
2356 /* warning: addr must be aligned */
2357 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2358 enum device_endian endian)
2359 {
2360 uint8_t *ptr;
2361 uint64_t val;
2362 MemoryRegion *mr;
2363 hwaddr l = 4;
2364 hwaddr addr1;
2365
2366 mr = address_space_translate(as, addr, &addr1, &l, false);
2367 if (l < 4 || !memory_access_is_direct(mr, false)) {
2368 /* I/O case */
2369 io_mem_read(mr, addr1, &val, 4);
2370 #if defined(TARGET_WORDS_BIGENDIAN)
2371 if (endian == DEVICE_LITTLE_ENDIAN) {
2372 val = bswap32(val);
2373 }
2374 #else
2375 if (endian == DEVICE_BIG_ENDIAN) {
2376 val = bswap32(val);
2377 }
2378 #endif
2379 } else {
2380 /* RAM case */
2381 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2382 & TARGET_PAGE_MASK)
2383 + addr1);
2384 switch (endian) {
2385 case DEVICE_LITTLE_ENDIAN:
2386 val = ldl_le_p(ptr);
2387 break;
2388 case DEVICE_BIG_ENDIAN:
2389 val = ldl_be_p(ptr);
2390 break;
2391 default:
2392 val = ldl_p(ptr);
2393 break;
2394 }
2395 }
2396 return val;
2397 }
2398
2399 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2400 {
2401 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2402 }
2403
2404 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2405 {
2406 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2407 }
2408
2409 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2410 {
2411 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2412 }
2413
2414 /* warning: addr must be aligned */
2415 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2416 enum device_endian endian)
2417 {
2418 uint8_t *ptr;
2419 uint64_t val;
2420 MemoryRegion *mr;
2421 hwaddr l = 8;
2422 hwaddr addr1;
2423
2424 mr = address_space_translate(as, addr, &addr1, &l,
2425 false);
2426 if (l < 8 || !memory_access_is_direct(mr, false)) {
2427 /* I/O case */
2428 io_mem_read(mr, addr1, &val, 8);
2429 #if defined(TARGET_WORDS_BIGENDIAN)
2430 if (endian == DEVICE_LITTLE_ENDIAN) {
2431 val = bswap64(val);
2432 }
2433 #else
2434 if (endian == DEVICE_BIG_ENDIAN) {
2435 val = bswap64(val);
2436 }
2437 #endif
2438 } else {
2439 /* RAM case */
2440 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2441 & TARGET_PAGE_MASK)
2442 + addr1);
2443 switch (endian) {
2444 case DEVICE_LITTLE_ENDIAN:
2445 val = ldq_le_p(ptr);
2446 break;
2447 case DEVICE_BIG_ENDIAN:
2448 val = ldq_be_p(ptr);
2449 break;
2450 default:
2451 val = ldq_p(ptr);
2452 break;
2453 }
2454 }
2455 return val;
2456 }
2457
2458 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2459 {
2460 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2461 }
2462
2463 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2464 {
2465 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2466 }
2467
2468 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2469 {
2470 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2471 }
2472
2473 /* XXX: optimize */
2474 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2475 {
2476 uint8_t val;
2477 address_space_rw(as, addr, &val, 1, 0);
2478 return val;
2479 }
2480
2481 /* warning: addr must be aligned */
2482 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2483 enum device_endian endian)
2484 {
2485 uint8_t *ptr;
2486 uint64_t val;
2487 MemoryRegion *mr;
2488 hwaddr l = 2;
2489 hwaddr addr1;
2490
2491 mr = address_space_translate(as, addr, &addr1, &l,
2492 false);
2493 if (l < 2 || !memory_access_is_direct(mr, false)) {
2494 /* I/O case */
2495 io_mem_read(mr, addr1, &val, 2);
2496 #if defined(TARGET_WORDS_BIGENDIAN)
2497 if (endian == DEVICE_LITTLE_ENDIAN) {
2498 val = bswap16(val);
2499 }
2500 #else
2501 if (endian == DEVICE_BIG_ENDIAN) {
2502 val = bswap16(val);
2503 }
2504 #endif
2505 } else {
2506 /* RAM case */
2507 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2508 & TARGET_PAGE_MASK)
2509 + addr1);
2510 switch (endian) {
2511 case DEVICE_LITTLE_ENDIAN:
2512 val = lduw_le_p(ptr);
2513 break;
2514 case DEVICE_BIG_ENDIAN:
2515 val = lduw_be_p(ptr);
2516 break;
2517 default:
2518 val = lduw_p(ptr);
2519 break;
2520 }
2521 }
2522 return val;
2523 }
2524
2525 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2526 {
2527 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2528 }
2529
2530 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2531 {
2532 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2533 }
2534
2535 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2536 {
2537 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2538 }
2539
2540 /* warning: addr must be aligned. The ram page is not masked as dirty
2541 and the code inside is not invalidated. It is useful if the dirty
2542 bits are used to track modified PTEs */
2543 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2544 {
2545 uint8_t *ptr;
2546 MemoryRegion *mr;
2547 hwaddr l = 4;
2548 hwaddr addr1;
2549
2550 mr = address_space_translate(as, addr, &addr1, &l,
2551 true);
2552 if (l < 4 || !memory_access_is_direct(mr, true)) {
2553 io_mem_write(mr, addr1, val, 4);
2554 } else {
2555 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2556 ptr = qemu_get_ram_ptr(addr1);
2557 stl_p(ptr, val);
2558
2559 if (unlikely(in_migration)) {
2560 if (cpu_physical_memory_is_clean(addr1)) {
2561 /* invalidate code */
2562 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2563 /* set dirty bit */
2564 cpu_physical_memory_set_dirty_flag(addr1,
2565 DIRTY_MEMORY_MIGRATION);
2566 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2567 }
2568 }
2569 }
2570 }
2571
2572 /* warning: addr must be aligned */
2573 static inline void stl_phys_internal(AddressSpace *as,
2574 hwaddr addr, uint32_t val,
2575 enum device_endian endian)
2576 {
2577 uint8_t *ptr;
2578 MemoryRegion *mr;
2579 hwaddr l = 4;
2580 hwaddr addr1;
2581
2582 mr = address_space_translate(as, addr, &addr1, &l,
2583 true);
2584 if (l < 4 || !memory_access_is_direct(mr, true)) {
2585 #if defined(TARGET_WORDS_BIGENDIAN)
2586 if (endian == DEVICE_LITTLE_ENDIAN) {
2587 val = bswap32(val);
2588 }
2589 #else
2590 if (endian == DEVICE_BIG_ENDIAN) {
2591 val = bswap32(val);
2592 }
2593 #endif
2594 io_mem_write(mr, addr1, val, 4);
2595 } else {
2596 /* RAM case */
2597 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2598 ptr = qemu_get_ram_ptr(addr1);
2599 switch (endian) {
2600 case DEVICE_LITTLE_ENDIAN:
2601 stl_le_p(ptr, val);
2602 break;
2603 case DEVICE_BIG_ENDIAN:
2604 stl_be_p(ptr, val);
2605 break;
2606 default:
2607 stl_p(ptr, val);
2608 break;
2609 }
2610 invalidate_and_set_dirty(addr1, 4);
2611 }
2612 }
2613
2614 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2615 {
2616 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2617 }
2618
2619 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2620 {
2621 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2622 }
2623
2624 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2625 {
2626 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2627 }
2628
2629 /* XXX: optimize */
2630 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2631 {
2632 uint8_t v = val;
2633 address_space_rw(as, addr, &v, 1, 1);
2634 }
2635
2636 /* warning: addr must be aligned */
2637 static inline void stw_phys_internal(AddressSpace *as,
2638 hwaddr addr, uint32_t val,
2639 enum device_endian endian)
2640 {
2641 uint8_t *ptr;
2642 MemoryRegion *mr;
2643 hwaddr l = 2;
2644 hwaddr addr1;
2645
2646 mr = address_space_translate(as, addr, &addr1, &l, true);
2647 if (l < 2 || !memory_access_is_direct(mr, true)) {
2648 #if defined(TARGET_WORDS_BIGENDIAN)
2649 if (endian == DEVICE_LITTLE_ENDIAN) {
2650 val = bswap16(val);
2651 }
2652 #else
2653 if (endian == DEVICE_BIG_ENDIAN) {
2654 val = bswap16(val);
2655 }
2656 #endif
2657 io_mem_write(mr, addr1, val, 2);
2658 } else {
2659 /* RAM case */
2660 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2661 ptr = qemu_get_ram_ptr(addr1);
2662 switch (endian) {
2663 case DEVICE_LITTLE_ENDIAN:
2664 stw_le_p(ptr, val);
2665 break;
2666 case DEVICE_BIG_ENDIAN:
2667 stw_be_p(ptr, val);
2668 break;
2669 default:
2670 stw_p(ptr, val);
2671 break;
2672 }
2673 invalidate_and_set_dirty(addr1, 2);
2674 }
2675 }
2676
2677 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2678 {
2679 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2680 }
2681
2682 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2683 {
2684 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2685 }
2686
2687 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2688 {
2689 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2690 }
2691
2692 /* XXX: optimize */
2693 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2694 {
2695 val = tswap64(val);
2696 address_space_rw(as, addr, (void *) &val, 8, 1);
2697 }
2698
2699 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2700 {
2701 val = cpu_to_le64(val);
2702 address_space_rw(as, addr, (void *) &val, 8, 1);
2703 }
2704
2705 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2706 {
2707 val = cpu_to_be64(val);
2708 address_space_rw(as, addr, (void *) &val, 8, 1);
2709 }
2710
2711 /* virtual memory access for debug (includes writing to ROM) */
2712 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2713 uint8_t *buf, int len, int is_write)
2714 {
2715 int l;
2716 hwaddr phys_addr;
2717 target_ulong page;
2718
2719 while (len > 0) {
2720 page = addr & TARGET_PAGE_MASK;
2721 phys_addr = cpu_get_phys_page_debug(cpu, page);
2722 /* if no physical page mapped, return an error */
2723 if (phys_addr == -1)
2724 return -1;
2725 l = (page + TARGET_PAGE_SIZE) - addr;
2726 if (l > len)
2727 l = len;
2728 phys_addr += (addr & ~TARGET_PAGE_MASK);
2729 if (is_write) {
2730 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2731 } else {
2732 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2733 }
2734 len -= l;
2735 buf += l;
2736 addr += l;
2737 }
2738 return 0;
2739 }
2740 #endif
2741
2742 #if !defined(CONFIG_USER_ONLY)
2743
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * The answer is fixed at compile time: true when TARGET_WORDS_BIGENDIAN is
 * defined, false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
    bool big_endian = false;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#endif

    return big_endian;
}
2757
2758 #endif
2759
2760 #ifndef CONFIG_USER_ONLY
2761 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2762 {
2763 MemoryRegion*mr;
2764 hwaddr l = 1;
2765
2766 mr = address_space_translate(&address_space_memory,
2767 phys_addr, &phys_addr, &l, false);
2768
2769 return !(memory_region_is_ram(mr) ||
2770 memory_region_is_romd(mr));
2771 }
2772
2773 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2774 {
2775 RAMBlock *block;
2776
2777 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2778 func(block->host, block->offset, block->length, opaque);
2779 }
2780 }
2781 #endif