]> git.proxmox.com Git - mirror_qemu.git/blob - exec.c
NUMA: Add numa_info structure to contain numa nodes info
[mirror_qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
54
55 #include "qemu/range.h"
56
57 //#define DEBUG_SUBPAGE
58
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
61
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
66
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
69
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
72
73 #endif
74
75 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
76 /* current CPU in the current thread. It is only valid inside
77 cpu_exec() */
78 DEFINE_TLS(CPUState *, current_cpu);
79 /* 0 = Do not count executed instructions.
80 1 = Precise instruction counting.
81 2 = Adaptive rate instruction counting. */
82 int use_icount;
83
84 #if !defined(CONFIG_USER_ONLY)
85
/* One slot of the multi-level physical page map, packed into 32 bits. */
typedef struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
} PhysPageEntry;

/* All-ones value of the 26-bit ptr field: marks "no node allocated". */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Width of the address space covered by the map. */
#define ADDR_SPACE_BITS 64

/* Size of the L2 (and L3, etc) page tables. */
#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

/* Number of table levels needed to cover every page index. */
#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

/* One table of the map: P_L2_SIZE entries. */
typedef PhysPageEntry Node[P_L2_SIZE];
106
107 typedef struct PhysPageMap {
108 unsigned sections_nb;
109 unsigned sections_nb_alloc;
110 unsigned nodes_nb;
111 unsigned nodes_nb_alloc;
112 Node *nodes;
113 MemoryRegionSection *sections;
114 } PhysPageMap;
115
116 struct AddressSpaceDispatch {
117 /* This is a multi-level map on the physical address space.
118 * The bottom level has pointers to MemoryRegionSections.
119 */
120 PhysPageEntry phys_map;
121 PhysPageMap map;
122 AddressSpace *as;
123 };
124
125 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
126 typedef struct subpage_t {
127 MemoryRegion iomem;
128 AddressSpace *as;
129 hwaddr base;
130 uint16_t sub_section[TARGET_PAGE_SIZE];
131 } subpage_t;
132
133 #define PHYS_SECTION_UNASSIGNED 0
134 #define PHYS_SECTION_NOTDIRTY 1
135 #define PHYS_SECTION_ROM 2
136 #define PHYS_SECTION_WATCH 3
137
138 static void io_mem_init(void);
139 static void memory_map_init(void);
140 static void tcg_commit(MemoryListener *listener);
141
142 static MemoryRegion io_mem_watch;
143 #endif
144
145 #if !defined(CONFIG_USER_ONLY)
146
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
148 {
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
153 }
154 }
155
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
157 {
158 unsigned i;
159 uint32_t ret;
160
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
167 }
168 return ret;
169 }
170
171 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172 hwaddr *index, hwaddr *nb, uint16_t leaf,
173 int level)
174 {
175 PhysPageEntry *p;
176 int i;
177 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
178
179 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180 lp->ptr = phys_map_node_alloc(map);
181 p = map->nodes[lp->ptr];
182 if (level == 0) {
183 for (i = 0; i < P_L2_SIZE; i++) {
184 p[i].skip = 0;
185 p[i].ptr = PHYS_SECTION_UNASSIGNED;
186 }
187 }
188 } else {
189 p = map->nodes[lp->ptr];
190 }
191 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
192
193 while (*nb && lp < &p[P_L2_SIZE]) {
194 if ((*index & (step - 1)) == 0 && *nb >= step) {
195 lp->skip = 0;
196 lp->ptr = leaf;
197 *index += step;
198 *nb -= step;
199 } else {
200 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
201 }
202 ++lp;
203 }
204 }
205
206 static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
208 uint16_t leaf)
209 {
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
212
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
214 }
215
216 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
217 * and update our entry so we can skip it and go directly to the destination.
218 */
219 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
220 {
221 unsigned valid_ptr = P_L2_SIZE;
222 int valid = 0;
223 PhysPageEntry *p;
224 int i;
225
226 if (lp->ptr == PHYS_MAP_NODE_NIL) {
227 return;
228 }
229
230 p = nodes[lp->ptr];
231 for (i = 0; i < P_L2_SIZE; i++) {
232 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
233 continue;
234 }
235
236 valid_ptr = i;
237 valid++;
238 if (p[i].skip) {
239 phys_page_compact(&p[i], nodes, compacted);
240 }
241 }
242
243 /* We can only compress if there's only one child. */
244 if (valid != 1) {
245 return;
246 }
247
248 assert(valid_ptr < P_L2_SIZE);
249
250 /* Don't compress if it won't fit in the # of bits we have. */
251 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
252 return;
253 }
254
255 lp->ptr = p[valid_ptr].ptr;
256 if (!p[valid_ptr].skip) {
257 /* If our only child is a leaf, make this a leaf. */
258 /* By design, we should have made this node a leaf to begin with so we
259 * should never reach here.
260 * But since it's so simple to handle this, let's do it just in case we
261 * change this rule.
262 */
263 lp->skip = 0;
264 } else {
265 lp->skip += p[valid_ptr].skip;
266 }
267 }
268
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
270 {
271 DECLARE_BITMAP(compacted, nodes_nb);
272
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
275 }
276 }
277
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
280 {
281 PhysPageEntry *p;
282 hwaddr index = addr >> TARGET_PAGE_BITS;
283 int i;
284
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return &sections[PHYS_SECTION_UNASSIGNED];
288 }
289 p = nodes[lp.ptr];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
291 }
292
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return &sections[lp.ptr];
297 } else {
298 return &sections[PHYS_SECTION_UNASSIGNED];
299 }
300 }
301
302 bool memory_region_is_unassigned(MemoryRegion *mr)
303 {
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
306 }
307
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
309 hwaddr addr,
310 bool resolve_subpage)
311 {
312 MemoryRegionSection *section;
313 subpage_t *subpage;
314
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
319 }
320 return section;
321 }
322
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
326 {
327 MemoryRegionSection *section;
328 Int128 diff;
329
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
333
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
336
337 diff = int128_sub(section->mr->size, int128_make64(addr));
338 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
339 return section;
340 }
341
342 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
343 {
344 if (memory_region_is_ram(mr)) {
345 return !(is_write && mr->readonly);
346 }
347 if (memory_region_is_romd(mr)) {
348 return !is_write;
349 }
350
351 return false;
352 }
353
354 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
355 hwaddr *xlat, hwaddr *plen,
356 bool is_write)
357 {
358 IOMMUTLBEntry iotlb;
359 MemoryRegionSection *section;
360 MemoryRegion *mr;
361 hwaddr len = *plen;
362
363 for (;;) {
364 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
365 mr = section->mr;
366
367 if (!mr->iommu_ops) {
368 break;
369 }
370
371 iotlb = mr->iommu_ops->translate(mr, addr);
372 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
373 | (addr & iotlb.addr_mask));
374 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
375 if (!(iotlb.perm & (1 << is_write))) {
376 mr = &io_mem_unassigned;
377 break;
378 }
379
380 as = iotlb.target_as;
381 }
382
383 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
384 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
385 len = MIN(page, len);
386 }
387
388 *plen = len;
389 *xlat = addr;
390 return mr;
391 }
392
393 MemoryRegionSection *
394 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
395 hwaddr *plen)
396 {
397 MemoryRegionSection *section;
398 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
399
400 assert(!section->mr->iommu_ops);
401 return section;
402 }
403 #endif
404
405 void cpu_exec_init_all(void)
406 {
407 #if !defined(CONFIG_USER_ONLY)
408 qemu_mutex_init(&ram_list.mutex);
409 memory_map_init();
410 io_mem_init();
411 #endif
412 }
413
414 #if !defined(CONFIG_USER_ONLY)
415
416 static int cpu_common_post_load(void *opaque, int version_id)
417 {
418 CPUState *cpu = opaque;
419
420 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
421 version_id is increased. */
422 cpu->interrupt_request &= ~0x01;
423 tlb_flush(cpu, 1);
424
425 return 0;
426 }
427
428 const VMStateDescription vmstate_cpu_common = {
429 .name = "cpu_common",
430 .version_id = 1,
431 .minimum_version_id = 1,
432 .post_load = cpu_common_post_load,
433 .fields = (VMStateField[]) {
434 VMSTATE_UINT32(halted, CPUState),
435 VMSTATE_UINT32(interrupt_request, CPUState),
436 VMSTATE_END_OF_LIST()
437 }
438 };
439
440 #endif
441
442 CPUState *qemu_get_cpu(int index)
443 {
444 CPUState *cpu;
445
446 CPU_FOREACH(cpu) {
447 if (cpu->cpu_index == index) {
448 return cpu;
449 }
450 }
451
452 return NULL;
453 }
454
455 #if !defined(CONFIG_USER_ONLY)
456 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
457 {
458 /* We only support one address space per cpu at the moment. */
459 assert(cpu->as == as);
460
461 if (cpu->tcg_as_listener) {
462 memory_listener_unregister(cpu->tcg_as_listener);
463 } else {
464 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
465 }
466 cpu->tcg_as_listener->commit = tcg_commit;
467 memory_listener_register(cpu->tcg_as_listener, as);
468 }
469 #endif
470
471 void cpu_exec_init(CPUArchState *env)
472 {
473 CPUState *cpu = ENV_GET_CPU(env);
474 CPUClass *cc = CPU_GET_CLASS(cpu);
475 CPUState *some_cpu;
476 int cpu_index;
477
478 #if defined(CONFIG_USER_ONLY)
479 cpu_list_lock();
480 #endif
481 cpu_index = 0;
482 CPU_FOREACH(some_cpu) {
483 cpu_index++;
484 }
485 cpu->cpu_index = cpu_index;
486 cpu->numa_node = 0;
487 QTAILQ_INIT(&cpu->breakpoints);
488 QTAILQ_INIT(&cpu->watchpoints);
489 #ifndef CONFIG_USER_ONLY
490 cpu->as = &address_space_memory;
491 cpu->thread_id = qemu_get_thread_id();
492 #endif
493 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
494 #if defined(CONFIG_USER_ONLY)
495 cpu_list_unlock();
496 #endif
497 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
498 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
499 }
500 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
501 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
502 cpu_save, cpu_load, env);
503 assert(cc->vmsd == NULL);
504 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
505 #endif
506 if (cc->vmsd != NULL) {
507 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
508 }
509 }
510
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* User mode: invalidate the translated code covering @pc. */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/* System mode: resolve @pc to a physical address and invalidate the
 * translated code there; nothing is done when the lookup returns -1.
 */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys_page = cpu_get_phys_page_debug(cpu, pc);

    if (phys_page == -1) {
        return;
    }
    tb_invalidate_phys_addr(cpu->as,
                            phys_page | (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */
528
529 #if defined(CONFIG_USER_ONLY)
530 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
531
532 {
533 }
534
535 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
536 int flags, CPUWatchpoint **watchpoint)
537 {
538 return -ENOSYS;
539 }
540 #else
541 /* Add a watchpoint. */
542 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
543 int flags, CPUWatchpoint **watchpoint)
544 {
545 vaddr len_mask = ~(len - 1);
546 CPUWatchpoint *wp;
547
548 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
549 if ((len & (len - 1)) || (addr & ~len_mask) ||
550 len == 0 || len > TARGET_PAGE_SIZE) {
551 error_report("tried to set invalid watchpoint at %"
552 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
553 return -EINVAL;
554 }
555 wp = g_malloc(sizeof(*wp));
556
557 wp->vaddr = addr;
558 wp->len_mask = len_mask;
559 wp->flags = flags;
560
561 /* keep all GDB-injected watchpoints in front */
562 if (flags & BP_GDB) {
563 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
564 } else {
565 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
566 }
567
568 tlb_flush_page(cpu, addr);
569
570 if (watchpoint)
571 *watchpoint = wp;
572 return 0;
573 }
574
575 /* Remove a specific watchpoint. */
576 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
577 int flags)
578 {
579 vaddr len_mask = ~(len - 1);
580 CPUWatchpoint *wp;
581
582 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
583 if (addr == wp->vaddr && len_mask == wp->len_mask
584 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
585 cpu_watchpoint_remove_by_ref(cpu, wp);
586 return 0;
587 }
588 }
589 return -ENOENT;
590 }
591
592 /* Remove a specific watchpoint by reference. */
593 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
594 {
595 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
596
597 tlb_flush_page(cpu, watchpoint->vaddr);
598
599 g_free(watchpoint);
600 }
601
602 /* Remove all matching watchpoints. */
603 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
604 {
605 CPUWatchpoint *wp, *next;
606
607 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
608 if (wp->flags & mask) {
609 cpu_watchpoint_remove_by_ref(cpu, wp);
610 }
611 }
612 }
613 #endif
614
615 /* Add a breakpoint. */
616 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
617 CPUBreakpoint **breakpoint)
618 {
619 #if defined(TARGET_HAS_ICE)
620 CPUBreakpoint *bp;
621
622 bp = g_malloc(sizeof(*bp));
623
624 bp->pc = pc;
625 bp->flags = flags;
626
627 /* keep all GDB-injected breakpoints in front */
628 if (flags & BP_GDB) {
629 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
630 } else {
631 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
632 }
633
634 breakpoint_invalidate(cpu, pc);
635
636 if (breakpoint) {
637 *breakpoint = bp;
638 }
639 return 0;
640 #else
641 return -ENOSYS;
642 #endif
643 }
644
645 /* Remove a specific breakpoint. */
646 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
647 {
648 #if defined(TARGET_HAS_ICE)
649 CPUBreakpoint *bp;
650
651 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
652 if (bp->pc == pc && bp->flags == flags) {
653 cpu_breakpoint_remove_by_ref(cpu, bp);
654 return 0;
655 }
656 }
657 return -ENOENT;
658 #else
659 return -ENOSYS;
660 #endif
661 }
662
663 /* Remove a specific breakpoint by reference. */
664 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
665 {
666 #if defined(TARGET_HAS_ICE)
667 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
668
669 breakpoint_invalidate(cpu, breakpoint->pc);
670
671 g_free(breakpoint);
672 #endif
673 }
674
675 /* Remove all matching breakpoints. */
676 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
677 {
678 #if defined(TARGET_HAS_ICE)
679 CPUBreakpoint *bp, *next;
680
681 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
682 if (bp->flags & mask) {
683 cpu_breakpoint_remove_by_ref(cpu, bp);
684 }
685 }
686 #endif
687 }
688
689 /* enable or disable single step mode. EXCP_DEBUG is returned by the
690 CPU loop after each instruction */
691 void cpu_single_step(CPUState *cpu, int enabled)
692 {
693 #if defined(TARGET_HAS_ICE)
694 if (cpu->singlestep_enabled != enabled) {
695 cpu->singlestep_enabled = enabled;
696 if (kvm_enabled()) {
697 kvm_update_guest_debug(cpu, 0);
698 } else {
699 /* must flush all the translated code to avoid inconsistencies */
700 /* XXX: only flush what is necessary */
701 CPUArchState *env = cpu->env_ptr;
702 tb_flush(env);
703 }
704 }
705 #endif
706 }
707
708 void cpu_abort(CPUState *cpu, const char *fmt, ...)
709 {
710 va_list ap;
711 va_list ap2;
712
713 va_start(ap, fmt);
714 va_copy(ap2, ap);
715 fprintf(stderr, "qemu: fatal: ");
716 vfprintf(stderr, fmt, ap);
717 fprintf(stderr, "\n");
718 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
719 if (qemu_log_enabled()) {
720 qemu_log("qemu: fatal: ");
721 qemu_log_vprintf(fmt, ap2);
722 qemu_log("\n");
723 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
724 qemu_log_flush();
725 qemu_log_close();
726 }
727 va_end(ap2);
728 va_end(ap);
729 #if defined(CONFIG_USER_ONLY)
730 {
731 struct sigaction act;
732 sigfillset(&act.sa_mask);
733 act.sa_handler = SIG_DFL;
734 sigaction(SIGABRT, &act, NULL);
735 }
736 #endif
737 abort();
738 }
739
740 #if !defined(CONFIG_USER_ONLY)
741 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
742 {
743 RAMBlock *block;
744
745 /* The list is protected by the iothread lock here. */
746 block = ram_list.mru_block;
747 if (block && addr - block->offset < block->length) {
748 goto found;
749 }
750 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
751 if (addr - block->offset < block->length) {
752 goto found;
753 }
754 }
755
756 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
757 abort();
758
759 found:
760 ram_list.mru_block = block;
761 return block;
762 }
763
764 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
765 {
766 ram_addr_t start1;
767 RAMBlock *block;
768 ram_addr_t end;
769
770 end = TARGET_PAGE_ALIGN(start + length);
771 start &= TARGET_PAGE_MASK;
772
773 block = qemu_get_ram_block(start);
774 assert(block == qemu_get_ram_block(end - 1));
775 start1 = (uintptr_t)block->host + (start - block->offset);
776 cpu_tlb_reset_dirty_all(start1, length);
777 }
778
779 /* Note: start and end must be within the same ram block. */
780 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
781 unsigned client)
782 {
783 if (length == 0)
784 return;
785 cpu_physical_memory_clear_dirty_range(start, length, client);
786
787 if (tcg_enabled()) {
788 tlb_reset_dirty_range_all(start, length);
789 }
790 }
791
792 static void cpu_physical_memory_set_dirty_tracking(bool enable)
793 {
794 in_migration = enable;
795 }
796
797 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
798 MemoryRegionSection *section,
799 target_ulong vaddr,
800 hwaddr paddr, hwaddr xlat,
801 int prot,
802 target_ulong *address)
803 {
804 hwaddr iotlb;
805 CPUWatchpoint *wp;
806
807 if (memory_region_is_ram(section->mr)) {
808 /* Normal RAM. */
809 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
810 + xlat;
811 if (!section->readonly) {
812 iotlb |= PHYS_SECTION_NOTDIRTY;
813 } else {
814 iotlb |= PHYS_SECTION_ROM;
815 }
816 } else {
817 iotlb = section - section->address_space->dispatch->map.sections;
818 iotlb += xlat;
819 }
820
821 /* Make accesses to pages with watchpoints go via the
822 watchpoint trap routines. */
823 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
824 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
825 /* Avoid trapping reads of pages with a write breakpoint. */
826 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
827 iotlb = PHYS_SECTION_WATCH + paddr;
828 *address |= TLB_MMIO;
829 break;
830 }
831 }
832 }
833
834 return iotlb;
835 }
836 #endif /* defined(CONFIG_USER_ONLY) */
837
838 #if !defined(CONFIG_USER_ONLY)
839
840 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
841 uint16_t section);
842 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
843
844 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
845
846 /*
847 * Set a custom physical guest memory alloator.
848 * Accelerators with unusual needs may need this. Hopefully, we can
849 * get rid of it eventually.
850 */
851 void phys_mem_set_alloc(void *(*alloc)(size_t))
852 {
853 phys_mem_alloc = alloc;
854 }
855
856 static uint16_t phys_section_add(PhysPageMap *map,
857 MemoryRegionSection *section)
858 {
859 /* The physical section number is ORed with a page-aligned
860 * pointer to produce the iotlb entries. Thus it should
861 * never overflow into the page-aligned value.
862 */
863 assert(map->sections_nb < TARGET_PAGE_SIZE);
864
865 if (map->sections_nb == map->sections_nb_alloc) {
866 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
867 map->sections = g_renew(MemoryRegionSection, map->sections,
868 map->sections_nb_alloc);
869 }
870 map->sections[map->sections_nb] = *section;
871 memory_region_ref(section->mr);
872 return map->sections_nb++;
873 }
874
875 static void phys_section_destroy(MemoryRegion *mr)
876 {
877 memory_region_unref(mr);
878
879 if (mr->subpage) {
880 subpage_t *subpage = container_of(mr, subpage_t, iomem);
881 memory_region_destroy(&subpage->iomem);
882 g_free(subpage);
883 }
884 }
885
886 static void phys_sections_free(PhysPageMap *map)
887 {
888 while (map->sections_nb > 0) {
889 MemoryRegionSection *section = &map->sections[--map->sections_nb];
890 phys_section_destroy(section->mr);
891 }
892 g_free(map->sections);
893 g_free(map->nodes);
894 }
895
896 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
897 {
898 subpage_t *subpage;
899 hwaddr base = section->offset_within_address_space
900 & TARGET_PAGE_MASK;
901 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
902 d->map.nodes, d->map.sections);
903 MemoryRegionSection subsection = {
904 .offset_within_address_space = base,
905 .size = int128_make64(TARGET_PAGE_SIZE),
906 };
907 hwaddr start, end;
908
909 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
910
911 if (!(existing->mr->subpage)) {
912 subpage = subpage_init(d->as, base);
913 subsection.address_space = d->as;
914 subsection.mr = &subpage->iomem;
915 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
916 phys_section_add(&d->map, &subsection));
917 } else {
918 subpage = container_of(existing->mr, subpage_t, iomem);
919 }
920 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
921 end = start + int128_get64(section->size) - 1;
922 subpage_register(subpage, start, end,
923 phys_section_add(&d->map, section));
924 }
925
926
927 static void register_multipage(AddressSpaceDispatch *d,
928 MemoryRegionSection *section)
929 {
930 hwaddr start_addr = section->offset_within_address_space;
931 uint16_t section_index = phys_section_add(&d->map, section);
932 uint64_t num_pages = int128_get64(int128_rshift(section->size,
933 TARGET_PAGE_BITS));
934
935 assert(num_pages);
936 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
937 }
938
939 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
940 {
941 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
942 AddressSpaceDispatch *d = as->next_dispatch;
943 MemoryRegionSection now = *section, remain = *section;
944 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
945
946 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
947 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
948 - now.offset_within_address_space;
949
950 now.size = int128_min(int128_make64(left), now.size);
951 register_subpage(d, &now);
952 } else {
953 now.size = int128_zero();
954 }
955 while (int128_ne(remain.size, now.size)) {
956 remain.size = int128_sub(remain.size, now.size);
957 remain.offset_within_address_space += int128_get64(now.size);
958 remain.offset_within_region += int128_get64(now.size);
959 now = remain;
960 if (int128_lt(remain.size, page_size)) {
961 register_subpage(d, &now);
962 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
963 now.size = page_size;
964 register_subpage(d, &now);
965 } else {
966 now.size = int128_and(now.size, int128_neg(page_size));
967 register_multipage(d, &now);
968 }
969 }
970 }
971
/* Flush the KVM coalesced MMIO buffer; does nothing when KVM is off. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
977
978 void qemu_mutex_lock_ramlist(void)
979 {
980 qemu_mutex_lock(&ram_list.mutex);
981 }
982
983 void qemu_mutex_unlock_ramlist(void)
984 {
985 qemu_mutex_unlock(&ram_list.mutex);
986 }
987
988 #ifdef __linux__
989
990 #include <sys/vfs.h>
991
/* f_type value that statfs() reports for hugetlbfs. */
#define HUGETLBFS_MAGIC 0x958458f6

/* Return the block size of the filesystem holding @path, or 0 if
 * statfs() fails (the error is reported with perror()).  A warning is
 * printed when the filesystem is not hugetlbfs, but its block size is
 * still returned.
 */
static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    /* Retry for as long as the call is interrupted by a signal. */
    for (;;) {
        ret = statfs(path, &fs);
        if (ret == 0 || errno != EINTR) {
            break;
        }
    }

    if (ret != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC) {
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
    }

    return fs.f_bsize;
}
1013
1014 static sigjmp_buf sigjump;
1015
1016 static void sigbus_handler(int signal)
1017 {
1018 siglongjmp(sigjump, 1);
1019 }
1020
1021 static void *file_ram_alloc(RAMBlock *block,
1022 ram_addr_t memory,
1023 const char *path)
1024 {
1025 char *filename;
1026 char *sanitized_name;
1027 char *c;
1028 void *area;
1029 int fd;
1030 unsigned long hpagesize;
1031
1032 hpagesize = gethugepagesize(path);
1033 if (!hpagesize) {
1034 goto error;
1035 }
1036
1037 if (memory < hpagesize) {
1038 return NULL;
1039 }
1040
1041 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1042 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1043 goto error;
1044 }
1045
1046 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1047 sanitized_name = g_strdup(block->mr->name);
1048 for (c = sanitized_name; *c != '\0'; c++) {
1049 if (*c == '/')
1050 *c = '_';
1051 }
1052
1053 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1054 sanitized_name);
1055 g_free(sanitized_name);
1056
1057 fd = mkstemp(filename);
1058 if (fd < 0) {
1059 perror("unable to create backing store for hugepages");
1060 g_free(filename);
1061 goto error;
1062 }
1063 unlink(filename);
1064 g_free(filename);
1065
1066 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1067
1068 /*
1069 * ftruncate is not supported by hugetlbfs in older
1070 * hosts, so don't bother bailing out on errors.
1071 * If anything goes wrong with it under other filesystems,
1072 * mmap will fail.
1073 */
1074 if (ftruncate(fd, memory))
1075 perror("ftruncate");
1076
1077 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1078 if (area == MAP_FAILED) {
1079 perror("file_ram_alloc: can't mmap RAM pages");
1080 close(fd);
1081 goto error;
1082 }
1083
1084 if (mem_prealloc) {
1085 int ret, i;
1086 struct sigaction act, oldact;
1087 sigset_t set, oldset;
1088
1089 memset(&act, 0, sizeof(act));
1090 act.sa_handler = &sigbus_handler;
1091 act.sa_flags = 0;
1092
1093 ret = sigaction(SIGBUS, &act, &oldact);
1094 if (ret) {
1095 perror("file_ram_alloc: failed to install signal handler");
1096 exit(1);
1097 }
1098
1099 /* unblock SIGBUS */
1100 sigemptyset(&set);
1101 sigaddset(&set, SIGBUS);
1102 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1103
1104 if (sigsetjmp(sigjump, 1)) {
1105 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1106 exit(1);
1107 }
1108
1109 /* MAP_POPULATE silently ignores failures */
1110 for (i = 0; i < (memory/hpagesize); i++) {
1111 memset(area + (hpagesize*i), 0, 1);
1112 }
1113
1114 ret = sigaction(SIGBUS, &oldact, NULL);
1115 if (ret) {
1116 perror("file_ram_alloc: failed to reinstall signal handler");
1117 exit(1);
1118 }
1119
1120 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1121 }
1122
1123 block->fd = fd;
1124 return area;
1125
1126 error:
1127 if (mem_prealloc) {
1128 exit(1);
1129 }
1130 return NULL;
1131 }
1132 #else
1133 static void *file_ram_alloc(RAMBlock *block,
1134 ram_addr_t memory,
1135 const char *path)
1136 {
1137 fprintf(stderr, "-mem-path not supported on this host\n");
1138 exit(1);
1139 }
1140 #endif
1141
1142 static ram_addr_t find_ram_offset(ram_addr_t size)
1143 {
1144 RAMBlock *block, *next_block;
1145 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1146
1147 assert(size != 0); /* it would hand out same offset multiple times */
1148
1149 if (QTAILQ_EMPTY(&ram_list.blocks))
1150 return 0;
1151
1152 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1153 ram_addr_t end, next = RAM_ADDR_MAX;
1154
1155 end = block->offset + block->length;
1156
1157 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1158 if (next_block->offset >= end) {
1159 next = MIN(next, next_block->offset);
1160 }
1161 }
1162 if (next - end >= size && next - end < mingap) {
1163 offset = end;
1164 mingap = next - end;
1165 }
1166 }
1167
1168 if (offset == RAM_ADDR_MAX) {
1169 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1170 (uint64_t)size);
1171 abort();
1172 }
1173
1174 return offset;
1175 }
1176
1177 ram_addr_t last_ram_offset(void)
1178 {
1179 RAMBlock *block;
1180 ram_addr_t last = 0;
1181
1182 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1183 last = MAX(last, block->offset + block->length);
1184
1185 return last;
1186 }
1187
1188 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1189 {
1190 int ret;
1191
1192 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1193 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1194 "dump-guest-core", true)) {
1195 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1196 if (ret) {
1197 perror("qemu_madvise");
1198 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1199 "but dump_guest_core=off specified\n");
1200 }
1201 }
1202 }
1203
1204 static RAMBlock *find_ram_block(ram_addr_t addr)
1205 {
1206 RAMBlock *block;
1207
1208 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1209 if (block->offset == addr) {
1210 return block;
1211 }
1212 }
1213
1214 return NULL;
1215 }
1216
1217 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1218 {
1219 RAMBlock *new_block = find_ram_block(addr);
1220 RAMBlock *block;
1221
1222 assert(new_block);
1223 assert(!new_block->idstr[0]);
1224
1225 if (dev) {
1226 char *id = qdev_get_dev_path(dev);
1227 if (id) {
1228 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1229 g_free(id);
1230 }
1231 }
1232 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1233
1234 /* This assumes the iothread lock is taken here too. */
1235 qemu_mutex_lock_ramlist();
1236 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1237 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1238 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1239 new_block->idstr);
1240 abort();
1241 }
1242 }
1243 qemu_mutex_unlock_ramlist();
1244 }
1245
1246 void qemu_ram_unset_idstr(ram_addr_t addr)
1247 {
1248 RAMBlock *block = find_ram_block(addr);
1249
1250 if (block) {
1251 memset(block->idstr, 0, sizeof(block->idstr));
1252 }
1253 }
1254
1255 static int memory_try_enable_merging(void *addr, size_t len)
1256 {
1257 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1258 /* disabled by the user */
1259 return 0;
1260 }
1261
1262 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1263 }
1264
/*
 * Register a new RAM block for @mr and return its offset in the
 * ram_addr_t space.
 *
 * @size: requested size; rounded with TARGET_PAGE_ALIGN before use.
 * @host: caller-provided backing memory, or NULL to have the backing
 *        memory set up here (Xen, -mem-path file, or phys_mem_alloc).
 * @mr:   memory region recorded in the new block.
 *
 * Exits the process when the backing memory cannot be set up.
 */
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr)
{
    RAMBlock *block, *new_block;
    ram_addr_t old_ram_size, new_ram_size;

    /* RAM size in target pages before the new block is inserted */
    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    /* stays -1 unless file_ram_alloc() stores a real descriptor */
    new_block->fd = -1;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    new_block->mr = mr;
    new_block->offset = find_ram_offset(size);
    if (host) {
        /* caller owns the memory; qemu_ram_free() skips flagged blocks */
        new_block->host = host;
        new_block->flags |= RAM_PREALLOC_MASK;
    } else if (xen_enabled()) {
        if (mem_path) {
            fprintf(stderr, "-mem-path not supported with Xen\n");
            exit(1);
        }
        xen_ram_alloc(new_block->offset, size, mr);
    } else {
        if (mem_path) {
            if (phys_mem_alloc != qemu_anon_ram_alloc) {
                /*
                 * file_ram_alloc() needs to allocate just like
                 * phys_mem_alloc, but we haven't bothered to provide
                 * a hook there.
                 */
                fprintf(stderr,
                        "-mem-path not supported with this accelerator\n");
                exit(1);
            }
            new_block->host = file_ram_alloc(new_block, size, mem_path);
        }
        /* no -mem-path, or file_ram_alloc() returned NULL: use the
         * regular allocator */
        if (!new_block->host) {
            new_block->host = phys_mem_alloc(size);
            if (!new_block->host) {
                fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
                        new_block->mr->name, strerror(errno));
                exit(1);
            }
            memory_try_enable_merging(new_block->host, size);
        }
    }
    new_block->length = size;

    /* Keep the list sorted from biggest to smallest block.  */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->length < new_block->length) {
            break;
        }
    }
    /* block is NULL here when no smaller block exists */
    if (block) {
        QTAILQ_INSERT_BEFORE(block, new_block, next);
    } else {
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
    }
    /* the list changed: drop the cached block and bump the version */
    ram_list.mru_block = NULL;

    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    /* extend every dirty bitmap if the new block raised the last offset */
    if (new_ram_size > old_ram_size) {
        int i;
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            ram_list.dirty_memory[i] =
                bitmap_zero_extend(ram_list.dirty_memory[i],
                                   old_ram_size, new_ram_size);
       }
    }
    cpu_physical_memory_set_dirty_range(new_block->offset, size);

    qemu_ram_setup_dump(new_block->host, size);
    /* return values of these two madvise calls are not checked */
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
    qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);

    if (kvm_enabled())
        kvm_setup_guest_memory(new_block->host, size);

    return new_block->offset;
}
1353
1354 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1355 {
1356 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1357 }
1358
1359 void qemu_ram_free_from_ptr(ram_addr_t addr)
1360 {
1361 RAMBlock *block;
1362
1363 /* This assumes the iothread lock is taken here too. */
1364 qemu_mutex_lock_ramlist();
1365 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1366 if (addr == block->offset) {
1367 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1368 ram_list.mru_block = NULL;
1369 ram_list.version++;
1370 g_free(block);
1371 break;
1372 }
1373 }
1374 qemu_mutex_unlock_ramlist();
1375 }
1376
1377 void qemu_ram_free(ram_addr_t addr)
1378 {
1379 RAMBlock *block;
1380
1381 /* This assumes the iothread lock is taken here too. */
1382 qemu_mutex_lock_ramlist();
1383 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1384 if (addr == block->offset) {
1385 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1386 ram_list.mru_block = NULL;
1387 ram_list.version++;
1388 if (block->flags & RAM_PREALLOC_MASK) {
1389 ;
1390 } else if (xen_enabled()) {
1391 xen_invalidate_map_cache_entry(block->host);
1392 #ifndef _WIN32
1393 } else if (block->fd >= 0) {
1394 munmap(block->host, block->length);
1395 close(block->fd);
1396 #endif
1397 } else {
1398 qemu_anon_ram_free(block->host, block->length);
1399 }
1400 g_free(block);
1401 break;
1402 }
1403 }
1404 qemu_mutex_unlock_ramlist();
1405
1406 }
1407
1408 #ifndef _WIN32
1409 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1410 {
1411 RAMBlock *block;
1412 ram_addr_t offset;
1413 int flags;
1414 void *area, *vaddr;
1415
1416 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1417 offset = addr - block->offset;
1418 if (offset < block->length) {
1419 vaddr = block->host + offset;
1420 if (block->flags & RAM_PREALLOC_MASK) {
1421 ;
1422 } else if (xen_enabled()) {
1423 abort();
1424 } else {
1425 flags = MAP_FIXED;
1426 munmap(vaddr, length);
1427 if (block->fd >= 0) {
1428 #ifdef MAP_POPULATE
1429 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1430 MAP_PRIVATE;
1431 #else
1432 flags |= MAP_PRIVATE;
1433 #endif
1434 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1435 flags, block->fd, offset);
1436 } else {
1437 /*
1438 * Remap needs to match alloc. Accelerators that
1439 * set phys_mem_alloc never remap. If they did,
1440 * we'd need a remap hook here.
1441 */
1442 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1443
1444 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1445 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1446 flags, -1, 0);
1447 }
1448 if (area != vaddr) {
1449 fprintf(stderr, "Could not remap addr: "
1450 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1451 length, addr);
1452 exit(1);
1453 }
1454 memory_try_enable_merging(vaddr, length);
1455 qemu_ram_setup_dump(vaddr, length);
1456 }
1457 return;
1458 }
1459 }
1460 }
1461 #endif /* !_WIN32 */
1462
1463 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1464 With the exception of the softmmu code in this file, this should
1465 only be used for local memory (e.g. video ram) that the device owns,
1466 and knows it isn't going to access beyond the end of the block.
1467
1468 It should not be used for general purpose DMA.
1469 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1470 */
1471 void *qemu_get_ram_ptr(ram_addr_t addr)
1472 {
1473 RAMBlock *block = qemu_get_ram_block(addr);
1474
1475 if (xen_enabled()) {
1476 /* We need to check if the requested address is in the RAM
1477 * because we don't want to map the entire memory in QEMU.
1478 * In that case just map until the end of the page.
1479 */
1480 if (block->offset == 0) {
1481 return xen_map_cache(addr, 0, 0);
1482 } else if (block->host == NULL) {
1483 block->host =
1484 xen_map_cache(block->offset, block->length, 1);
1485 }
1486 }
1487 return block->host + (addr - block->offset);
1488 }
1489
1490 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1491 * but takes a size argument */
1492 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1493 {
1494 if (*size == 0) {
1495 return NULL;
1496 }
1497 if (xen_enabled()) {
1498 return xen_map_cache(addr, *size, 1);
1499 } else {
1500 RAMBlock *block;
1501
1502 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1503 if (addr - block->offset < block->length) {
1504 if (addr - block->offset + *size > block->length)
1505 *size = block->length - addr + block->offset;
1506 return block->host + (addr - block->offset);
1507 }
1508 }
1509
1510 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1511 abort();
1512 }
1513 }
1514
1515 /* Some of the softmmu routines need to translate from a host pointer
1516 (typically a TLB entry) back to a ram offset. */
1517 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1518 {
1519 RAMBlock *block;
1520 uint8_t *host = ptr;
1521
1522 if (xen_enabled()) {
1523 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1524 return qemu_get_ram_block(*ram_addr)->mr;
1525 }
1526
1527 block = ram_list.mru_block;
1528 if (block && block->host && host - block->host < block->length) {
1529 goto found;
1530 }
1531
1532 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1533 /* This case append when the block is not mapped. */
1534 if (block->host == NULL) {
1535 continue;
1536 }
1537 if (host - block->host < block->length) {
1538 goto found;
1539 }
1540 }
1541
1542 return NULL;
1543
1544 found:
1545 *ram_addr = block->offset + (host - block->host);
1546 return block->mr;
1547 }
1548
1549 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1550 uint64_t val, unsigned size)
1551 {
1552 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1553 tb_invalidate_phys_page_fast(ram_addr, size);
1554 }
1555 switch (size) {
1556 case 1:
1557 stb_p(qemu_get_ram_ptr(ram_addr), val);
1558 break;
1559 case 2:
1560 stw_p(qemu_get_ram_ptr(ram_addr), val);
1561 break;
1562 case 4:
1563 stl_p(qemu_get_ram_ptr(ram_addr), val);
1564 break;
1565 default:
1566 abort();
1567 }
1568 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1569 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1570 /* we remove the notdirty callback only if the code has been
1571 flushed */
1572 if (!cpu_physical_memory_is_clean(ram_addr)) {
1573 CPUArchState *env = current_cpu->env_ptr;
1574 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1575 }
1576 }
1577
1578 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1579 unsigned size, bool is_write)
1580 {
1581 return is_write;
1582 }
1583
/* Ops of the "notdirty" region: there is no .read handler, and
 * notdirty_mem_accepts() rejects every access that is not a write. */
static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
1589
/*
 * Generate a debug exception if a watchpoint has been hit.
 *
 * @offset:   offset of the access within its target page; the page itself
 *            is taken from current_cpu->mem_io_vaddr.
 * @len_mask: mask for the access length (callers pass ~(size - 1)).
 * @flags:    BP_MEM_* access kind(s) to match against wp->flags.
 */
static void check_watchpoint(int offset, int len_mask, int flags)
{
    CPUState *cpu = current_cpu;
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        /* Hit when the access address equals the watchpoint address
         * masked by the access length mask, or the access address masked
         * by the watchpoint's own length mask equals the watchpoint
         * address -- and the access kind is one the watchpoint wants. */
        if ((vaddr == (wp->vaddr & len_mask) ||
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
            wp->flags |= BP_WATCHPOINT_HIT;
            /* only the first matching watchpoint is recorded on the CPU */
            if (!cpu->watchpoint_hit) {
                cpu->watchpoint_hit = wp;
                tb_check_watchpoint(cpu);
                /* NOTE(review): both branches below presumably leave this
                 * function without returning -- confirm against
                 * cpu_loop_exit() / cpu_resume_from_signal(). */
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    cpu->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(cpu);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(cpu, NULL);
                }
            }
        } else {
            /* not matched by this access: clear a stale hit marker */
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}
1629
1630 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1631 so these check for a hit then pass through to the normal out-of-line
1632 phys routines. */
1633 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1634 unsigned size)
1635 {
1636 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1637 switch (size) {
1638 case 1: return ldub_phys(&address_space_memory, addr);
1639 case 2: return lduw_phys(&address_space_memory, addr);
1640 case 4: return ldl_phys(&address_space_memory, addr);
1641 default: abort();
1642 }
1643 }
1644
1645 static void watch_mem_write(void *opaque, hwaddr addr,
1646 uint64_t val, unsigned size)
1647 {
1648 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1649 switch (size) {
1650 case 1:
1651 stb_phys(&address_space_memory, addr, val);
1652 break;
1653 case 2:
1654 stw_phys(&address_space_memory, addr, val);
1655 break;
1656 case 4:
1657 stl_phys(&address_space_memory, addr, val);
1658 break;
1659 default: abort();
1660 }
1661 }
1662
/* Ops of the "watch" region: both handlers call check_watchpoint() before
 * forwarding the access to address_space_memory. */
static const MemoryRegionOps watch_mem_ops = {
    .read = watch_mem_read,
    .write = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
1668
1669 static uint64_t subpage_read(void *opaque, hwaddr addr,
1670 unsigned len)
1671 {
1672 subpage_t *subpage = opaque;
1673 uint8_t buf[4];
1674
1675 #if defined(DEBUG_SUBPAGE)
1676 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1677 subpage, len, addr);
1678 #endif
1679 address_space_read(subpage->as, addr + subpage->base, buf, len);
1680 switch (len) {
1681 case 1:
1682 return ldub_p(buf);
1683 case 2:
1684 return lduw_p(buf);
1685 case 4:
1686 return ldl_p(buf);
1687 default:
1688 abort();
1689 }
1690 }
1691
1692 static void subpage_write(void *opaque, hwaddr addr,
1693 uint64_t value, unsigned len)
1694 {
1695 subpage_t *subpage = opaque;
1696 uint8_t buf[4];
1697
1698 #if defined(DEBUG_SUBPAGE)
1699 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1700 " value %"PRIx64"\n",
1701 __func__, subpage, len, addr, value);
1702 #endif
1703 switch (len) {
1704 case 1:
1705 stb_p(buf, value);
1706 break;
1707 case 2:
1708 stw_p(buf, value);
1709 break;
1710 case 4:
1711 stl_p(buf, value);
1712 break;
1713 default:
1714 abort();
1715 }
1716 address_space_write(subpage->as, addr + subpage->base, buf, len);
1717 }
1718
1719 static bool subpage_accepts(void *opaque, hwaddr addr,
1720 unsigned len, bool is_write)
1721 {
1722 subpage_t *subpage = opaque;
1723 #if defined(DEBUG_SUBPAGE)
1724 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1725 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1726 #endif
1727
1728 return address_space_access_valid(subpage->as, addr + subpage->base,
1729 len, is_write);
1730 }
1731
/* Ops of a subpage region; each handler forwards to the subpage's address
 * space at subpage->base plus the offset it was called with. */
static const MemoryRegionOps subpage_ops = {
    .read = subpage_read,
    .write = subpage_write,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
1738
1739 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1740 uint16_t section)
1741 {
1742 int idx, eidx;
1743
1744 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1745 return -1;
1746 idx = SUBPAGE_IDX(start);
1747 eidx = SUBPAGE_IDX(end);
1748 #if defined(DEBUG_SUBPAGE)
1749 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1750 __func__, mmio, start, end, idx, eidx, section);
1751 #endif
1752 for (; idx <= eidx; idx++) {
1753 mmio->sub_section[idx] = section;
1754 }
1755
1756 return 0;
1757 }
1758
1759 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1760 {
1761 subpage_t *mmio;
1762
1763 mmio = g_malloc0(sizeof(subpage_t));
1764
1765 mmio->as = as;
1766 mmio->base = base;
1767 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1768 "subpage", TARGET_PAGE_SIZE);
1769 mmio->iomem.subpage = true;
1770 #if defined(DEBUG_SUBPAGE)
1771 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1772 mmio, base, TARGET_PAGE_SIZE);
1773 #endif
1774 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1775
1776 return mmio;
1777 }
1778
1779 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1780 MemoryRegion *mr)
1781 {
1782 assert(as);
1783 MemoryRegionSection section = {
1784 .address_space = as,
1785 .mr = mr,
1786 .offset_within_address_space = 0,
1787 .offset_within_region = 0,
1788 .size = int128_2_64(),
1789 };
1790
1791 return phys_section_add(map, &section);
1792 }
1793
1794 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1795 {
1796 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1797 }
1798
1799 static void io_mem_init(void)
1800 {
1801 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1802 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1803 "unassigned", UINT64_MAX);
1804 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1805 "notdirty", UINT64_MAX);
1806 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1807 "watch", UINT64_MAX);
1808 }
1809
1810 static void mem_begin(MemoryListener *listener)
1811 {
1812 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1813 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1814 uint16_t n;
1815
1816 n = dummy_section(&d->map, as, &io_mem_unassigned);
1817 assert(n == PHYS_SECTION_UNASSIGNED);
1818 n = dummy_section(&d->map, as, &io_mem_notdirty);
1819 assert(n == PHYS_SECTION_NOTDIRTY);
1820 n = dummy_section(&d->map, as, &io_mem_rom);
1821 assert(n == PHYS_SECTION_ROM);
1822 n = dummy_section(&d->map, as, &io_mem_watch);
1823 assert(n == PHYS_SECTION_WATCH);
1824
1825 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1826 d->as = as;
1827 as->next_dispatch = d;
1828 }
1829
1830 static void mem_commit(MemoryListener *listener)
1831 {
1832 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1833 AddressSpaceDispatch *cur = as->dispatch;
1834 AddressSpaceDispatch *next = as->next_dispatch;
1835
1836 phys_page_compact_all(next, next->map.nodes_nb);
1837
1838 as->dispatch = next;
1839
1840 if (cur) {
1841 phys_sections_free(&cur->map);
1842 g_free(cur);
1843 }
1844 }
1845
1846 static void tcg_commit(MemoryListener *listener)
1847 {
1848 CPUState *cpu;
1849
1850 /* since each CPU stores ram addresses in its TLB cache, we must
1851 reset the modified entries */
1852 /* XXX: slow ! */
1853 CPU_FOREACH(cpu) {
1854 /* FIXME: Disentangle the cpu.h circular files deps so we can
1855 directly get the right CPU from listener. */
1856 if (cpu->tcg_as_listener != listener) {
1857 continue;
1858 }
1859 tlb_flush(cpu, 1);
1860 }
1861 }
1862
1863 static void core_log_global_start(MemoryListener *listener)
1864 {
1865 cpu_physical_memory_set_dirty_tracking(true);
1866 }
1867
1868 static void core_log_global_stop(MemoryListener *listener)
1869 {
1870 cpu_physical_memory_set_dirty_tracking(false);
1871 }
1872
/* Listener that switches dirty tracking on and off with global dirty
 * logging; memory_map_init() registers it on address_space_memory. */
static MemoryListener core_memory_listener = {
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};
1878
1879 void address_space_init_dispatch(AddressSpace *as)
1880 {
1881 as->dispatch = NULL;
1882 as->dispatch_listener = (MemoryListener) {
1883 .begin = mem_begin,
1884 .commit = mem_commit,
1885 .region_add = mem_add,
1886 .region_nop = mem_add,
1887 .priority = 0,
1888 };
1889 memory_listener_register(&as->dispatch_listener, as);
1890 }
1891
1892 void address_space_destroy_dispatch(AddressSpace *as)
1893 {
1894 AddressSpaceDispatch *d = as->dispatch;
1895
1896 memory_listener_unregister(&as->dispatch_listener);
1897 g_free(d);
1898 as->dispatch = NULL;
1899 }
1900
1901 static void memory_map_init(void)
1902 {
1903 system_memory = g_malloc(sizeof(*system_memory));
1904
1905 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1906 address_space_init(&address_space_memory, system_memory, "memory");
1907
1908 system_io = g_malloc(sizeof(*system_io));
1909 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1910 65536);
1911 address_space_init(&address_space_io, system_io, "I/O");
1912
1913 memory_listener_register(&core_memory_listener, &address_space_memory);
1914 }
1915
1916 MemoryRegion *get_system_memory(void)
1917 {
1918 return system_memory;
1919 }
1920
1921 MemoryRegion *get_system_io(void)
1922 {
1923 return system_io;
1924 }
1925
1926 #endif /* !defined(CONFIG_USER_ONLY) */
1927
1928 /* physical memory access (slow version, mainly for debug) */
1929 #if defined(CONFIG_USER_ONLY)
1930 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1931 uint8_t *buf, int len, int is_write)
1932 {
1933 int l, flags;
1934 target_ulong page;
1935 void * p;
1936
1937 while (len > 0) {
1938 page = addr & TARGET_PAGE_MASK;
1939 l = (page + TARGET_PAGE_SIZE) - addr;
1940 if (l > len)
1941 l = len;
1942 flags = page_get_flags(page);
1943 if (!(flags & PAGE_VALID))
1944 return -1;
1945 if (is_write) {
1946 if (!(flags & PAGE_WRITE))
1947 return -1;
1948 /* XXX: this code should not depend on lock_user */
1949 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1950 return -1;
1951 memcpy(p, buf, l);
1952 unlock_user(p, addr, l);
1953 } else {
1954 if (!(flags & PAGE_READ))
1955 return -1;
1956 /* XXX: this code should not depend on lock_user */
1957 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1958 return -1;
1959 memcpy(buf, p, l);
1960 unlock_user(p, addr, 0);
1961 }
1962 len -= l;
1963 buf += l;
1964 addr += l;
1965 }
1966 return 0;
1967 }
1968
1969 #else
1970
1971 static void invalidate_and_set_dirty(hwaddr addr,
1972 hwaddr length)
1973 {
1974 if (cpu_physical_memory_is_clean(addr)) {
1975 /* invalidate code */
1976 tb_invalidate_phys_page_range(addr, addr + length, 0);
1977 /* set dirty bit */
1978 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1979 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1980 }
1981 xen_modified_memory(addr, length);
1982 }
1983
1984 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1985 {
1986 unsigned access_size_max = mr->ops->valid.max_access_size;
1987
1988 /* Regions are assumed to support 1-4 byte accesses unless
1989 otherwise specified. */
1990 if (access_size_max == 0) {
1991 access_size_max = 4;
1992 }
1993
1994 /* Bound the maximum access by the alignment of the address. */
1995 if (!mr->ops->impl.unaligned) {
1996 unsigned align_size_max = addr & -addr;
1997 if (align_size_max != 0 && align_size_max < access_size_max) {
1998 access_size_max = align_size_max;
1999 }
2000 }
2001
2002 /* Don't attempt accesses larger than the maximum. */
2003 if (l > access_size_max) {
2004 l = access_size_max;
2005 }
2006 if (l & (l - 1)) {
2007 l = 1 << (qemu_fls(l) - 1);
2008 }
2009
2010 return l;
2011 }
2012
/*
 * Copy @len bytes between @buf and address @addr of @as; @is_write
 * selects the direction (true: @buf -> @as).
 *
 * The range is processed piecewise: each piece is translated to a region,
 * then either copied directly through a host pointer or split into
 * 1/2/4/8-byte io_mem_read()/io_mem_write() calls.
 *
 * Returns the OR of all io_mem_read()/io_mem_write() results; the value
 * stays false when only direct accesses were made.
 */
bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint64_t val;
    hwaddr addr1;
    MemoryRegion *mr;
    bool error = false;

    while (len > 0) {
        /* ask for the whole remainder; translation may shrink l */
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case: pick an access size the region accepts */
                l = memory_access_size(mr, l, addr1);
                /* XXX: could force current_cpu to NULL to avoid
                   potential bugs */
                switch (l) {
                case 8:
                    /* 64 bit write access */
                    val = ldq_p(buf);
                    error |= io_mem_write(mr, addr1, val, 8);
                    break;
                case 4:
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    error |= io_mem_write(mr, addr1, val, 4);
                    break;
                case 2:
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    error |= io_mem_write(mr, addr1, val, 2);
                    break;
                case 1:
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    error |= io_mem_write(mr, addr1, val, 1);
                    break;
                default:
                    abort();
                }
            } else {
                /* turn the offset within the region into a ram address */
                addr1 += memory_region_get_ram_addr(mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
            }
        } else {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case */
                l = memory_access_size(mr, l, addr1);
                switch (l) {
                case 8:
                    /* 64 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 8);
                    stq_p(buf, val);
                    break;
                case 4:
                    /* 32 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 4);
                    stl_p(buf, val);
                    break;
                case 2:
                    /* 16 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 2);
                    stw_p(buf, val);
                    break;
                case 1:
                    /* 8 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 1);
                    stb_p(buf, val);
                    break;
                default:
                    abort();
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }
        /* advance by the amount actually handled in this round */
        len -= l;
        buf += l;
        addr += l;
    }

    return error;
}
2104
2105 bool address_space_write(AddressSpace *as, hwaddr addr,
2106 const uint8_t *buf, int len)
2107 {
2108 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2109 }
2110
2111 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2112 {
2113 return address_space_rw(as, addr, buf, len, false);
2114 }
2115
2116
2117 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2118 int len, int is_write)
2119 {
2120 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2121 }
2122
/* Operation selector for cpu_physical_memory_write_rom_internal() */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into RAM/ROM and mark it dirty */
    FLUSH_CACHE,    /* flush the host icache for the range, no copy */
};
2127
2128 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2129 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2130 {
2131 hwaddr l;
2132 uint8_t *ptr;
2133 hwaddr addr1;
2134 MemoryRegion *mr;
2135
2136 while (len > 0) {
2137 l = len;
2138 mr = address_space_translate(as, addr, &addr1, &l, true);
2139
2140 if (!(memory_region_is_ram(mr) ||
2141 memory_region_is_romd(mr))) {
2142 /* do nothing */
2143 } else {
2144 addr1 += memory_region_get_ram_addr(mr);
2145 /* ROM/RAM case */
2146 ptr = qemu_get_ram_ptr(addr1);
2147 switch (type) {
2148 case WRITE_DATA:
2149 memcpy(ptr, buf, l);
2150 invalidate_and_set_dirty(addr1, l);
2151 break;
2152 case FLUSH_CACHE:
2153 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2154 break;
2155 }
2156 }
2157 len -= l;
2158 buf += l;
2159 addr += l;
2160 }
2161 }
2162
2163 /* used for ROM loading : can write in RAM and ROM */
2164 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2165 const uint8_t *buf, int len)
2166 {
2167 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2168 }
2169
2170 void cpu_flush_icache_range(hwaddr start, int len)
2171 {
2172 /*
2173 * This function should do the same thing as an icache flush that was
2174 * triggered from within the guest. For TCG we are always cache coherent,
2175 * so there is no need to flush anything. For KVM / Xen we need to flush
2176 * the host's instruction cache at least.
2177 */
2178 if (tcg_enabled()) {
2179 return;
2180 }
2181
2182 cpu_physical_memory_write_rom_internal(&address_space_memory,
2183 start, NULL, len, FLUSH_CACHE);
2184 }
2185
/* Temporary buffer handed out by address_space_map() when the target
 * cannot be accessed directly. */
typedef struct {
    MemoryRegion *mr;   /* region referenced while the buffer is in use */
    void *buffer;       /* NULL when the bounce buffer is free */
    hwaddr addr;        /* address the mapping was requested for */
    hwaddr len;         /* number of bytes covered by the buffer */
} BounceBuffer;

/* the single bounce buffer; address_space_map() returns NULL for
 * indirect targets while it is in use */
static BounceBuffer bounce;
2194
/* A callback registered with cpu_register_map_client() */
typedef struct MapClient {
    void *opaque;                       /* passed back to callback */
    void (*callback)(void *opaque);     /* run by cpu_notify_map_clients() */
    QLIST_ENTRY(MapClient) link;        /* linkage in map_client_list */
} MapClient;

/* registered clients; new entries are inserted at the head */
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);
2203
2204 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2205 {
2206 MapClient *client = g_malloc(sizeof(*client));
2207
2208 client->opaque = opaque;
2209 client->callback = callback;
2210 QLIST_INSERT_HEAD(&map_client_list, client, link);
2211 return client;
2212 }
2213
2214 static void cpu_unregister_map_client(void *_client)
2215 {
2216 MapClient *client = (MapClient *)_client;
2217
2218 QLIST_REMOVE(client, link);
2219 g_free(client);
2220 }
2221
2222 static void cpu_notify_map_clients(void)
2223 {
2224 MapClient *client;
2225
2226 while (!QLIST_EMPTY(&map_client_list)) {
2227 client = QLIST_FIRST(&map_client_list);
2228 client->callback(client->opaque);
2229 cpu_unregister_map_client(client);
2230 }
2231 }
2232
2233 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2234 {
2235 MemoryRegion *mr;
2236 hwaddr l, xlat;
2237
2238 while (len > 0) {
2239 l = len;
2240 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2241 if (!memory_access_is_direct(mr, is_write)) {
2242 l = memory_access_size(mr, l, addr);
2243 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2244 return false;
2245 }
2246 }
2247
2248 len -= l;
2249 addr += l;
2250 }
2251 return true;
2252 }
2253
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 *
 * Also returns NULL when *plen is 0.  A reference is taken on the mapped
 * region; address_space_unmap() drops it.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
    if (!memory_access_is_direct(mr, is_write)) {
        /* indirect target: go through the single bounce buffer, which is
         * unavailable while bounce.buffer is set */
        if (bounce.buffer) {
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        /* for a read mapping, fill the buffer with the current contents */
        if (!is_write) {
            address_space_read(as, addr, bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    /* extend the mapping for as long as the following pieces translate
     * to the same region at contiguous offsets */
    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    /* qemu_ram_ptr_length() may shrink *plen further */
    return qemu_ram_ptr_length(raddr + base, plen);
}
2320
2321 /* Unmaps a memory region previously mapped by address_space_map().
2322 * Will also mark the memory as dirty if is_write == 1. access_len gives
2323 * the amount of memory that was actually read or written by the caller.
2324 */
2325 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2326 int is_write, hwaddr access_len)
2327 {
2328 if (buffer != bounce.buffer) {
2329 MemoryRegion *mr;
2330 ram_addr_t addr1;
2331
2332 mr = qemu_ram_addr_from_host(buffer, &addr1);
2333 assert(mr != NULL);
2334 if (is_write) {
2335 while (access_len) {
2336 unsigned l;
2337 l = TARGET_PAGE_SIZE;
2338 if (l > access_len)
2339 l = access_len;
2340 invalidate_and_set_dirty(addr1, l);
2341 addr1 += l;
2342 access_len -= l;
2343 }
2344 }
2345 if (xen_enabled()) {
2346 xen_invalidate_map_cache_entry(buffer);
2347 }
2348 memory_region_unref(mr);
2349 return;
2350 }
2351 if (is_write) {
2352 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2353 }
2354 qemu_vfree(bounce.buffer);
2355 bounce.buffer = NULL;
2356 memory_region_unref(bounce.mr);
2357 cpu_notify_map_clients();
2358 }
2359
2360 void *cpu_physical_memory_map(hwaddr addr,
2361 hwaddr *plen,
2362 int is_write)
2363 {
2364 return address_space_map(&address_space_memory, addr, plen, is_write);
2365 }
2366
2367 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2368 int is_write, hwaddr access_len)
2369 {
2370 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2371 }
2372
2373 /* warning: addr must be aligned */
2374 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2375 enum device_endian endian)
2376 {
2377 uint8_t *ptr;
2378 uint64_t val;
2379 MemoryRegion *mr;
2380 hwaddr l = 4;
2381 hwaddr addr1;
2382
2383 mr = address_space_translate(as, addr, &addr1, &l, false);
2384 if (l < 4 || !memory_access_is_direct(mr, false)) {
2385 /* I/O case */
2386 io_mem_read(mr, addr1, &val, 4);
2387 #if defined(TARGET_WORDS_BIGENDIAN)
2388 if (endian == DEVICE_LITTLE_ENDIAN) {
2389 val = bswap32(val);
2390 }
2391 #else
2392 if (endian == DEVICE_BIG_ENDIAN) {
2393 val = bswap32(val);
2394 }
2395 #endif
2396 } else {
2397 /* RAM case */
2398 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2399 & TARGET_PAGE_MASK)
2400 + addr1);
2401 switch (endian) {
2402 case DEVICE_LITTLE_ENDIAN:
2403 val = ldl_le_p(ptr);
2404 break;
2405 case DEVICE_BIG_ENDIAN:
2406 val = ldl_be_p(ptr);
2407 break;
2408 default:
2409 val = ldl_p(ptr);
2410 break;
2411 }
2412 }
2413 return val;
2414 }
2415
2416 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2417 {
2418 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2419 }
2420
2421 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2422 {
2423 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2424 }
2425
2426 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2427 {
2428 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2429 }
2430
2431 /* warning: addr must be aligned */
2432 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2433 enum device_endian endian)
2434 {
2435 uint8_t *ptr;
2436 uint64_t val;
2437 MemoryRegion *mr;
2438 hwaddr l = 8;
2439 hwaddr addr1;
2440
2441 mr = address_space_translate(as, addr, &addr1, &l,
2442 false);
2443 if (l < 8 || !memory_access_is_direct(mr, false)) {
2444 /* I/O case */
2445 io_mem_read(mr, addr1, &val, 8);
2446 #if defined(TARGET_WORDS_BIGENDIAN)
2447 if (endian == DEVICE_LITTLE_ENDIAN) {
2448 val = bswap64(val);
2449 }
2450 #else
2451 if (endian == DEVICE_BIG_ENDIAN) {
2452 val = bswap64(val);
2453 }
2454 #endif
2455 } else {
2456 /* RAM case */
2457 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2458 & TARGET_PAGE_MASK)
2459 + addr1);
2460 switch (endian) {
2461 case DEVICE_LITTLE_ENDIAN:
2462 val = ldq_le_p(ptr);
2463 break;
2464 case DEVICE_BIG_ENDIAN:
2465 val = ldq_be_p(ptr);
2466 break;
2467 default:
2468 val = ldq_p(ptr);
2469 break;
2470 }
2471 }
2472 return val;
2473 }
2474
2475 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2476 {
2477 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2478 }
2479
2480 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2481 {
2482 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2483 }
2484
2485 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2486 {
2487 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2488 }
2489
2490 /* XXX: optimize */
2491 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2492 {
2493 uint8_t val;
2494 address_space_rw(as, addr, &val, 1, 0);
2495 return val;
2496 }
2497
2498 /* warning: addr must be aligned */
2499 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2500 enum device_endian endian)
2501 {
2502 uint8_t *ptr;
2503 uint64_t val;
2504 MemoryRegion *mr;
2505 hwaddr l = 2;
2506 hwaddr addr1;
2507
2508 mr = address_space_translate(as, addr, &addr1, &l,
2509 false);
2510 if (l < 2 || !memory_access_is_direct(mr, false)) {
2511 /* I/O case */
2512 io_mem_read(mr, addr1, &val, 2);
2513 #if defined(TARGET_WORDS_BIGENDIAN)
2514 if (endian == DEVICE_LITTLE_ENDIAN) {
2515 val = bswap16(val);
2516 }
2517 #else
2518 if (endian == DEVICE_BIG_ENDIAN) {
2519 val = bswap16(val);
2520 }
2521 #endif
2522 } else {
2523 /* RAM case */
2524 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2525 & TARGET_PAGE_MASK)
2526 + addr1);
2527 switch (endian) {
2528 case DEVICE_LITTLE_ENDIAN:
2529 val = lduw_le_p(ptr);
2530 break;
2531 case DEVICE_BIG_ENDIAN:
2532 val = lduw_be_p(ptr);
2533 break;
2534 default:
2535 val = lduw_p(ptr);
2536 break;
2537 }
2538 }
2539 return val;
2540 }
2541
2542 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2543 {
2544 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2545 }
2546
2547 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2548 {
2549 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2550 }
2551
2552 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2553 {
2554 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2555 }
2556
2557 /* warning: addr must be aligned. The ram page is not masked as dirty
2558 and the code inside is not invalidated. It is useful if the dirty
2559 bits are used to track modified PTEs */
2560 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2561 {
2562 uint8_t *ptr;
2563 MemoryRegion *mr;
2564 hwaddr l = 4;
2565 hwaddr addr1;
2566
2567 mr = address_space_translate(as, addr, &addr1, &l,
2568 true);
2569 if (l < 4 || !memory_access_is_direct(mr, true)) {
2570 io_mem_write(mr, addr1, val, 4);
2571 } else {
2572 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2573 ptr = qemu_get_ram_ptr(addr1);
2574 stl_p(ptr, val);
2575
2576 if (unlikely(in_migration)) {
2577 if (cpu_physical_memory_is_clean(addr1)) {
2578 /* invalidate code */
2579 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2580 /* set dirty bit */
2581 cpu_physical_memory_set_dirty_flag(addr1,
2582 DIRTY_MEMORY_MIGRATION);
2583 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2584 }
2585 }
2586 }
2587 }
2588
2589 /* warning: addr must be aligned */
2590 static inline void stl_phys_internal(AddressSpace *as,
2591 hwaddr addr, uint32_t val,
2592 enum device_endian endian)
2593 {
2594 uint8_t *ptr;
2595 MemoryRegion *mr;
2596 hwaddr l = 4;
2597 hwaddr addr1;
2598
2599 mr = address_space_translate(as, addr, &addr1, &l,
2600 true);
2601 if (l < 4 || !memory_access_is_direct(mr, true)) {
2602 #if defined(TARGET_WORDS_BIGENDIAN)
2603 if (endian == DEVICE_LITTLE_ENDIAN) {
2604 val = bswap32(val);
2605 }
2606 #else
2607 if (endian == DEVICE_BIG_ENDIAN) {
2608 val = bswap32(val);
2609 }
2610 #endif
2611 io_mem_write(mr, addr1, val, 4);
2612 } else {
2613 /* RAM case */
2614 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2615 ptr = qemu_get_ram_ptr(addr1);
2616 switch (endian) {
2617 case DEVICE_LITTLE_ENDIAN:
2618 stl_le_p(ptr, val);
2619 break;
2620 case DEVICE_BIG_ENDIAN:
2621 stl_be_p(ptr, val);
2622 break;
2623 default:
2624 stl_p(ptr, val);
2625 break;
2626 }
2627 invalidate_and_set_dirty(addr1, 4);
2628 }
2629 }
2630
2631 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2632 {
2633 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2634 }
2635
2636 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2637 {
2638 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2639 }
2640
2641 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2642 {
2643 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2644 }
2645
2646 /* XXX: optimize */
2647 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2648 {
2649 uint8_t v = val;
2650 address_space_rw(as, addr, &v, 1, 1);
2651 }
2652
2653 /* warning: addr must be aligned */
2654 static inline void stw_phys_internal(AddressSpace *as,
2655 hwaddr addr, uint32_t val,
2656 enum device_endian endian)
2657 {
2658 uint8_t *ptr;
2659 MemoryRegion *mr;
2660 hwaddr l = 2;
2661 hwaddr addr1;
2662
2663 mr = address_space_translate(as, addr, &addr1, &l, true);
2664 if (l < 2 || !memory_access_is_direct(mr, true)) {
2665 #if defined(TARGET_WORDS_BIGENDIAN)
2666 if (endian == DEVICE_LITTLE_ENDIAN) {
2667 val = bswap16(val);
2668 }
2669 #else
2670 if (endian == DEVICE_BIG_ENDIAN) {
2671 val = bswap16(val);
2672 }
2673 #endif
2674 io_mem_write(mr, addr1, val, 2);
2675 } else {
2676 /* RAM case */
2677 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2678 ptr = qemu_get_ram_ptr(addr1);
2679 switch (endian) {
2680 case DEVICE_LITTLE_ENDIAN:
2681 stw_le_p(ptr, val);
2682 break;
2683 case DEVICE_BIG_ENDIAN:
2684 stw_be_p(ptr, val);
2685 break;
2686 default:
2687 stw_p(ptr, val);
2688 break;
2689 }
2690 invalidate_and_set_dirty(addr1, 2);
2691 }
2692 }
2693
2694 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2695 {
2696 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2697 }
2698
2699 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2700 {
2701 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2702 }
2703
2704 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2705 {
2706 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2707 }
2708
2709 /* XXX: optimize */
2710 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2711 {
2712 val = tswap64(val);
2713 address_space_rw(as, addr, (void *) &val, 8, 1);
2714 }
2715
2716 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2717 {
2718 val = cpu_to_le64(val);
2719 address_space_rw(as, addr, (void *) &val, 8, 1);
2720 }
2721
2722 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2723 {
2724 val = cpu_to_be64(val);
2725 address_space_rw(as, addr, (void *) &val, 8, 1);
2726 }
2727
2728 /* virtual memory access for debug (includes writing to ROM) */
2729 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2730 uint8_t *buf, int len, int is_write)
2731 {
2732 int l;
2733 hwaddr phys_addr;
2734 target_ulong page;
2735
2736 while (len > 0) {
2737 page = addr & TARGET_PAGE_MASK;
2738 phys_addr = cpu_get_phys_page_debug(cpu, page);
2739 /* if no physical page mapped, return an error */
2740 if (phys_addr == -1)
2741 return -1;
2742 l = (page + TARGET_PAGE_SIZE) - addr;
2743 if (l > len)
2744 l = len;
2745 phys_addr += (addr & ~TARGET_PAGE_MASK);
2746 if (is_write) {
2747 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2748 } else {
2749 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2750 }
2751 len -= l;
2752 buf += l;
2753 addr += l;
2754 }
2755 return 0;
2756 }
2757 #endif
2758
2759 #if !defined(CONFIG_USER_ONLY)
2760
/*
 * A helper function for the _utterly broken_ virtio device model to find out
 * if it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time and
 * false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool target_is_big_endian = true;
#else
    const bool target_is_big_endian = false;
#endif

    return target_is_big_endian;
}
2774
2775 #endif
2776
2777 #ifndef CONFIG_USER_ONLY
2778 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2779 {
2780 MemoryRegion*mr;
2781 hwaddr l = 1;
2782
2783 mr = address_space_translate(&address_space_memory,
2784 phys_addr, &phys_addr, &l, false);
2785
2786 return !(memory_region_is_ram(mr) ||
2787 memory_region_is_romd(mr));
2788 }
2789
2790 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2791 {
2792 RAMBlock *block;
2793
2794 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2795 func(block->host, block->offset, block->length, opaque);
2796 }
2797 }
2798 #endif