]> git.proxmox.com Git - qemu.git/blob - exec.c
ab4b4d2b240b83cebf7cc4a2ca2776bd4ac62a1e
[qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_SUBPAGE
54
55 #if !defined(CONFIG_USER_ONLY)
56 int phys_ram_fd;
57 static int in_migration;
58
59 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
60
61 static MemoryRegion *system_memory;
62 static MemoryRegion *system_io;
63
64 AddressSpace address_space_io;
65 AddressSpace address_space_memory;
66 DMAContext dma_context_memory;
67
68 MemoryRegion io_mem_rom, io_mem_notdirty;
69 static MemoryRegion io_mem_unassigned, io_mem_subpage_ram;
70
71 #endif
72
73 CPUArchState *first_cpu;
74 /* current CPU in the current thread. It is only valid inside
75 cpu_exec() */
76 DEFINE_TLS(CPUArchState *,cpu_single_env);
77 /* 0 = Do not count executed instructions.
78 1 = Precise instruction counting.
79 2 = Adaptive rate instruction counting. */
80 int use_icount;
81
82 #if !defined(CONFIG_USER_ONLY)
83
84 static MemoryRegionSection *phys_sections;
85 static unsigned phys_sections_nb, phys_sections_nb_alloc;
86 static uint16_t phys_section_unassigned;
87 static uint16_t phys_section_notdirty;
88 static uint16_t phys_section_rom;
89 static uint16_t phys_section_watch;
90
91 /* Simple allocator for PhysPageEntry nodes */
92 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
93 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
94
95 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
96
97 static void io_mem_init(void);
98 static void memory_map_init(void);
99 static void *qemu_safe_ram_ptr(ram_addr_t addr);
100
101 static MemoryRegion io_mem_watch;
102 #endif
103
104 #if !defined(CONFIG_USER_ONLY)
105
106 static void phys_map_node_reserve(unsigned nodes)
107 {
108 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
109 typedef PhysPageEntry Node[L2_SIZE];
110 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
111 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
112 phys_map_nodes_nb + nodes);
113 phys_map_nodes = g_renew(Node, phys_map_nodes,
114 phys_map_nodes_nb_alloc);
115 }
116 }
117
118 static uint16_t phys_map_node_alloc(void)
119 {
120 unsigned i;
121 uint16_t ret;
122
123 ret = phys_map_nodes_nb++;
124 assert(ret != PHYS_MAP_NODE_NIL);
125 assert(ret != phys_map_nodes_nb_alloc);
126 for (i = 0; i < L2_SIZE; ++i) {
127 phys_map_nodes[ret][i].is_leaf = 0;
128 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
129 }
130 return ret;
131 }
132
133 static void phys_map_nodes_reset(void)
134 {
135 phys_map_nodes_nb = 0;
136 }
137
138
139 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
140 hwaddr *nb, uint16_t leaf,
141 int level)
142 {
143 PhysPageEntry *p;
144 int i;
145 hwaddr step = (hwaddr)1 << (level * L2_BITS);
146
147 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
148 lp->ptr = phys_map_node_alloc();
149 p = phys_map_nodes[lp->ptr];
150 if (level == 0) {
151 for (i = 0; i < L2_SIZE; i++) {
152 p[i].is_leaf = 1;
153 p[i].ptr = phys_section_unassigned;
154 }
155 }
156 } else {
157 p = phys_map_nodes[lp->ptr];
158 }
159 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
160
161 while (*nb && lp < &p[L2_SIZE]) {
162 if ((*index & (step - 1)) == 0 && *nb >= step) {
163 lp->is_leaf = true;
164 lp->ptr = leaf;
165 *index += step;
166 *nb -= step;
167 } else {
168 phys_page_set_level(lp, index, nb, leaf, level - 1);
169 }
170 ++lp;
171 }
172 }
173
174 static void phys_page_set(AddressSpaceDispatch *d,
175 hwaddr index, hwaddr nb,
176 uint16_t leaf)
177 {
178 /* Wildly overreserve - it doesn't matter much. */
179 phys_map_node_reserve(3 * P_L2_LEVELS);
180
181 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
182 }
183
184 static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
185 {
186 PhysPageEntry lp = d->phys_map;
187 PhysPageEntry *p;
188 int i;
189
190 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
191 if (lp.ptr == PHYS_MAP_NODE_NIL) {
192 return &phys_sections[phys_section_unassigned];
193 }
194 p = phys_map_nodes[lp.ptr];
195 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
196 }
197 return &phys_sections[lp.ptr];
198 }
199
200 bool memory_region_is_unassigned(MemoryRegion *mr)
201 {
202 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
203 && mr != &io_mem_watch;
204 }
205
206 MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
207 hwaddr *xlat, hwaddr *plen,
208 bool is_write)
209 {
210 MemoryRegionSection *section;
211 Int128 diff;
212
213 section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
214 /* Compute offset within MemoryRegionSection */
215 addr -= section->offset_within_address_space;
216
217 /* Compute offset within MemoryRegion */
218 *xlat = addr + section->offset_within_region;
219
220 diff = int128_sub(section->mr->size, int128_make64(addr));
221 *plen = MIN(int128_get64(diff), *plen);
222 return section;
223 }
224 #endif
225
226 void cpu_exec_init_all(void)
227 {
228 #if !defined(CONFIG_USER_ONLY)
229 qemu_mutex_init(&ram_list.mutex);
230 memory_map_init();
231 io_mem_init();
232 #endif
233 }
234
235 #if !defined(CONFIG_USER_ONLY)
236
237 static int cpu_common_post_load(void *opaque, int version_id)
238 {
239 CPUState *cpu = opaque;
240
241 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
242 version_id is increased. */
243 cpu->interrupt_request &= ~0x01;
244 tlb_flush(cpu->env_ptr, 1);
245
246 return 0;
247 }
248
249 static const VMStateDescription vmstate_cpu_common = {
250 .name = "cpu_common",
251 .version_id = 1,
252 .minimum_version_id = 1,
253 .minimum_version_id_old = 1,
254 .post_load = cpu_common_post_load,
255 .fields = (VMStateField []) {
256 VMSTATE_UINT32(halted, CPUState),
257 VMSTATE_UINT32(interrupt_request, CPUState),
258 VMSTATE_END_OF_LIST()
259 }
260 };
261 #else
262 #define vmstate_cpu_common vmstate_dummy
263 #endif
264
265 CPUState *qemu_get_cpu(int index)
266 {
267 CPUArchState *env = first_cpu;
268 CPUState *cpu = NULL;
269
270 while (env) {
271 cpu = ENV_GET_CPU(env);
272 if (cpu->cpu_index == index) {
273 break;
274 }
275 env = env->next_cpu;
276 }
277
278 return env ? cpu : NULL;
279 }
280
281 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
282 {
283 CPUArchState *env = first_cpu;
284
285 while (env) {
286 func(ENV_GET_CPU(env), data);
287 env = env->next_cpu;
288 }
289 }
290
291 void cpu_exec_init(CPUArchState *env)
292 {
293 CPUState *cpu = ENV_GET_CPU(env);
294 CPUClass *cc = CPU_GET_CLASS(cpu);
295 CPUArchState **penv;
296 int cpu_index;
297
298 #if defined(CONFIG_USER_ONLY)
299 cpu_list_lock();
300 #endif
301 env->next_cpu = NULL;
302 penv = &first_cpu;
303 cpu_index = 0;
304 while (*penv != NULL) {
305 penv = &(*penv)->next_cpu;
306 cpu_index++;
307 }
308 cpu->cpu_index = cpu_index;
309 cpu->numa_node = 0;
310 QTAILQ_INIT(&env->breakpoints);
311 QTAILQ_INIT(&env->watchpoints);
312 #ifndef CONFIG_USER_ONLY
313 cpu->thread_id = qemu_get_thread_id();
314 #endif
315 *penv = env;
316 #if defined(CONFIG_USER_ONLY)
317 cpu_list_unlock();
318 #endif
319 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
320 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
321 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
322 cpu_save, cpu_load, env);
323 assert(cc->vmsd == NULL);
324 #endif
325 if (cc->vmsd != NULL) {
326 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
327 }
328 }
329
330 #if defined(TARGET_HAS_ICE)
331 #if defined(CONFIG_USER_ONLY)
332 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
333 {
334 tb_invalidate_phys_page_range(pc, pc + 1, 0);
335 }
336 #else
337 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
338 {
339 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
340 (pc & ~TARGET_PAGE_MASK));
341 }
342 #endif
343 #endif /* TARGET_HAS_ICE */
344
345 #if defined(CONFIG_USER_ONLY)
346 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
347
348 {
349 }
350
351 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
352 int flags, CPUWatchpoint **watchpoint)
353 {
354 return -ENOSYS;
355 }
356 #else
357 /* Add a watchpoint. */
358 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
359 int flags, CPUWatchpoint **watchpoint)
360 {
361 target_ulong len_mask = ~(len - 1);
362 CPUWatchpoint *wp;
363
364 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
365 if ((len & (len - 1)) || (addr & ~len_mask) ||
366 len == 0 || len > TARGET_PAGE_SIZE) {
367 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
368 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
369 return -EINVAL;
370 }
371 wp = g_malloc(sizeof(*wp));
372
373 wp->vaddr = addr;
374 wp->len_mask = len_mask;
375 wp->flags = flags;
376
377 /* keep all GDB-injected watchpoints in front */
378 if (flags & BP_GDB)
379 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
380 else
381 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
382
383 tlb_flush_page(env, addr);
384
385 if (watchpoint)
386 *watchpoint = wp;
387 return 0;
388 }
389
390 /* Remove a specific watchpoint. */
391 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
392 int flags)
393 {
394 target_ulong len_mask = ~(len - 1);
395 CPUWatchpoint *wp;
396
397 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
398 if (addr == wp->vaddr && len_mask == wp->len_mask
399 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
400 cpu_watchpoint_remove_by_ref(env, wp);
401 return 0;
402 }
403 }
404 return -ENOENT;
405 }
406
407 /* Remove a specific watchpoint by reference. */
408 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
409 {
410 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
411
412 tlb_flush_page(env, watchpoint->vaddr);
413
414 g_free(watchpoint);
415 }
416
417 /* Remove all matching watchpoints. */
418 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
419 {
420 CPUWatchpoint *wp, *next;
421
422 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
423 if (wp->flags & mask)
424 cpu_watchpoint_remove_by_ref(env, wp);
425 }
426 }
427 #endif
428
429 /* Add a breakpoint. */
430 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
431 CPUBreakpoint **breakpoint)
432 {
433 #if defined(TARGET_HAS_ICE)
434 CPUBreakpoint *bp;
435
436 bp = g_malloc(sizeof(*bp));
437
438 bp->pc = pc;
439 bp->flags = flags;
440
441 /* keep all GDB-injected breakpoints in front */
442 if (flags & BP_GDB)
443 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
444 else
445 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
446
447 breakpoint_invalidate(env, pc);
448
449 if (breakpoint)
450 *breakpoint = bp;
451 return 0;
452 #else
453 return -ENOSYS;
454 #endif
455 }
456
457 /* Remove a specific breakpoint. */
458 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
459 {
460 #if defined(TARGET_HAS_ICE)
461 CPUBreakpoint *bp;
462
463 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
464 if (bp->pc == pc && bp->flags == flags) {
465 cpu_breakpoint_remove_by_ref(env, bp);
466 return 0;
467 }
468 }
469 return -ENOENT;
470 #else
471 return -ENOSYS;
472 #endif
473 }
474
475 /* Remove a specific breakpoint by reference. */
476 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
477 {
478 #if defined(TARGET_HAS_ICE)
479 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
480
481 breakpoint_invalidate(env, breakpoint->pc);
482
483 g_free(breakpoint);
484 #endif
485 }
486
487 /* Remove all matching breakpoints. */
488 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
489 {
490 #if defined(TARGET_HAS_ICE)
491 CPUBreakpoint *bp, *next;
492
493 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
494 if (bp->flags & mask)
495 cpu_breakpoint_remove_by_ref(env, bp);
496 }
497 #endif
498 }
499
500 /* enable or disable single step mode. EXCP_DEBUG is returned by the
501 CPU loop after each instruction */
502 void cpu_single_step(CPUArchState *env, int enabled)
503 {
504 #if defined(TARGET_HAS_ICE)
505 if (env->singlestep_enabled != enabled) {
506 env->singlestep_enabled = enabled;
507 if (kvm_enabled())
508 kvm_update_guest_debug(env, 0);
509 else {
510 /* must flush all the translated code to avoid inconsistencies */
511 /* XXX: only flush what is necessary */
512 tb_flush(env);
513 }
514 }
515 #endif
516 }
517
518 void cpu_exit(CPUArchState *env)
519 {
520 CPUState *cpu = ENV_GET_CPU(env);
521
522 cpu->exit_request = 1;
523 cpu->tcg_exit_req = 1;
524 }
525
526 void cpu_abort(CPUArchState *env, const char *fmt, ...)
527 {
528 va_list ap;
529 va_list ap2;
530
531 va_start(ap, fmt);
532 va_copy(ap2, ap);
533 fprintf(stderr, "qemu: fatal: ");
534 vfprintf(stderr, fmt, ap);
535 fprintf(stderr, "\n");
536 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
537 if (qemu_log_enabled()) {
538 qemu_log("qemu: fatal: ");
539 qemu_log_vprintf(fmt, ap2);
540 qemu_log("\n");
541 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
542 qemu_log_flush();
543 qemu_log_close();
544 }
545 va_end(ap2);
546 va_end(ap);
547 #if defined(CONFIG_USER_ONLY)
548 {
549 struct sigaction act;
550 sigfillset(&act.sa_mask);
551 act.sa_handler = SIG_DFL;
552 sigaction(SIGABRT, &act, NULL);
553 }
554 #endif
555 abort();
556 }
557
558 CPUArchState *cpu_copy(CPUArchState *env)
559 {
560 CPUArchState *new_env = cpu_init(env->cpu_model_str);
561 CPUArchState *next_cpu = new_env->next_cpu;
562 #if defined(TARGET_HAS_ICE)
563 CPUBreakpoint *bp;
564 CPUWatchpoint *wp;
565 #endif
566
567 memcpy(new_env, env, sizeof(CPUArchState));
568
569 /* Preserve chaining. */
570 new_env->next_cpu = next_cpu;
571
572 /* Clone all break/watchpoints.
573 Note: Once we support ptrace with hw-debug register access, make sure
574 BP_CPU break/watchpoints are handled correctly on clone. */
575 QTAILQ_INIT(&env->breakpoints);
576 QTAILQ_INIT(&env->watchpoints);
577 #if defined(TARGET_HAS_ICE)
578 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
579 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
580 }
581 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
582 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
583 wp->flags, NULL);
584 }
585 #endif
586
587 return new_env;
588 }
589
590 #if !defined(CONFIG_USER_ONLY)
591 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
592 uintptr_t length)
593 {
594 uintptr_t start1;
595
596 /* we modify the TLB cache so that the dirty bit will be set again
597 when accessing the range */
598 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
599 /* Check that we don't span multiple blocks - this breaks the
600 address comparisons below. */
601 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
602 != (end - 1) - start) {
603 abort();
604 }
605 cpu_tlb_reset_dirty_all(start1, length);
606
607 }
608
609 /* Note: start and end must be within the same ram block. */
610 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
611 int dirty_flags)
612 {
613 uintptr_t length;
614
615 start &= TARGET_PAGE_MASK;
616 end = TARGET_PAGE_ALIGN(end);
617
618 length = end - start;
619 if (length == 0)
620 return;
621 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
622
623 if (tcg_enabled()) {
624 tlb_reset_dirty_range_all(start, end, length);
625 }
626 }
627
628 static int cpu_physical_memory_set_dirty_tracking(int enable)
629 {
630 int ret = 0;
631 in_migration = enable;
632 return ret;
633 }
634
635 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
636 MemoryRegionSection *section,
637 target_ulong vaddr,
638 hwaddr paddr, hwaddr xlat,
639 int prot,
640 target_ulong *address)
641 {
642 hwaddr iotlb;
643 CPUWatchpoint *wp;
644
645 if (memory_region_is_ram(section->mr)) {
646 /* Normal RAM. */
647 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
648 + xlat;
649 if (!section->readonly) {
650 iotlb |= phys_section_notdirty;
651 } else {
652 iotlb |= phys_section_rom;
653 }
654 } else {
655 iotlb = section - phys_sections;
656 iotlb += xlat;
657 }
658
659 /* Make accesses to pages with watchpoints go via the
660 watchpoint trap routines. */
661 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
662 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
663 /* Avoid trapping reads of pages with a write breakpoint. */
664 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
665 iotlb = phys_section_watch + paddr;
666 *address |= TLB_MMIO;
667 break;
668 }
669 }
670 }
671
672 return iotlb;
673 }
674 #endif /* defined(CONFIG_USER_ONLY) */
675
676 #if !defined(CONFIG_USER_ONLY)
677
678 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
679 typedef struct subpage_t {
680 MemoryRegion iomem;
681 hwaddr base;
682 uint16_t sub_section[TARGET_PAGE_SIZE];
683 } subpage_t;
684
685 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
686 uint16_t section);
687 static subpage_t *subpage_init(hwaddr base);
688 static void destroy_page_desc(uint16_t section_index)
689 {
690 MemoryRegionSection *section = &phys_sections[section_index];
691 MemoryRegion *mr = section->mr;
692
693 if (mr->subpage) {
694 subpage_t *subpage = container_of(mr, subpage_t, iomem);
695 memory_region_destroy(&subpage->iomem);
696 g_free(subpage);
697 }
698 }
699
700 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
701 {
702 unsigned i;
703 PhysPageEntry *p;
704
705 if (lp->ptr == PHYS_MAP_NODE_NIL) {
706 return;
707 }
708
709 p = phys_map_nodes[lp->ptr];
710 for (i = 0; i < L2_SIZE; ++i) {
711 if (!p[i].is_leaf) {
712 destroy_l2_mapping(&p[i], level - 1);
713 } else {
714 destroy_page_desc(p[i].ptr);
715 }
716 }
717 lp->is_leaf = 0;
718 lp->ptr = PHYS_MAP_NODE_NIL;
719 }
720
721 static void destroy_all_mappings(AddressSpaceDispatch *d)
722 {
723 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
724 phys_map_nodes_reset();
725 }
726
727 static uint16_t phys_section_add(MemoryRegionSection *section)
728 {
729 /* The physical section number is ORed with a page-aligned
730 * pointer to produce the iotlb entries. Thus it should
731 * never overflow into the page-aligned value.
732 */
733 assert(phys_sections_nb < TARGET_PAGE_SIZE);
734
735 if (phys_sections_nb == phys_sections_nb_alloc) {
736 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
737 phys_sections = g_renew(MemoryRegionSection, phys_sections,
738 phys_sections_nb_alloc);
739 }
740 phys_sections[phys_sections_nb] = *section;
741 return phys_sections_nb++;
742 }
743
744 static void phys_sections_clear(void)
745 {
746 phys_sections_nb = 0;
747 }
748
749 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
750 {
751 subpage_t *subpage;
752 hwaddr base = section->offset_within_address_space
753 & TARGET_PAGE_MASK;
754 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
755 MemoryRegionSection subsection = {
756 .offset_within_address_space = base,
757 .size = TARGET_PAGE_SIZE,
758 };
759 hwaddr start, end;
760
761 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
762
763 if (!(existing->mr->subpage)) {
764 subpage = subpage_init(base);
765 subsection.mr = &subpage->iomem;
766 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
767 phys_section_add(&subsection));
768 } else {
769 subpage = container_of(existing->mr, subpage_t, iomem);
770 }
771 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
772 end = start + section->size - 1;
773 subpage_register(subpage, start, end, phys_section_add(section));
774 }
775
776
777 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
778 {
779 hwaddr start_addr = section->offset_within_address_space;
780 ram_addr_t size = section->size;
781 hwaddr addr;
782 uint16_t section_index = phys_section_add(section);
783
784 assert(size);
785
786 addr = start_addr;
787 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
788 section_index);
789 }
790
791 QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > MAX_PHYS_ADDR_SPACE_BITS)
792
793 static MemoryRegionSection limit(MemoryRegionSection section)
794 {
795 section.size = MIN(section.offset_within_address_space + section.size,
796 MAX_PHYS_ADDR + 1)
797 - section.offset_within_address_space;
798
799 return section;
800 }
801
802 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
803 {
804 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
805 MemoryRegionSection now = limit(*section), remain = limit(*section);
806
807 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
808 || (now.size < TARGET_PAGE_SIZE)) {
809 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
810 - now.offset_within_address_space,
811 now.size);
812 register_subpage(d, &now);
813 remain.size -= now.size;
814 remain.offset_within_address_space += now.size;
815 remain.offset_within_region += now.size;
816 }
817 while (remain.size >= TARGET_PAGE_SIZE) {
818 now = remain;
819 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
820 now.size = TARGET_PAGE_SIZE;
821 register_subpage(d, &now);
822 } else {
823 now.size &= TARGET_PAGE_MASK;
824 register_multipage(d, &now);
825 }
826 remain.size -= now.size;
827 remain.offset_within_address_space += now.size;
828 remain.offset_within_region += now.size;
829 }
830 now = remain;
831 if (now.size) {
832 register_subpage(d, &now);
833 }
834 }
835
/* Flush the coalesced MMIO buffer; only KVM has one, so otherwise a no-op. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
841
842 void qemu_mutex_lock_ramlist(void)
843 {
844 qemu_mutex_lock(&ram_list.mutex);
845 }
846
847 void qemu_mutex_unlock_ramlist(void)
848 {
849 qemu_mutex_unlock(&ram_list.mutex);
850 }
851
852 #if defined(__linux__) && !defined(TARGET_S390X)
853
854 #include <sys/vfs.h>
855
856 #define HUGETLBFS_MAGIC 0x958458f6
857
858 static long gethugepagesize(const char *path)
859 {
860 struct statfs fs;
861 int ret;
862
863 do {
864 ret = statfs(path, &fs);
865 } while (ret != 0 && errno == EINTR);
866
867 if (ret != 0) {
868 perror(path);
869 return 0;
870 }
871
872 if (fs.f_type != HUGETLBFS_MAGIC)
873 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
874
875 return fs.f_bsize;
876 }
877
878 static void *file_ram_alloc(RAMBlock *block,
879 ram_addr_t memory,
880 const char *path)
881 {
882 char *filename;
883 char *sanitized_name;
884 char *c;
885 void *area;
886 int fd;
887 #ifdef MAP_POPULATE
888 int flags;
889 #endif
890 unsigned long hpagesize;
891
892 hpagesize = gethugepagesize(path);
893 if (!hpagesize) {
894 return NULL;
895 }
896
897 if (memory < hpagesize) {
898 return NULL;
899 }
900
901 if (kvm_enabled() && !kvm_has_sync_mmu()) {
902 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
903 return NULL;
904 }
905
906 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
907 sanitized_name = g_strdup(block->mr->name);
908 for (c = sanitized_name; *c != '\0'; c++) {
909 if (*c == '/')
910 *c = '_';
911 }
912
913 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
914 sanitized_name);
915 g_free(sanitized_name);
916
917 fd = mkstemp(filename);
918 if (fd < 0) {
919 perror("unable to create backing store for hugepages");
920 g_free(filename);
921 return NULL;
922 }
923 unlink(filename);
924 g_free(filename);
925
926 memory = (memory+hpagesize-1) & ~(hpagesize-1);
927
928 /*
929 * ftruncate is not supported by hugetlbfs in older
930 * hosts, so don't bother bailing out on errors.
931 * If anything goes wrong with it under other filesystems,
932 * mmap will fail.
933 */
934 if (ftruncate(fd, memory))
935 perror("ftruncate");
936
937 #ifdef MAP_POPULATE
938 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
939 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
940 * to sidestep this quirk.
941 */
942 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
943 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
944 #else
945 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
946 #endif
947 if (area == MAP_FAILED) {
948 perror("file_ram_alloc: can't mmap RAM pages");
949 close(fd);
950 return (NULL);
951 }
952 block->fd = fd;
953 return area;
954 }
955 #endif
956
957 static ram_addr_t find_ram_offset(ram_addr_t size)
958 {
959 RAMBlock *block, *next_block;
960 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
961
962 assert(size != 0); /* it would hand out same offset multiple times */
963
964 if (QTAILQ_EMPTY(&ram_list.blocks))
965 return 0;
966
967 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
968 ram_addr_t end, next = RAM_ADDR_MAX;
969
970 end = block->offset + block->length;
971
972 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
973 if (next_block->offset >= end) {
974 next = MIN(next, next_block->offset);
975 }
976 }
977 if (next - end >= size && next - end < mingap) {
978 offset = end;
979 mingap = next - end;
980 }
981 }
982
983 if (offset == RAM_ADDR_MAX) {
984 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
985 (uint64_t)size);
986 abort();
987 }
988
989 return offset;
990 }
991
992 ram_addr_t last_ram_offset(void)
993 {
994 RAMBlock *block;
995 ram_addr_t last = 0;
996
997 QTAILQ_FOREACH(block, &ram_list.blocks, next)
998 last = MAX(last, block->offset + block->length);
999
1000 return last;
1001 }
1002
1003 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1004 {
1005 int ret;
1006 QemuOpts *machine_opts;
1007
1008 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1009 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1010 if (machine_opts &&
1011 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1012 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1013 if (ret) {
1014 perror("qemu_madvise");
1015 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1016 "but dump_guest_core=off specified\n");
1017 }
1018 }
1019 }
1020
1021 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1022 {
1023 RAMBlock *new_block, *block;
1024
1025 new_block = NULL;
1026 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1027 if (block->offset == addr) {
1028 new_block = block;
1029 break;
1030 }
1031 }
1032 assert(new_block);
1033 assert(!new_block->idstr[0]);
1034
1035 if (dev) {
1036 char *id = qdev_get_dev_path(dev);
1037 if (id) {
1038 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1039 g_free(id);
1040 }
1041 }
1042 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1043
1044 /* This assumes the iothread lock is taken here too. */
1045 qemu_mutex_lock_ramlist();
1046 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1047 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1048 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1049 new_block->idstr);
1050 abort();
1051 }
1052 }
1053 qemu_mutex_unlock_ramlist();
1054 }
1055
1056 static int memory_try_enable_merging(void *addr, size_t len)
1057 {
1058 QemuOpts *opts;
1059
1060 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1061 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1062 /* disabled by the user */
1063 return 0;
1064 }
1065
1066 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1067 }
1068
1069 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1070 MemoryRegion *mr)
1071 {
1072 RAMBlock *block, *new_block;
1073
1074 size = TARGET_PAGE_ALIGN(size);
1075 new_block = g_malloc0(sizeof(*new_block));
1076
1077 /* This assumes the iothread lock is taken here too. */
1078 qemu_mutex_lock_ramlist();
1079 new_block->mr = mr;
1080 new_block->offset = find_ram_offset(size);
1081 if (host) {
1082 new_block->host = host;
1083 new_block->flags |= RAM_PREALLOC_MASK;
1084 } else {
1085 if (mem_path) {
1086 #if defined (__linux__) && !defined(TARGET_S390X)
1087 new_block->host = file_ram_alloc(new_block, size, mem_path);
1088 if (!new_block->host) {
1089 new_block->host = qemu_anon_ram_alloc(size);
1090 memory_try_enable_merging(new_block->host, size);
1091 }
1092 #else
1093 fprintf(stderr, "-mem-path option unsupported\n");
1094 exit(1);
1095 #endif
1096 } else {
1097 if (xen_enabled()) {
1098 xen_ram_alloc(new_block->offset, size, mr);
1099 } else if (kvm_enabled()) {
1100 /* some s390/kvm configurations have special constraints */
1101 new_block->host = kvm_ram_alloc(size);
1102 } else {
1103 new_block->host = qemu_anon_ram_alloc(size);
1104 }
1105 memory_try_enable_merging(new_block->host, size);
1106 }
1107 }
1108 new_block->length = size;
1109
1110 /* Keep the list sorted from biggest to smallest block. */
1111 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1112 if (block->length < new_block->length) {
1113 break;
1114 }
1115 }
1116 if (block) {
1117 QTAILQ_INSERT_BEFORE(block, new_block, next);
1118 } else {
1119 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1120 }
1121 ram_list.mru_block = NULL;
1122
1123 ram_list.version++;
1124 qemu_mutex_unlock_ramlist();
1125
1126 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1127 last_ram_offset() >> TARGET_PAGE_BITS);
1128 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1129 0, size >> TARGET_PAGE_BITS);
1130 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1131
1132 qemu_ram_setup_dump(new_block->host, size);
1133 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1134
1135 if (kvm_enabled())
1136 kvm_setup_guest_memory(new_block->host, size);
1137
1138 return new_block->offset;
1139 }
1140
1141 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1142 {
1143 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1144 }
1145
1146 void qemu_ram_free_from_ptr(ram_addr_t addr)
1147 {
1148 RAMBlock *block;
1149
1150 /* This assumes the iothread lock is taken here too. */
1151 qemu_mutex_lock_ramlist();
1152 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1153 if (addr == block->offset) {
1154 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1155 ram_list.mru_block = NULL;
1156 ram_list.version++;
1157 g_free(block);
1158 break;
1159 }
1160 }
1161 qemu_mutex_unlock_ramlist();
1162 }
1163
1164 void qemu_ram_free(ram_addr_t addr)
1165 {
1166 RAMBlock *block;
1167
1168 /* This assumes the iothread lock is taken here too. */
1169 qemu_mutex_lock_ramlist();
1170 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1171 if (addr == block->offset) {
1172 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1173 ram_list.mru_block = NULL;
1174 ram_list.version++;
1175 if (block->flags & RAM_PREALLOC_MASK) {
1176 ;
1177 } else if (mem_path) {
1178 #if defined (__linux__) && !defined(TARGET_S390X)
1179 if (block->fd) {
1180 munmap(block->host, block->length);
1181 close(block->fd);
1182 } else {
1183 qemu_anon_ram_free(block->host, block->length);
1184 }
1185 #else
1186 abort();
1187 #endif
1188 } else {
1189 if (xen_enabled()) {
1190 xen_invalidate_map_cache_entry(block->host);
1191 } else {
1192 qemu_anon_ram_free(block->host, block->length);
1193 }
1194 }
1195 g_free(block);
1196 break;
1197 }
1198 }
1199 qemu_mutex_unlock_ramlist();
1200
1201 }
1202
1203 #ifndef _WIN32
1204 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1205 {
1206 RAMBlock *block;
1207 ram_addr_t offset;
1208 int flags;
1209 void *area, *vaddr;
1210
1211 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1212 offset = addr - block->offset;
1213 if (offset < block->length) {
1214 vaddr = block->host + offset;
1215 if (block->flags & RAM_PREALLOC_MASK) {
1216 ;
1217 } else {
1218 flags = MAP_FIXED;
1219 munmap(vaddr, length);
1220 if (mem_path) {
1221 #if defined(__linux__) && !defined(TARGET_S390X)
1222 if (block->fd) {
1223 #ifdef MAP_POPULATE
1224 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1225 MAP_PRIVATE;
1226 #else
1227 flags |= MAP_PRIVATE;
1228 #endif
1229 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1230 flags, block->fd, offset);
1231 } else {
1232 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1233 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1234 flags, -1, 0);
1235 }
1236 #else
1237 abort();
1238 #endif
1239 } else {
1240 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1241 flags |= MAP_SHARED | MAP_ANONYMOUS;
1242 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1243 flags, -1, 0);
1244 #else
1245 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1246 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1247 flags, -1, 0);
1248 #endif
1249 }
1250 if (area != vaddr) {
1251 fprintf(stderr, "Could not remap addr: "
1252 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1253 length, addr);
1254 exit(1);
1255 }
1256 memory_try_enable_merging(vaddr, length);
1257 qemu_ram_setup_dump(vaddr, length);
1258 }
1259 return;
1260 }
1261 }
1262 }
1263 #endif /* !_WIN32 */
1264
1265 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1266 With the exception of the softmmu code in this file, this should
1267 only be used for local memory (e.g. video ram) that the device owns,
1268 and knows it isn't going to access beyond the end of the block.
1269
1270 It should not be used for general purpose DMA.
1271 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1272 */
1273 void *qemu_get_ram_ptr(ram_addr_t addr)
1274 {
1275 RAMBlock *block;
1276
1277 /* The list is protected by the iothread lock here. */
1278 block = ram_list.mru_block;
1279 if (block && addr - block->offset < block->length) {
1280 goto found;
1281 }
1282 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1283 if (addr - block->offset < block->length) {
1284 goto found;
1285 }
1286 }
1287
1288 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1289 abort();
1290
1291 found:
1292 ram_list.mru_block = block;
1293 if (xen_enabled()) {
1294 /* We need to check if the requested address is in the RAM
1295 * because we don't want to map the entire memory in QEMU.
1296 * In that case just map until the end of the page.
1297 */
1298 if (block->offset == 0) {
1299 return xen_map_cache(addr, 0, 0);
1300 } else if (block->host == NULL) {
1301 block->host =
1302 xen_map_cache(block->offset, block->length, 1);
1303 }
1304 }
1305 return block->host + (addr - block->offset);
1306 }
1307
1308 /* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
1309 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1310 *
1311 * ??? Is this still necessary?
1312 */
1313 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1314 {
1315 RAMBlock *block;
1316
1317 /* The list is protected by the iothread lock here. */
1318 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1319 if (addr - block->offset < block->length) {
1320 if (xen_enabled()) {
1321 /* We need to check if the requested address is in the RAM
1322 * because we don't want to map the entire memory in QEMU.
1323 * In that case just map until the end of the page.
1324 */
1325 if (block->offset == 0) {
1326 return xen_map_cache(addr, 0, 0);
1327 } else if (block->host == NULL) {
1328 block->host =
1329 xen_map_cache(block->offset, block->length, 1);
1330 }
1331 }
1332 return block->host + (addr - block->offset);
1333 }
1334 }
1335
1336 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1337 abort();
1338
1339 return NULL;
1340 }
1341
1342 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1343 * but takes a size argument */
1344 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1345 {
1346 if (*size == 0) {
1347 return NULL;
1348 }
1349 if (xen_enabled()) {
1350 return xen_map_cache(addr, *size, 1);
1351 } else {
1352 RAMBlock *block;
1353
1354 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1355 if (addr - block->offset < block->length) {
1356 if (addr - block->offset + *size > block->length)
1357 *size = block->length - addr + block->offset;
1358 return block->host + (addr - block->offset);
1359 }
1360 }
1361
1362 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1363 abort();
1364 }
1365 }
1366
1367 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1368 {
1369 RAMBlock *block;
1370 uint8_t *host = ptr;
1371
1372 if (xen_enabled()) {
1373 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1374 return 0;
1375 }
1376
1377 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1378 /* This case append when the block is not mapped. */
1379 if (block->host == NULL) {
1380 continue;
1381 }
1382 if (host - block->host < block->length) {
1383 *ram_addr = block->offset + (host - block->host);
1384 return 0;
1385 }
1386 }
1387
1388 return -1;
1389 }
1390
1391 /* Some of the softmmu routines need to translate from a host pointer
1392 (typically a TLB entry) back to a ram offset. */
1393 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1394 {
1395 ram_addr_t ram_addr;
1396
1397 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1398 fprintf(stderr, "Bad ram pointer %p\n", ptr);
1399 abort();
1400 }
1401 return ram_addr;
1402 }
1403
1404 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1405 uint64_t val, unsigned size)
1406 {
1407 int dirty_flags;
1408 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1409 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1410 tb_invalidate_phys_page_fast(ram_addr, size);
1411 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1412 }
1413 switch (size) {
1414 case 1:
1415 stb_p(qemu_get_ram_ptr(ram_addr), val);
1416 break;
1417 case 2:
1418 stw_p(qemu_get_ram_ptr(ram_addr), val);
1419 break;
1420 case 4:
1421 stl_p(qemu_get_ram_ptr(ram_addr), val);
1422 break;
1423 default:
1424 abort();
1425 }
1426 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1427 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1428 /* we remove the notdirty callback only if the code has been
1429 flushed */
1430 if (dirty_flags == 0xff)
1431 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1432 }
1433
1434 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1435 unsigned size, bool is_write)
1436 {
1437 return is_write;
1438 }
1439
1440 static const MemoryRegionOps notdirty_mem_ops = {
1441 .write = notdirty_mem_write,
1442 .valid.accepts = notdirty_mem_accepts,
1443 .endianness = DEVICE_NATIVE_ENDIAN,
1444 };
1445
1446 /* Generate a debug exception if a watchpoint has been hit. */
1447 static void check_watchpoint(int offset, int len_mask, int flags)
1448 {
1449 CPUArchState *env = cpu_single_env;
1450 target_ulong pc, cs_base;
1451 target_ulong vaddr;
1452 CPUWatchpoint *wp;
1453 int cpu_flags;
1454
1455 if (env->watchpoint_hit) {
1456 /* We re-entered the check after replacing the TB. Now raise
1457 * the debug interrupt so that is will trigger after the
1458 * current instruction. */
1459 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1460 return;
1461 }
1462 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1463 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1464 if ((vaddr == (wp->vaddr & len_mask) ||
1465 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1466 wp->flags |= BP_WATCHPOINT_HIT;
1467 if (!env->watchpoint_hit) {
1468 env->watchpoint_hit = wp;
1469 tb_check_watchpoint(env);
1470 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1471 env->exception_index = EXCP_DEBUG;
1472 cpu_loop_exit(env);
1473 } else {
1474 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1475 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1476 cpu_resume_from_signal(env, NULL);
1477 }
1478 }
1479 } else {
1480 wp->flags &= ~BP_WATCHPOINT_HIT;
1481 }
1482 }
1483 }
1484
1485 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1486 so these check for a hit then pass through to the normal out-of-line
1487 phys routines. */
1488 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1489 unsigned size)
1490 {
1491 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1492 switch (size) {
1493 case 1: return ldub_phys(addr);
1494 case 2: return lduw_phys(addr);
1495 case 4: return ldl_phys(addr);
1496 default: abort();
1497 }
1498 }
1499
1500 static void watch_mem_write(void *opaque, hwaddr addr,
1501 uint64_t val, unsigned size)
1502 {
1503 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1504 switch (size) {
1505 case 1:
1506 stb_phys(addr, val);
1507 break;
1508 case 2:
1509 stw_phys(addr, val);
1510 break;
1511 case 4:
1512 stl_phys(addr, val);
1513 break;
1514 default: abort();
1515 }
1516 }
1517
1518 static const MemoryRegionOps watch_mem_ops = {
1519 .read = watch_mem_read,
1520 .write = watch_mem_write,
1521 .endianness = DEVICE_NATIVE_ENDIAN,
1522 };
1523
1524 static uint64_t subpage_read(void *opaque, hwaddr addr,
1525 unsigned len)
1526 {
1527 subpage_t *mmio = opaque;
1528 unsigned int idx = SUBPAGE_IDX(addr);
1529 MemoryRegionSection *section;
1530 #if defined(DEBUG_SUBPAGE)
1531 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1532 mmio, len, addr, idx);
1533 #endif
1534
1535 section = &phys_sections[mmio->sub_section[idx]];
1536 addr += mmio->base;
1537 addr -= section->offset_within_address_space;
1538 addr += section->offset_within_region;
1539 return io_mem_read(section->mr, addr, len);
1540 }
1541
1542 static void subpage_write(void *opaque, hwaddr addr,
1543 uint64_t value, unsigned len)
1544 {
1545 subpage_t *mmio = opaque;
1546 unsigned int idx = SUBPAGE_IDX(addr);
1547 MemoryRegionSection *section;
1548 #if defined(DEBUG_SUBPAGE)
1549 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1550 " idx %d value %"PRIx64"\n",
1551 __func__, mmio, len, addr, idx, value);
1552 #endif
1553
1554 section = &phys_sections[mmio->sub_section[idx]];
1555 addr += mmio->base;
1556 addr -= section->offset_within_address_space;
1557 addr += section->offset_within_region;
1558 io_mem_write(section->mr, addr, value, len);
1559 }
1560
1561 static bool subpage_accepts(void *opaque, hwaddr addr,
1562 unsigned size, bool is_write)
1563 {
1564 subpage_t *mmio = opaque;
1565 unsigned int idx = SUBPAGE_IDX(addr);
1566 MemoryRegionSection *section;
1567 #if defined(DEBUG_SUBPAGE)
1568 printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx
1569 " idx %d\n", __func__, mmio,
1570 is_write ? 'w' : 'r', len, addr, idx);
1571 #endif
1572
1573 section = &phys_sections[mmio->sub_section[idx]];
1574 addr += mmio->base;
1575 addr -= section->offset_within_address_space;
1576 addr += section->offset_within_region;
1577 return memory_region_access_valid(section->mr, addr, size, is_write);
1578 }
1579
1580 static const MemoryRegionOps subpage_ops = {
1581 .read = subpage_read,
1582 .write = subpage_write,
1583 .valid.accepts = subpage_accepts,
1584 .endianness = DEVICE_NATIVE_ENDIAN,
1585 };
1586
1587 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1588 unsigned size)
1589 {
1590 ram_addr_t raddr = addr;
1591 void *ptr = qemu_get_ram_ptr(raddr);
1592 switch (size) {
1593 case 1: return ldub_p(ptr);
1594 case 2: return lduw_p(ptr);
1595 case 4: return ldl_p(ptr);
1596 default: abort();
1597 }
1598 }
1599
1600 static void subpage_ram_write(void *opaque, hwaddr addr,
1601 uint64_t value, unsigned size)
1602 {
1603 ram_addr_t raddr = addr;
1604 void *ptr = qemu_get_ram_ptr(raddr);
1605 switch (size) {
1606 case 1: return stb_p(ptr, value);
1607 case 2: return stw_p(ptr, value);
1608 case 4: return stl_p(ptr, value);
1609 default: abort();
1610 }
1611 }
1612
1613 static const MemoryRegionOps subpage_ram_ops = {
1614 .read = subpage_ram_read,
1615 .write = subpage_ram_write,
1616 .endianness = DEVICE_NATIVE_ENDIAN,
1617 };
1618
1619 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1620 uint16_t section)
1621 {
1622 int idx, eidx;
1623
1624 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1625 return -1;
1626 idx = SUBPAGE_IDX(start);
1627 eidx = SUBPAGE_IDX(end);
1628 #if defined(DEBUG_SUBPAGE)
1629 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
1630 mmio, start, end, idx, eidx, memory);
1631 #endif
1632 if (memory_region_is_ram(phys_sections[section].mr)) {
1633 MemoryRegionSection new_section = phys_sections[section];
1634 new_section.mr = &io_mem_subpage_ram;
1635 section = phys_section_add(&new_section);
1636 }
1637 for (; idx <= eidx; idx++) {
1638 mmio->sub_section[idx] = section;
1639 }
1640
1641 return 0;
1642 }
1643
1644 static subpage_t *subpage_init(hwaddr base)
1645 {
1646 subpage_t *mmio;
1647
1648 mmio = g_malloc0(sizeof(subpage_t));
1649
1650 mmio->base = base;
1651 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1652 "subpage", TARGET_PAGE_SIZE);
1653 mmio->iomem.subpage = true;
1654 #if defined(DEBUG_SUBPAGE)
1655 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
1656 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
1657 #endif
1658 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1659
1660 return mmio;
1661 }
1662
1663 static uint16_t dummy_section(MemoryRegion *mr)
1664 {
1665 MemoryRegionSection section = {
1666 .mr = mr,
1667 .offset_within_address_space = 0,
1668 .offset_within_region = 0,
1669 .size = UINT64_MAX,
1670 };
1671
1672 return phys_section_add(&section);
1673 }
1674
1675 MemoryRegion *iotlb_to_region(hwaddr index)
1676 {
1677 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1678 }
1679
1680 static void io_mem_init(void)
1681 {
1682 memory_region_init_io(&io_mem_rom, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1683 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1684 "unassigned", UINT64_MAX);
1685 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1686 "notdirty", UINT64_MAX);
1687 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1688 "subpage-ram", UINT64_MAX);
1689 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1690 "watch", UINT64_MAX);
1691 }
1692
1693 static void mem_begin(MemoryListener *listener)
1694 {
1695 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1696
1697 destroy_all_mappings(d);
1698 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1699 }
1700
1701 static void core_begin(MemoryListener *listener)
1702 {
1703 phys_sections_clear();
1704 phys_section_unassigned = dummy_section(&io_mem_unassigned);
1705 phys_section_notdirty = dummy_section(&io_mem_notdirty);
1706 phys_section_rom = dummy_section(&io_mem_rom);
1707 phys_section_watch = dummy_section(&io_mem_watch);
1708 }
1709
1710 static void tcg_commit(MemoryListener *listener)
1711 {
1712 CPUArchState *env;
1713
1714 /* since each CPU stores ram addresses in its TLB cache, we must
1715 reset the modified entries */
1716 /* XXX: slow ! */
1717 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1718 tlb_flush(env, 1);
1719 }
1720 }
1721
1722 static void core_log_global_start(MemoryListener *listener)
1723 {
1724 cpu_physical_memory_set_dirty_tracking(1);
1725 }
1726
1727 static void core_log_global_stop(MemoryListener *listener)
1728 {
1729 cpu_physical_memory_set_dirty_tracking(0);
1730 }
1731
1732 static void io_region_add(MemoryListener *listener,
1733 MemoryRegionSection *section)
1734 {
1735 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1736
1737 mrio->mr = section->mr;
1738 mrio->offset = section->offset_within_region;
1739 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1740 section->offset_within_address_space, section->size);
1741 ioport_register(&mrio->iorange);
1742 }
1743
1744 static void io_region_del(MemoryListener *listener,
1745 MemoryRegionSection *section)
1746 {
1747 isa_unassign_ioport(section->offset_within_address_space, section->size);
1748 }
1749
1750 static MemoryListener core_memory_listener = {
1751 .begin = core_begin,
1752 .log_global_start = core_log_global_start,
1753 .log_global_stop = core_log_global_stop,
1754 .priority = 1,
1755 };
1756
1757 static MemoryListener io_memory_listener = {
1758 .region_add = io_region_add,
1759 .region_del = io_region_del,
1760 .priority = 0,
1761 };
1762
1763 static MemoryListener tcg_memory_listener = {
1764 .commit = tcg_commit,
1765 };
1766
1767 void address_space_init_dispatch(AddressSpace *as)
1768 {
1769 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1770
1771 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1772 d->listener = (MemoryListener) {
1773 .begin = mem_begin,
1774 .region_add = mem_add,
1775 .region_nop = mem_add,
1776 .priority = 0,
1777 };
1778 as->dispatch = d;
1779 memory_listener_register(&d->listener, as);
1780 }
1781
1782 void address_space_destroy_dispatch(AddressSpace *as)
1783 {
1784 AddressSpaceDispatch *d = as->dispatch;
1785
1786 memory_listener_unregister(&d->listener);
1787 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1788 g_free(d);
1789 as->dispatch = NULL;
1790 }
1791
1792 static void memory_map_init(void)
1793 {
1794 system_memory = g_malloc(sizeof(*system_memory));
1795 memory_region_init(system_memory, "system", INT64_MAX);
1796 address_space_init(&address_space_memory, system_memory);
1797 address_space_memory.name = "memory";
1798
1799 system_io = g_malloc(sizeof(*system_io));
1800 memory_region_init(system_io, "io", 65536);
1801 address_space_init(&address_space_io, system_io);
1802 address_space_io.name = "I/O";
1803
1804 memory_listener_register(&core_memory_listener, &address_space_memory);
1805 memory_listener_register(&io_memory_listener, &address_space_io);
1806 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1807
1808 dma_context_init(&dma_context_memory, &address_space_memory,
1809 NULL, NULL, NULL);
1810 }
1811
1812 MemoryRegion *get_system_memory(void)
1813 {
1814 return system_memory;
1815 }
1816
1817 MemoryRegion *get_system_io(void)
1818 {
1819 return system_io;
1820 }
1821
1822 #endif /* !defined(CONFIG_USER_ONLY) */
1823
1824 /* physical memory access (slow version, mainly for debug) */
1825 #if defined(CONFIG_USER_ONLY)
1826 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1827 uint8_t *buf, int len, int is_write)
1828 {
1829 int l, flags;
1830 target_ulong page;
1831 void * p;
1832
1833 while (len > 0) {
1834 page = addr & TARGET_PAGE_MASK;
1835 l = (page + TARGET_PAGE_SIZE) - addr;
1836 if (l > len)
1837 l = len;
1838 flags = page_get_flags(page);
1839 if (!(flags & PAGE_VALID))
1840 return -1;
1841 if (is_write) {
1842 if (!(flags & PAGE_WRITE))
1843 return -1;
1844 /* XXX: this code should not depend on lock_user */
1845 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1846 return -1;
1847 memcpy(p, buf, l);
1848 unlock_user(p, addr, l);
1849 } else {
1850 if (!(flags & PAGE_READ))
1851 return -1;
1852 /* XXX: this code should not depend on lock_user */
1853 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1854 return -1;
1855 memcpy(buf, p, l);
1856 unlock_user(p, addr, 0);
1857 }
1858 len -= l;
1859 buf += l;
1860 addr += l;
1861 }
1862 return 0;
1863 }
1864
1865 #else
1866
1867 static void invalidate_and_set_dirty(hwaddr addr,
1868 hwaddr length)
1869 {
1870 if (!cpu_physical_memory_is_dirty(addr)) {
1871 /* invalidate code */
1872 tb_invalidate_phys_page_range(addr, addr + length, 0);
1873 /* set dirty bit */
1874 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1875 }
1876 xen_modified_memory(addr, length);
1877 }
1878
1879 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1880 {
1881 if (memory_region_is_ram(mr)) {
1882 return !(is_write && mr->readonly);
1883 }
1884 if (memory_region_is_romd(mr)) {
1885 return !is_write;
1886 }
1887
1888 return false;
1889 }
1890
1891 static inline int memory_access_size(int l, hwaddr addr)
1892 {
1893 if (l >= 4 && ((addr & 3) == 0)) {
1894 return 4;
1895 }
1896 if (l >= 2 && ((addr & 1) == 0)) {
1897 return 2;
1898 }
1899 return 1;
1900 }
1901
1902 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1903 int len, bool is_write)
1904 {
1905 hwaddr l;
1906 uint8_t *ptr;
1907 uint32_t val;
1908 hwaddr addr1;
1909 MemoryRegionSection *section;
1910
1911 while (len > 0) {
1912 l = len;
1913 section = address_space_translate(as, addr, &addr1, &l, is_write);
1914
1915 if (is_write) {
1916 if (!memory_access_is_direct(section->mr, is_write)) {
1917 l = memory_access_size(l, addr1);
1918 /* XXX: could force cpu_single_env to NULL to avoid
1919 potential bugs */
1920 if (l == 4) {
1921 /* 32 bit write access */
1922 val = ldl_p(buf);
1923 io_mem_write(section->mr, addr1, val, 4);
1924 } else if (l == 2) {
1925 /* 16 bit write access */
1926 val = lduw_p(buf);
1927 io_mem_write(section->mr, addr1, val, 2);
1928 } else {
1929 /* 8 bit write access */
1930 val = ldub_p(buf);
1931 io_mem_write(section->mr, addr1, val, 1);
1932 }
1933 } else {
1934 addr1 += memory_region_get_ram_addr(section->mr);
1935 /* RAM case */
1936 ptr = qemu_get_ram_ptr(addr1);
1937 memcpy(ptr, buf, l);
1938 invalidate_and_set_dirty(addr1, l);
1939 }
1940 } else {
1941 if (!memory_access_is_direct(section->mr, is_write)) {
1942 /* I/O case */
1943 l = memory_access_size(l, addr1);
1944 if (l == 4) {
1945 /* 32 bit read access */
1946 val = io_mem_read(section->mr, addr1, 4);
1947 stl_p(buf, val);
1948 } else if (l == 2) {
1949 /* 16 bit read access */
1950 val = io_mem_read(section->mr, addr1, 2);
1951 stw_p(buf, val);
1952 } else {
1953 /* 8 bit read access */
1954 val = io_mem_read(section->mr, addr1, 1);
1955 stb_p(buf, val);
1956 }
1957 } else {
1958 /* RAM case */
1959 ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
1960 memcpy(buf, ptr, l);
1961 }
1962 }
1963 len -= l;
1964 buf += l;
1965 addr += l;
1966 }
1967 }
1968
1969 void address_space_write(AddressSpace *as, hwaddr addr,
1970 const uint8_t *buf, int len)
1971 {
1972 address_space_rw(as, addr, (uint8_t *)buf, len, true);
1973 }
1974
1975 /**
1976 * address_space_read: read from an address space.
1977 *
1978 * @as: #AddressSpace to be accessed
1979 * @addr: address within that address space
1980 * @buf: buffer with the data transferred
1981 */
1982 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1983 {
1984 address_space_rw(as, addr, buf, len, false);
1985 }
1986
1987
1988 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1989 int len, int is_write)
1990 {
1991 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
1992 }
1993
1994 /* used for ROM loading : can write in RAM and ROM */
1995 void cpu_physical_memory_write_rom(hwaddr addr,
1996 const uint8_t *buf, int len)
1997 {
1998 hwaddr l;
1999 uint8_t *ptr;
2000 hwaddr addr1;
2001 MemoryRegionSection *section;
2002
2003 while (len > 0) {
2004 l = len;
2005 section = address_space_translate(&address_space_memory,
2006 addr, &addr1, &l, true);
2007
2008 if (!(memory_region_is_ram(section->mr) ||
2009 memory_region_is_romd(section->mr))) {
2010 /* do nothing */
2011 } else {
2012 addr1 += memory_region_get_ram_addr(section->mr);
2013 /* ROM/RAM case */
2014 ptr = qemu_get_ram_ptr(addr1);
2015 memcpy(ptr, buf, l);
2016 invalidate_and_set_dirty(addr1, l);
2017 }
2018 len -= l;
2019 buf += l;
2020 addr += l;
2021 }
2022 }
2023
2024 typedef struct {
2025 void *buffer;
2026 hwaddr addr;
2027 hwaddr len;
2028 } BounceBuffer;
2029
2030 static BounceBuffer bounce;
2031
2032 typedef struct MapClient {
2033 void *opaque;
2034 void (*callback)(void *opaque);
2035 QLIST_ENTRY(MapClient) link;
2036 } MapClient;
2037
2038 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2039 = QLIST_HEAD_INITIALIZER(map_client_list);
2040
2041 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2042 {
2043 MapClient *client = g_malloc(sizeof(*client));
2044
2045 client->opaque = opaque;
2046 client->callback = callback;
2047 QLIST_INSERT_HEAD(&map_client_list, client, link);
2048 return client;
2049 }
2050
2051 static void cpu_unregister_map_client(void *_client)
2052 {
2053 MapClient *client = (MapClient *)_client;
2054
2055 QLIST_REMOVE(client, link);
2056 g_free(client);
2057 }
2058
2059 static void cpu_notify_map_clients(void)
2060 {
2061 MapClient *client;
2062
2063 while (!QLIST_EMPTY(&map_client_list)) {
2064 client = QLIST_FIRST(&map_client_list);
2065 client->callback(client->opaque);
2066 cpu_unregister_map_client(client);
2067 }
2068 }
2069
2070 /* Map a physical memory region into a host virtual address.
2071 * May map a subset of the requested range, given by and returned in *plen.
2072 * May return NULL if resources needed to perform the mapping are exhausted.
2073 * Use only for reads OR writes - not for read-modify-write operations.
2074 * Use cpu_register_map_client() to know when retrying the map operation is
2075 * likely to succeed.
2076 */
2077 void *address_space_map(AddressSpace *as,
2078 hwaddr addr,
2079 hwaddr *plen,
2080 bool is_write)
2081 {
2082 hwaddr len = *plen;
2083 hwaddr todo = 0;
2084 hwaddr l, xlat;
2085 MemoryRegionSection *section;
2086 ram_addr_t raddr = RAM_ADDR_MAX;
2087 ram_addr_t rlen;
2088 void *ret;
2089
2090 while (len > 0) {
2091 l = len;
2092 section = address_space_translate(as, addr, &xlat, &l, is_write);
2093
2094 if (!memory_access_is_direct(section->mr, is_write)) {
2095 if (todo || bounce.buffer) {
2096 break;
2097 }
2098 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2099 bounce.addr = addr;
2100 bounce.len = l;
2101 if (!is_write) {
2102 address_space_read(as, addr, bounce.buffer, l);
2103 }
2104
2105 *plen = l;
2106 return bounce.buffer;
2107 }
2108 if (!todo) {
2109 raddr = memory_region_get_ram_addr(section->mr) + xlat;
2110 } else {
2111 if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
2112 break;
2113 }
2114 }
2115
2116 len -= l;
2117 addr += l;
2118 todo += l;
2119 }
2120 rlen = todo;
2121 ret = qemu_ram_ptr_length(raddr, &rlen);
2122 *plen = rlen;
2123 return ret;
2124 }
2125
2126 /* Unmaps a memory region previously mapped by address_space_map().
2127 * Will also mark the memory as dirty if is_write == 1. access_len gives
2128 * the amount of memory that was actually read or written by the caller.
2129 */
2130 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2131 int is_write, hwaddr access_len)
2132 {
2133 if (buffer != bounce.buffer) {
2134 if (is_write) {
2135 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2136 while (access_len) {
2137 unsigned l;
2138 l = TARGET_PAGE_SIZE;
2139 if (l > access_len)
2140 l = access_len;
2141 invalidate_and_set_dirty(addr1, l);
2142 addr1 += l;
2143 access_len -= l;
2144 }
2145 }
2146 if (xen_enabled()) {
2147 xen_invalidate_map_cache_entry(buffer);
2148 }
2149 return;
2150 }
2151 if (is_write) {
2152 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2153 }
2154 qemu_vfree(bounce.buffer);
2155 bounce.buffer = NULL;
2156 cpu_notify_map_clients();
2157 }
2158
2159 void *cpu_physical_memory_map(hwaddr addr,
2160 hwaddr *plen,
2161 int is_write)
2162 {
2163 return address_space_map(&address_space_memory, addr, plen, is_write);
2164 }
2165
2166 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2167 int is_write, hwaddr access_len)
2168 {
2169 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2170 }
2171
2172 /* warning: addr must be aligned */
2173 static inline uint32_t ldl_phys_internal(hwaddr addr,
2174 enum device_endian endian)
2175 {
2176 uint8_t *ptr;
2177 uint32_t val;
2178 MemoryRegionSection *section;
2179 hwaddr l = 4;
2180 hwaddr addr1;
2181
2182 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2183 false);
2184 if (l < 4 || !memory_access_is_direct(section->mr, false)) {
2185 /* I/O case */
2186 val = io_mem_read(section->mr, addr1, 4);
2187 #if defined(TARGET_WORDS_BIGENDIAN)
2188 if (endian == DEVICE_LITTLE_ENDIAN) {
2189 val = bswap32(val);
2190 }
2191 #else
2192 if (endian == DEVICE_BIG_ENDIAN) {
2193 val = bswap32(val);
2194 }
2195 #endif
2196 } else {
2197 /* RAM case */
2198 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2199 & TARGET_PAGE_MASK)
2200 + addr1);
2201 switch (endian) {
2202 case DEVICE_LITTLE_ENDIAN:
2203 val = ldl_le_p(ptr);
2204 break;
2205 case DEVICE_BIG_ENDIAN:
2206 val = ldl_be_p(ptr);
2207 break;
2208 default:
2209 val = ldl_p(ptr);
2210 break;
2211 }
2212 }
2213 return val;
2214 }
2215
2216 uint32_t ldl_phys(hwaddr addr)
2217 {
2218 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2219 }
2220
2221 uint32_t ldl_le_phys(hwaddr addr)
2222 {
2223 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2224 }
2225
2226 uint32_t ldl_be_phys(hwaddr addr)
2227 {
2228 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2229 }
2230
2231 /* warning: addr must be aligned */
2232 static inline uint64_t ldq_phys_internal(hwaddr addr,
2233 enum device_endian endian)
2234 {
2235 uint8_t *ptr;
2236 uint64_t val;
2237 MemoryRegionSection *section;
2238 hwaddr l = 8;
2239 hwaddr addr1;
2240
2241 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2242 false);
2243 if (l < 8 || !memory_access_is_direct(section->mr, false)) {
2244 /* I/O case */
2245
2246 /* XXX This is broken when device endian != cpu endian.
2247 Fix and add "endian" variable check */
2248 #ifdef TARGET_WORDS_BIGENDIAN
2249 val = io_mem_read(section->mr, addr1, 4) << 32;
2250 val |= io_mem_read(section->mr, addr1 + 4, 4);
2251 #else
2252 val = io_mem_read(section->mr, addr1, 4);
2253 val |= io_mem_read(section->mr, addr1 + 4, 4) << 32;
2254 #endif
2255 } else {
2256 /* RAM case */
2257 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2258 & TARGET_PAGE_MASK)
2259 + addr1);
2260 switch (endian) {
2261 case DEVICE_LITTLE_ENDIAN:
2262 val = ldq_le_p(ptr);
2263 break;
2264 case DEVICE_BIG_ENDIAN:
2265 val = ldq_be_p(ptr);
2266 break;
2267 default:
2268 val = ldq_p(ptr);
2269 break;
2270 }
2271 }
2272 return val;
2273 }
2274
2275 uint64_t ldq_phys(hwaddr addr)
2276 {
2277 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2278 }
2279
2280 uint64_t ldq_le_phys(hwaddr addr)
2281 {
2282 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2283 }
2284
2285 uint64_t ldq_be_phys(hwaddr addr)
2286 {
2287 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2288 }
2289
2290 /* XXX: optimize */
2291 uint32_t ldub_phys(hwaddr addr)
2292 {
2293 uint8_t val;
2294 cpu_physical_memory_read(addr, &val, 1);
2295 return val;
2296 }
2297
2298 /* warning: addr must be aligned */
2299 static inline uint32_t lduw_phys_internal(hwaddr addr,
2300 enum device_endian endian)
2301 {
2302 uint8_t *ptr;
2303 uint64_t val;
2304 MemoryRegionSection *section;
2305 hwaddr l = 2;
2306 hwaddr addr1;
2307
2308 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2309 false);
2310 if (l < 2 || !memory_access_is_direct(section->mr, false)) {
2311 /* I/O case */
2312 val = io_mem_read(section->mr, addr1, 2);
2313 #if defined(TARGET_WORDS_BIGENDIAN)
2314 if (endian == DEVICE_LITTLE_ENDIAN) {
2315 val = bswap16(val);
2316 }
2317 #else
2318 if (endian == DEVICE_BIG_ENDIAN) {
2319 val = bswap16(val);
2320 }
2321 #endif
2322 } else {
2323 /* RAM case */
2324 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2325 & TARGET_PAGE_MASK)
2326 + addr1);
2327 switch (endian) {
2328 case DEVICE_LITTLE_ENDIAN:
2329 val = lduw_le_p(ptr);
2330 break;
2331 case DEVICE_BIG_ENDIAN:
2332 val = lduw_be_p(ptr);
2333 break;
2334 default:
2335 val = lduw_p(ptr);
2336 break;
2337 }
2338 }
2339 return val;
2340 }
2341
2342 uint32_t lduw_phys(hwaddr addr)
2343 {
2344 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2345 }
2346
2347 uint32_t lduw_le_phys(hwaddr addr)
2348 {
2349 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2350 }
2351
2352 uint32_t lduw_be_phys(hwaddr addr)
2353 {
2354 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2355 }
2356
2357 /* warning: addr must be aligned. The ram page is not masked as dirty
2358 and the code inside is not invalidated. It is useful if the dirty
2359 bits are used to track modified PTEs */
2360 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2361 {
2362 uint8_t *ptr;
2363 MemoryRegionSection *section;
2364 hwaddr l = 4;
2365 hwaddr addr1;
2366
2367 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2368 true);
2369 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2370 io_mem_write(section->mr, addr1, val, 4);
2371 } else {
2372 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2373 ptr = qemu_get_ram_ptr(addr1);
2374 stl_p(ptr, val);
2375
2376 if (unlikely(in_migration)) {
2377 if (!cpu_physical_memory_is_dirty(addr1)) {
2378 /* invalidate code */
2379 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2380 /* set dirty bit */
2381 cpu_physical_memory_set_dirty_flags(
2382 addr1, (0xff & ~CODE_DIRTY_FLAG));
2383 }
2384 }
2385 }
2386 }
2387
2388 /* warning: addr must be aligned */
2389 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2390 enum device_endian endian)
2391 {
2392 uint8_t *ptr;
2393 MemoryRegionSection *section;
2394 hwaddr l = 4;
2395 hwaddr addr1;
2396
2397 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2398 true);
2399 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2400 #if defined(TARGET_WORDS_BIGENDIAN)
2401 if (endian == DEVICE_LITTLE_ENDIAN) {
2402 val = bswap32(val);
2403 }
2404 #else
2405 if (endian == DEVICE_BIG_ENDIAN) {
2406 val = bswap32(val);
2407 }
2408 #endif
2409 io_mem_write(section->mr, addr1, val, 4);
2410 } else {
2411 /* RAM case */
2412 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2413 ptr = qemu_get_ram_ptr(addr1);
2414 switch (endian) {
2415 case DEVICE_LITTLE_ENDIAN:
2416 stl_le_p(ptr, val);
2417 break;
2418 case DEVICE_BIG_ENDIAN:
2419 stl_be_p(ptr, val);
2420 break;
2421 default:
2422 stl_p(ptr, val);
2423 break;
2424 }
2425 invalidate_and_set_dirty(addr1, 4);
2426 }
2427 }
2428
2429 void stl_phys(hwaddr addr, uint32_t val)
2430 {
2431 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2432 }
2433
2434 void stl_le_phys(hwaddr addr, uint32_t val)
2435 {
2436 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2437 }
2438
2439 void stl_be_phys(hwaddr addr, uint32_t val)
2440 {
2441 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2442 }
2443
2444 /* XXX: optimize */
2445 void stb_phys(hwaddr addr, uint32_t val)
2446 {
2447 uint8_t v = val;
2448 cpu_physical_memory_write(addr, &v, 1);
2449 }
2450
2451 /* warning: addr must be aligned */
2452 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2453 enum device_endian endian)
2454 {
2455 uint8_t *ptr;
2456 MemoryRegionSection *section;
2457 hwaddr l = 2;
2458 hwaddr addr1;
2459
2460 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2461 true);
2462 if (l < 2 || !memory_access_is_direct(section->mr, true)) {
2463 #if defined(TARGET_WORDS_BIGENDIAN)
2464 if (endian == DEVICE_LITTLE_ENDIAN) {
2465 val = bswap16(val);
2466 }
2467 #else
2468 if (endian == DEVICE_BIG_ENDIAN) {
2469 val = bswap16(val);
2470 }
2471 #endif
2472 io_mem_write(section->mr, addr1, val, 2);
2473 } else {
2474 /* RAM case */
2475 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2476 ptr = qemu_get_ram_ptr(addr1);
2477 switch (endian) {
2478 case DEVICE_LITTLE_ENDIAN:
2479 stw_le_p(ptr, val);
2480 break;
2481 case DEVICE_BIG_ENDIAN:
2482 stw_be_p(ptr, val);
2483 break;
2484 default:
2485 stw_p(ptr, val);
2486 break;
2487 }
2488 invalidate_and_set_dirty(addr1, 2);
2489 }
2490 }
2491
2492 void stw_phys(hwaddr addr, uint32_t val)
2493 {
2494 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2495 }
2496
2497 void stw_le_phys(hwaddr addr, uint32_t val)
2498 {
2499 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2500 }
2501
2502 void stw_be_phys(hwaddr addr, uint32_t val)
2503 {
2504 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2505 }
2506
2507 /* XXX: optimize */
2508 void stq_phys(hwaddr addr, uint64_t val)
2509 {
2510 val = tswap64(val);
2511 cpu_physical_memory_write(addr, &val, 8);
2512 }
2513
2514 void stq_le_phys(hwaddr addr, uint64_t val)
2515 {
2516 val = cpu_to_le64(val);
2517 cpu_physical_memory_write(addr, &val, 8);
2518 }
2519
2520 void stq_be_phys(hwaddr addr, uint64_t val)
2521 {
2522 val = cpu_to_be64(val);
2523 cpu_physical_memory_write(addr, &val, 8);
2524 }
2525
2526 /* virtual memory access for debug (includes writing to ROM) */
2527 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2528 uint8_t *buf, int len, int is_write)
2529 {
2530 int l;
2531 hwaddr phys_addr;
2532 target_ulong page;
2533
2534 while (len > 0) {
2535 page = addr & TARGET_PAGE_MASK;
2536 phys_addr = cpu_get_phys_page_debug(env, page);
2537 /* if no physical page mapped, return an error */
2538 if (phys_addr == -1)
2539 return -1;
2540 l = (page + TARGET_PAGE_SIZE) - addr;
2541 if (l > len)
2542 l = len;
2543 phys_addr += (addr & ~TARGET_PAGE_MASK);
2544 if (is_write)
2545 cpu_physical_memory_write_rom(phys_addr, buf, l);
2546 else
2547 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2548 len -= l;
2549 buf += l;
2550 addr += l;
2551 }
2552 return 0;
2553 }
2554 #endif
2555
2556 #if !defined(CONFIG_USER_ONLY)
2557
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time,
 * false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
    bool big_endian = false;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#endif
    return big_endian;
}
2571
2572 #endif
2573
2574 #ifndef CONFIG_USER_ONLY
2575 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2576 {
2577 MemoryRegionSection *section;
2578 hwaddr l = 1;
2579
2580 section = address_space_translate(&address_space_memory,
2581 phys_addr, &phys_addr, &l, false);
2582
2583 return !(memory_region_is_ram(section->mr) ||
2584 memory_region_is_romd(section->mr));
2585 }
2586 #endif