[mirror_qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47 #include "qemu/rcu_queue.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53
54 #include "qemu/range.h"
55
56 //#define DEBUG_SUBPAGE
57
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
60
61 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
62 * are protected by the ramlist lock.
63 */
64 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
65
66 static MemoryRegion *system_memory;
67 static MemoryRegion *system_io;
68
69 AddressSpace address_space_io;
70 AddressSpace address_space_memory;
71
72 MemoryRegion io_mem_rom, io_mem_notdirty;
73 static MemoryRegion io_mem_unassigned;
74
75 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
76 #define RAM_PREALLOC (1 << 0)
77
78 /* RAM is mmap-ed with MAP_SHARED */
79 #define RAM_SHARED (1 << 1)
80
81 /* Only a portion of RAM (used_length) is actually used and migrated.
82 * This used_length size can change across reboots.
83 */
84 #define RAM_RESIZEABLE (1 << 2)
85
86 #endif
87
88 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
89 /* current CPU in the current thread. It is only valid inside
90 cpu_exec() */
91 DEFINE_TLS(CPUState *, current_cpu);
92 /* 0 = Do not count executed instructions.
93 1 = Precise instruction counting.
94 2 = Adaptive rate instruction counting. */
95 int use_icount;
96
97 #if !defined(CONFIG_USER_ONLY)
98
99 typedef struct PhysPageEntry PhysPageEntry;
100
101 struct PhysPageEntry {
102 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
103 uint32_t skip : 6;
104 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
105 uint32_t ptr : 26;
106 };
107
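/* Sentinel value: the all-ones value of the 26-bit ptr field (0x03ffffff),
 * meaning "no node here".
 */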
108 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
109
110 /* Size of the L2 (and L3, etc) page tables. */
111 #define ADDR_SPACE_BITS 64
112
113 #define P_L2_BITS 9
114 #define P_L2_SIZE (1 << P_L2_BITS)
115
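/* Number of radix-tree levels needed to cover the address space; e.g. with
 * 4 KiB target pages (TARGET_PAGE_BITS == 12) this evaluates to
 * ((64 - 12 - 1) / 9) + 1 == 6.
 */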
116 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
117
118 typedef PhysPageEntry Node[P_L2_SIZE];
119
120 typedef struct PhysPageMap {
121 struct rcu_head rcu;
122
123 unsigned sections_nb;
124 unsigned sections_nb_alloc;
125 unsigned nodes_nb;
126 unsigned nodes_nb_alloc;
127 Node *nodes;
128 MemoryRegionSection *sections;
129 } PhysPageMap;
130
131 struct AddressSpaceDispatch {
132 struct rcu_head rcu;
133
134 /* This is a multi-level map on the physical address space.
135 * The bottom level has pointers to MemoryRegionSections.
136 */
137 PhysPageEntry phys_map;
138 PhysPageMap map;
139 AddressSpace *as;
140 };
141
142 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
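/* A subpage covers a single target page whose contents may belong to several
 * MemoryRegionSections; sub_section[] maps each byte offset within the page
 * to a section index.
 */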
143 typedef struct subpage_t {
144 MemoryRegion iomem;
145 AddressSpace *as;
146 hwaddr base;
147 uint16_t sub_section[TARGET_PAGE_SIZE];
148 } subpage_t;
149
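/* Fixed section indexes: mem_begin() registers dummy sections in exactly this
 * order and asserts it.
 */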
150 #define PHYS_SECTION_UNASSIGNED 0
151 #define PHYS_SECTION_NOTDIRTY 1
152 #define PHYS_SECTION_ROM 2
153 #define PHYS_SECTION_WATCH 3
154
155 static void io_mem_init(void);
156 static void memory_map_init(void);
157 static void tcg_commit(MemoryListener *listener);
158
159 static MemoryRegion io_mem_watch;
160 #endif
161
162 #if !defined(CONFIG_USER_ONLY)
163
164 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
165 {
166 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
167 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
168 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
169 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
170 }
171 }
172
173 static uint32_t phys_map_node_alloc(PhysPageMap *map)
174 {
175 unsigned i;
176 uint32_t ret;
177
178 ret = map->nodes_nb++;
179 assert(ret != PHYS_MAP_NODE_NIL);
180 assert(ret != map->nodes_nb_alloc);
181 for (i = 0; i < P_L2_SIZE; ++i) {
182 map->nodes[ret][i].skip = 1;
183 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
184 }
185 return ret;
186 }
187
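/* Populate the radix tree for [*index, *index + *nb): chunks that are aligned
 * to this level's step and at least one step long become leaves here; smaller
 * pieces recurse one level down.
 */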
188 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
189 hwaddr *index, hwaddr *nb, uint16_t leaf,
190 int level)
191 {
192 PhysPageEntry *p;
193 int i;
194 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
195
196 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
197 lp->ptr = phys_map_node_alloc(map);
198 p = map->nodes[lp->ptr];
199 if (level == 0) {
200 for (i = 0; i < P_L2_SIZE; i++) {
201 p[i].skip = 0;
202 p[i].ptr = PHYS_SECTION_UNASSIGNED;
203 }
204 }
205 } else {
206 p = map->nodes[lp->ptr];
207 }
208 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
209
210 while (*nb && lp < &p[P_L2_SIZE]) {
211 if ((*index & (step - 1)) == 0 && *nb >= step) {
212 lp->skip = 0;
213 lp->ptr = leaf;
214 *index += step;
215 *nb -= step;
216 } else {
217 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
218 }
219 ++lp;
220 }
221 }
222
223 static void phys_page_set(AddressSpaceDispatch *d,
224 hwaddr index, hwaddr nb,
225 uint16_t leaf)
226 {
227 /* Wildly overreserve - it doesn't matter much. */
228 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
229
230 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
231 }
232
233 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
234 * and update our entry so we can skip it and go directly to the destination.
235 */
236 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
237 {
238 unsigned valid_ptr = P_L2_SIZE;
239 int valid = 0;
240 PhysPageEntry *p;
241 int i;
242
243 if (lp->ptr == PHYS_MAP_NODE_NIL) {
244 return;
245 }
246
247 p = nodes[lp->ptr];
248 for (i = 0; i < P_L2_SIZE; i++) {
249 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
250 continue;
251 }
252
253 valid_ptr = i;
254 valid++;
255 if (p[i].skip) {
256 phys_page_compact(&p[i], nodes, compacted);
257 }
258 }
259
260 /* We can only compress if there's only one child. */
261 if (valid != 1) {
262 return;
263 }
264
265 assert(valid_ptr < P_L2_SIZE);
266
267 /* Don't compress if it won't fit in the # of bits we have. */
268 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
269 return;
270 }
271
272 lp->ptr = p[valid_ptr].ptr;
273 if (!p[valid_ptr].skip) {
274 /* If our only child is a leaf, make this a leaf. */
275 /* By design, we should have made this node a leaf to begin with so we
276 * should never reach here.
277 * But since it's so simple to handle this, let's do it just in case we
278 * change this rule.
279 */
280 lp->skip = 0;
281 } else {
282 lp->skip += p[valid_ptr].skip;
283 }
284 }
285
286 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
287 {
288 DECLARE_BITMAP(compacted, nodes_nb);
289
290 if (d->phys_map.skip) {
291 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
292 }
293 }
294
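/* Walk the radix tree for @addr, skipping lp.skip levels per hop, and return
 * the matching section, or the unassigned one if the page is not mapped.
 */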
295 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
296 Node *nodes, MemoryRegionSection *sections)
297 {
298 PhysPageEntry *p;
299 hwaddr index = addr >> TARGET_PAGE_BITS;
300 int i;
301
302 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
303 if (lp.ptr == PHYS_MAP_NODE_NIL) {
304 return &sections[PHYS_SECTION_UNASSIGNED];
305 }
306 p = nodes[lp.ptr];
307 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
308 }
309
310 if (sections[lp.ptr].size.hi ||
311 range_covers_byte(sections[lp.ptr].offset_within_address_space,
312 sections[lp.ptr].size.lo, addr)) {
313 return &sections[lp.ptr];
314 } else {
315 return &sections[PHYS_SECTION_UNASSIGNED];
316 }
317 }
318
319 bool memory_region_is_unassigned(MemoryRegion *mr)
320 {
321 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
322 && mr != &io_mem_watch;
323 }
324
325 /* Called from RCU critical section */
326 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
327 hwaddr addr,
328 bool resolve_subpage)
329 {
330 MemoryRegionSection *section;
331 subpage_t *subpage;
332
333 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
334 if (resolve_subpage && section->mr->subpage) {
335 subpage = container_of(section->mr, subpage_t, iomem);
336 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
337 }
338 return section;
339 }
340
341 /* Called from RCU critical section */
342 static MemoryRegionSection *
343 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
344 hwaddr *plen, bool resolve_subpage)
345 {
346 MemoryRegionSection *section;
347 Int128 diff;
348
349 section = address_space_lookup_region(d, addr, resolve_subpage);
350 /* Compute offset within MemoryRegionSection */
351 addr -= section->offset_within_address_space;
352
353 /* Compute offset within MemoryRegion */
354 *xlat = addr + section->offset_within_region;
355
356 diff = int128_sub(section->mr->size, int128_make64(addr));
357 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
358 return section;
359 }
360
361 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
362 {
363 if (memory_region_is_ram(mr)) {
364 return !(is_write && mr->readonly);
365 }
366 if (memory_region_is_romd(mr)) {
367 return !is_write;
368 }
369
370 return false;
371 }
372
373 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
374 hwaddr *xlat, hwaddr *plen,
375 bool is_write)
376 {
377 IOMMUTLBEntry iotlb;
378 MemoryRegionSection *section;
379 MemoryRegion *mr;
380 hwaddr len = *plen;
381
382 rcu_read_lock();
383 for (;;) {
384 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
385 section = address_space_translate_internal(d, addr, &addr, plen, true);
386 mr = section->mr;
387
388 if (!mr->iommu_ops) {
389 break;
390 }
391
392 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
393 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
394 | (addr & iotlb.addr_mask));
395 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
396 if (!(iotlb.perm & (1 << is_write))) {
397 mr = &io_mem_unassigned;
398 break;
399 }
400
401 as = iotlb.target_as;
402 }
403
404 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
405 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
406 len = MIN(page, len);
407 }
408
409 *plen = len;
410 *xlat = addr;
411 rcu_read_unlock();
412 return mr;
413 }
414
415 /* Called from RCU critical section */
416 MemoryRegionSection *
417 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
418 hwaddr *xlat, hwaddr *plen)
419 {
420 MemoryRegionSection *section;
421 section = address_space_translate_internal(cpu->memory_dispatch,
422 addr, xlat, plen, false);
423
424 assert(!section->mr->iommu_ops);
425 return section;
426 }
427 #endif
428
429 void cpu_exec_init_all(void)
430 {
431 #if !defined(CONFIG_USER_ONLY)
432 qemu_mutex_init(&ram_list.mutex);
433 memory_map_init();
434 io_mem_init();
435 #endif
436 }
437
438 #if !defined(CONFIG_USER_ONLY)
439
440 static int cpu_common_post_load(void *opaque, int version_id)
441 {
442 CPUState *cpu = opaque;
443
444 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
445 version_id is increased. */
446 cpu->interrupt_request &= ~0x01;
447 tlb_flush(cpu, 1);
448
449 return 0;
450 }
451
452 static int cpu_common_pre_load(void *opaque)
453 {
454 CPUState *cpu = opaque;
455
456 cpu->exception_index = -1;
457
458 return 0;
459 }
460
461 static bool cpu_common_exception_index_needed(void *opaque)
462 {
463 CPUState *cpu = opaque;
464
465 return tcg_enabled() && cpu->exception_index != -1;
466 }
467
468 static const VMStateDescription vmstate_cpu_common_exception_index = {
469 .name = "cpu_common/exception_index",
470 .version_id = 1,
471 .minimum_version_id = 1,
472 .fields = (VMStateField[]) {
473 VMSTATE_INT32(exception_index, CPUState),
474 VMSTATE_END_OF_LIST()
475 }
476 };
477
478 const VMStateDescription vmstate_cpu_common = {
479 .name = "cpu_common",
480 .version_id = 1,
481 .minimum_version_id = 1,
482 .pre_load = cpu_common_pre_load,
483 .post_load = cpu_common_post_load,
484 .fields = (VMStateField[]) {
485 VMSTATE_UINT32(halted, CPUState),
486 VMSTATE_UINT32(interrupt_request, CPUState),
487 VMSTATE_END_OF_LIST()
488 },
489 .subsections = (VMStateSubsection[]) {
490 {
491 .vmsd = &vmstate_cpu_common_exception_index,
492 .needed = cpu_common_exception_index_needed,
493 } , {
494 /* empty */
495 }
496 }
497 };
498
499 #endif
500
501 CPUState *qemu_get_cpu(int index)
502 {
503 CPUState *cpu;
504
505 CPU_FOREACH(cpu) {
506 if (cpu->cpu_index == index) {
507 return cpu;
508 }
509 }
510
511 return NULL;
512 }
513
514 #if !defined(CONFIG_USER_ONLY)
515 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
516 {
517 /* We only support one address space per cpu at the moment. */
518 assert(cpu->as == as);
519
520 if (cpu->tcg_as_listener) {
521 memory_listener_unregister(cpu->tcg_as_listener);
522 } else {
523 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
524 }
525 cpu->tcg_as_listener->commit = tcg_commit;
526 memory_listener_register(cpu->tcg_as_listener, as);
527 }
528 #endif
529
530 void cpu_exec_init(CPUArchState *env)
531 {
532 CPUState *cpu = ENV_GET_CPU(env);
533 CPUClass *cc = CPU_GET_CLASS(cpu);
534 CPUState *some_cpu;
535 int cpu_index;
536
537 #if defined(CONFIG_USER_ONLY)
538 cpu_list_lock();
539 #endif
540 cpu_index = 0;
541 CPU_FOREACH(some_cpu) {
542 cpu_index++;
543 }
544 cpu->cpu_index = cpu_index;
545 cpu->numa_node = 0;
546 QTAILQ_INIT(&cpu->breakpoints);
547 QTAILQ_INIT(&cpu->watchpoints);
548 #ifndef CONFIG_USER_ONLY
549 cpu->as = &address_space_memory;
550 cpu->thread_id = qemu_get_thread_id();
551 #endif
552 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
553 #if defined(CONFIG_USER_ONLY)
554 cpu_list_unlock();
555 #endif
556 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
557 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
558 }
559 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
560 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
561 cpu_save, cpu_load, env);
562 assert(cc->vmsd == NULL);
563 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
564 #endif
565 if (cc->vmsd != NULL) {
566 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
567 }
568 }
569
570 #if defined(CONFIG_USER_ONLY)
571 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
572 {
573 tb_invalidate_phys_page_range(pc, pc + 1, 0);
574 }
575 #else
576 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
577 {
578 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
579 if (phys != -1) {
580 tb_invalidate_phys_addr(cpu->as,
581 phys | (pc & ~TARGET_PAGE_MASK));
582 }
583 }
584 #endif
585
586 #if defined(CONFIG_USER_ONLY)
587 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
588
589 {
590 }
591
592 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
593 int flags)
594 {
595 return -ENOSYS;
596 }
597
598 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
599 {
600 }
601
602 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
603 int flags, CPUWatchpoint **watchpoint)
604 {
605 return -ENOSYS;
606 }
607 #else
608 /* Add a watchpoint. */
609 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
610 int flags, CPUWatchpoint **watchpoint)
611 {
612 CPUWatchpoint *wp;
613
614 /* forbid ranges which are empty or run off the end of the address space */
615 if (len == 0 || (addr + len - 1) < addr) {
616 error_report("tried to set invalid watchpoint at %"
617 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
618 return -EINVAL;
619 }
620 wp = g_malloc(sizeof(*wp));
621
622 wp->vaddr = addr;
623 wp->len = len;
624 wp->flags = flags;
625
626 /* keep all GDB-injected watchpoints in front */
627 if (flags & BP_GDB) {
628 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
629 } else {
630 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
631 }
632
633 tlb_flush_page(cpu, addr);
634
635 if (watchpoint)
636 *watchpoint = wp;
637 return 0;
638 }
639
640 /* Remove a specific watchpoint. */
641 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
642 int flags)
643 {
644 CPUWatchpoint *wp;
645
646 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
647 if (addr == wp->vaddr && len == wp->len
648 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
649 cpu_watchpoint_remove_by_ref(cpu, wp);
650 return 0;
651 }
652 }
653 return -ENOENT;
654 }
655
656 /* Remove a specific watchpoint by reference. */
657 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
658 {
659 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
660
661 tlb_flush_page(cpu, watchpoint->vaddr);
662
663 g_free(watchpoint);
664 }
665
666 /* Remove all matching watchpoints. */
667 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
668 {
669 CPUWatchpoint *wp, *next;
670
671 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
672 if (wp->flags & mask) {
673 cpu_watchpoint_remove_by_ref(cpu, wp);
674 }
675 }
676 }
677
678 /* Return true if this watchpoint address matches the specified
679 * access (i.e. the address range covered by the watchpoint overlaps
680 * partially or completely with the address range covered by the
681 * access).
682 */
683 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
684 vaddr addr,
685 vaddr len)
686 {
687 /* We know the lengths are non-zero, but a little caution is
688 * required to avoid errors in the case where the range ends
689 * exactly at the top of the address space and so addr + len
690 * wraps round to zero.
691 */
692 vaddr wpend = wp->vaddr + wp->len - 1;
693 vaddr addrend = addr + len - 1;
694
695 return !(addr > wpend || wp->vaddr > addrend);
696 }
697
698 #endif
699
700 /* Add a breakpoint. */
701 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
702 CPUBreakpoint **breakpoint)
703 {
704 CPUBreakpoint *bp;
705
706 bp = g_malloc(sizeof(*bp));
707
708 bp->pc = pc;
709 bp->flags = flags;
710
711 /* keep all GDB-injected breakpoints in front */
712 if (flags & BP_GDB) {
713 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
714 } else {
715 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
716 }
717
718 breakpoint_invalidate(cpu, pc);
719
720 if (breakpoint) {
721 *breakpoint = bp;
722 }
723 return 0;
724 }
725
726 /* Remove a specific breakpoint. */
727 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
728 {
729 CPUBreakpoint *bp;
730
731 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
732 if (bp->pc == pc && bp->flags == flags) {
733 cpu_breakpoint_remove_by_ref(cpu, bp);
734 return 0;
735 }
736 }
737 return -ENOENT;
738 }
739
740 /* Remove a specific breakpoint by reference. */
741 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
742 {
743 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
744
745 breakpoint_invalidate(cpu, breakpoint->pc);
746
747 g_free(breakpoint);
748 }
749
750 /* Remove all matching breakpoints. */
751 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
752 {
753 CPUBreakpoint *bp, *next;
754
755 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
756 if (bp->flags & mask) {
757 cpu_breakpoint_remove_by_ref(cpu, bp);
758 }
759 }
760 }
761
762 /* enable or disable single step mode. EXCP_DEBUG is returned by the
763 CPU loop after each instruction */
764 void cpu_single_step(CPUState *cpu, int enabled)
765 {
766 if (cpu->singlestep_enabled != enabled) {
767 cpu->singlestep_enabled = enabled;
768 if (kvm_enabled()) {
769 kvm_update_guest_debug(cpu, 0);
770 } else {
771 /* must flush all the translated code to avoid inconsistencies */
772 /* XXX: only flush what is necessary */
773 CPUArchState *env = cpu->env_ptr;
774 tb_flush(env);
775 }
776 }
777 }
778
779 void cpu_abort(CPUState *cpu, const char *fmt, ...)
780 {
781 va_list ap;
782 va_list ap2;
783
784 va_start(ap, fmt);
785 va_copy(ap2, ap);
786 fprintf(stderr, "qemu: fatal: ");
787 vfprintf(stderr, fmt, ap);
788 fprintf(stderr, "\n");
789 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
790 if (qemu_log_enabled()) {
791 qemu_log("qemu: fatal: ");
792 qemu_log_vprintf(fmt, ap2);
793 qemu_log("\n");
794 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
795 qemu_log_flush();
796 qemu_log_close();
797 }
798 va_end(ap2);
799 va_end(ap);
800 #if defined(CONFIG_USER_ONLY)
801 {
802 struct sigaction act;
803 sigfillset(&act.sa_mask);
804 act.sa_handler = SIG_DFL;
805 sigaction(SIGABRT, &act, NULL);
806 }
807 #endif
808 abort();
809 }
810
811 #if !defined(CONFIG_USER_ONLY)
812 /* Called from RCU critical section */
813 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
814 {
815 RAMBlock *block;
816
817 block = atomic_rcu_read(&ram_list.mru_block);
818 if (block && addr - block->offset < block->max_length) {
819 goto found;
820 }
821 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
822 if (addr - block->offset < block->max_length) {
823 goto found;
824 }
825 }
826
827 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
828 abort();
829
830 found:
831 /* It is safe to write mru_block outside the iothread lock. This
832 * is what happens:
833 *
834 * mru_block = xxx
835 * rcu_read_unlock()
836 * xxx removed from list
837 * rcu_read_lock()
838 * read mru_block
839 * mru_block = NULL;
840 * call_rcu(reclaim_ramblock, xxx);
841 * rcu_read_unlock()
842 *
843 * atomic_rcu_set is not needed here. The block was already published
844 * when it was placed into the list. Here we're just making an extra
845 * copy of the pointer.
846 */
847 ram_list.mru_block = block;
848 return block;
849 }
850
851 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
852 {
853 ram_addr_t start1;
854 RAMBlock *block;
855 ram_addr_t end;
856
857 end = TARGET_PAGE_ALIGN(start + length);
858 start &= TARGET_PAGE_MASK;
859
860 rcu_read_lock();
861 block = qemu_get_ram_block(start);
862 assert(block == qemu_get_ram_block(end - 1));
863 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
864 cpu_tlb_reset_dirty_all(start1, length);
865 rcu_read_unlock();
866 }
867
868 /* Note: start and end must be within the same ram block. */
869 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
870 unsigned client)
871 {
872 if (length == 0)
873 return;
874 cpu_physical_memory_clear_dirty_range_type(start, length, client);
875
876 if (tcg_enabled()) {
877 tlb_reset_dirty_range_all(start, length);
878 }
879 }
880
881 static void cpu_physical_memory_set_dirty_tracking(bool enable)
882 {
883 in_migration = enable;
884 }
885
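/* Encode the iotlb value stored in the TLB: for RAM pages, the page-aligned
 * ram address plus xlat, ORed with PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM;
 * for everything else, the section number plus xlat.
 */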
886 /* Called from RCU critical section */
887 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
888 MemoryRegionSection *section,
889 target_ulong vaddr,
890 hwaddr paddr, hwaddr xlat,
891 int prot,
892 target_ulong *address)
893 {
894 hwaddr iotlb;
895 CPUWatchpoint *wp;
896
897 if (memory_region_is_ram(section->mr)) {
898 /* Normal RAM. */
899 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
900 + xlat;
901 if (!section->readonly) {
902 iotlb |= PHYS_SECTION_NOTDIRTY;
903 } else {
904 iotlb |= PHYS_SECTION_ROM;
905 }
906 } else {
907 iotlb = section - section->address_space->dispatch->map.sections;
908 iotlb += xlat;
909 }
910
911 /* Make accesses to pages with watchpoints go via the
912 watchpoint trap routines. */
913 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
914 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
915 /* Avoid trapping reads of pages with a write breakpoint. */
916 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
917 iotlb = PHYS_SECTION_WATCH + paddr;
918 *address |= TLB_MMIO;
919 break;
920 }
921 }
922 }
923
924 return iotlb;
925 }
926 #endif /* defined(CONFIG_USER_ONLY) */
927
928 #if !defined(CONFIG_USER_ONLY)
929
930 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
931 uint16_t section);
932 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
933
934 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
935 qemu_anon_ram_alloc;
936
937 /*
938 * Set a custom physical guest memory allocator.
939 * Accelerators with unusual needs may need this. Hopefully, we can
940 * get rid of it eventually.
941 */
942 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
943 {
944 phys_mem_alloc = alloc;
945 }
946
947 static uint16_t phys_section_add(PhysPageMap *map,
948 MemoryRegionSection *section)
949 {
950 /* The physical section number is ORed with a page-aligned
951 * pointer to produce the iotlb entries. Thus it should
952 * never overflow into the page-aligned value.
953 */
954 assert(map->sections_nb < TARGET_PAGE_SIZE);
955
956 if (map->sections_nb == map->sections_nb_alloc) {
957 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
958 map->sections = g_renew(MemoryRegionSection, map->sections,
959 map->sections_nb_alloc);
960 }
961 map->sections[map->sections_nb] = *section;
962 memory_region_ref(section->mr);
963 return map->sections_nb++;
964 }
965
966 static void phys_section_destroy(MemoryRegion *mr)
967 {
968 memory_region_unref(mr);
969
970 if (mr->subpage) {
971 subpage_t *subpage = container_of(mr, subpage_t, iomem);
972 object_unref(OBJECT(&subpage->iomem));
973 g_free(subpage);
974 }
975 }
976
977 static void phys_sections_free(PhysPageMap *map)
978 {
979 while (map->sections_nb > 0) {
980 MemoryRegionSection *section = &map->sections[--map->sections_nb];
981 phys_section_destroy(section->mr);
982 }
983 g_free(map->sections);
984 g_free(map->nodes);
985 }
986
987 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
988 {
989 subpage_t *subpage;
990 hwaddr base = section->offset_within_address_space
991 & TARGET_PAGE_MASK;
992 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
993 d->map.nodes, d->map.sections);
994 MemoryRegionSection subsection = {
995 .offset_within_address_space = base,
996 .size = int128_make64(TARGET_PAGE_SIZE),
997 };
998 hwaddr start, end;
999
1000 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1001
1002 if (!(existing->mr->subpage)) {
1003 subpage = subpage_init(d->as, base);
1004 subsection.address_space = d->as;
1005 subsection.mr = &subpage->iomem;
1006 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1007 phys_section_add(&d->map, &subsection));
1008 } else {
1009 subpage = container_of(existing->mr, subpage_t, iomem);
1010 }
1011 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1012 end = start + int128_get64(section->size) - 1;
1013 subpage_register(subpage, start, end,
1014 phys_section_add(&d->map, section));
1015 }
1016
1017
1018 static void register_multipage(AddressSpaceDispatch *d,
1019 MemoryRegionSection *section)
1020 {
1021 hwaddr start_addr = section->offset_within_address_space;
1022 uint16_t section_index = phys_section_add(&d->map, section);
1023 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1024 TARGET_PAGE_BITS));
1025
1026 assert(num_pages);
1027 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1028 }
1029
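/* Split the new section: pieces smaller than a page, or not page aligned, are
 * registered through subpages; the page-aligned remainder is registered as
 * whole pages.
 */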
1030 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1031 {
1032 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1033 AddressSpaceDispatch *d = as->next_dispatch;
1034 MemoryRegionSection now = *section, remain = *section;
1035 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1036
1037 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1038 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1039 - now.offset_within_address_space;
1040
1041 now.size = int128_min(int128_make64(left), now.size);
1042 register_subpage(d, &now);
1043 } else {
1044 now.size = int128_zero();
1045 }
1046 while (int128_ne(remain.size, now.size)) {
1047 remain.size = int128_sub(remain.size, now.size);
1048 remain.offset_within_address_space += int128_get64(now.size);
1049 remain.offset_within_region += int128_get64(now.size);
1050 now = remain;
1051 if (int128_lt(remain.size, page_size)) {
1052 register_subpage(d, &now);
1053 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1054 now.size = page_size;
1055 register_subpage(d, &now);
1056 } else {
1057 now.size = int128_and(now.size, int128_neg(page_size));
1058 register_multipage(d, &now);
1059 }
1060 }
1061 }
1062
1063 void qemu_flush_coalesced_mmio_buffer(void)
1064 {
1065 if (kvm_enabled())
1066 kvm_flush_coalesced_mmio_buffer();
1067 }
1068
1069 void qemu_mutex_lock_ramlist(void)
1070 {
1071 qemu_mutex_lock(&ram_list.mutex);
1072 }
1073
1074 void qemu_mutex_unlock_ramlist(void)
1075 {
1076 qemu_mutex_unlock(&ram_list.mutex);
1077 }
1078
1079 #ifdef __linux__
1080
1081 #include <sys/vfs.h>
1082
1083 #define HUGETLBFS_MAGIC 0x958458f6
1084
1085 static long gethugepagesize(const char *path, Error **errp)
1086 {
1087 struct statfs fs;
1088 int ret;
1089
1090 do {
1091 ret = statfs(path, &fs);
1092 } while (ret != 0 && errno == EINTR);
1093
1094 if (ret != 0) {
1095 error_setg_errno(errp, errno, "failed to get page size of file %s",
1096 path);
1097 return 0;
1098 }
1099
1100 if (fs.f_type != HUGETLBFS_MAGIC)
1101 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1102
1103 return fs.f_bsize;
1104 }
1105
1106 static void *file_ram_alloc(RAMBlock *block,
1107 ram_addr_t memory,
1108 const char *path,
1109 Error **errp)
1110 {
1111 char *filename;
1112 char *sanitized_name;
1113 char *c;
1114 void *area = NULL;
1115 int fd;
1116 uint64_t hpagesize;
1117 Error *local_err = NULL;
1118
1119 hpagesize = gethugepagesize(path, &local_err);
1120 if (local_err) {
1121 error_propagate(errp, local_err);
1122 goto error;
1123 }
1124 block->mr->align = hpagesize;
1125
1126 if (memory < hpagesize) {
1127 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1128 "or larger than huge page size 0x%" PRIx64,
1129 memory, hpagesize);
1130 goto error;
1131 }
1132
1133 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1134 error_setg(errp,
1135 "host lacks kvm mmu notifiers, -mem-path unsupported");
1136 goto error;
1137 }
1138
1139 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1140 sanitized_name = g_strdup(memory_region_name(block->mr));
1141 for (c = sanitized_name; *c != '\0'; c++) {
1142 if (*c == '/')
1143 *c = '_';
1144 }
1145
1146 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1147 sanitized_name);
1148 g_free(sanitized_name);
1149
1150 fd = mkstemp(filename);
1151 if (fd < 0) {
1152 error_setg_errno(errp, errno,
1153 "unable to create backing store for hugepages");
1154 g_free(filename);
1155 goto error;
1156 }
1157 unlink(filename);
1158 g_free(filename);
1159
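/* Round the size up to a multiple of the huge page size before truncating
 * and mapping the backing file. */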
1160 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1161
1162 /*
1163 * ftruncate is not supported by hugetlbfs in older
1164 * hosts, so don't bother bailing out on errors.
1165 * If anything goes wrong with it under other filesystems,
1166 * mmap will fail.
1167 */
1168 if (ftruncate(fd, memory)) {
1169 perror("ftruncate");
1170 }
1171
1172 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1173 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1174 fd, 0);
1175 if (area == MAP_FAILED) {
1176 error_setg_errno(errp, errno,
1177 "unable to map backing store for hugepages");
1178 close(fd);
1179 goto error;
1180 }
1181
1182 if (mem_prealloc) {
1183 os_mem_prealloc(fd, area, memory);
1184 }
1185
1186 block->fd = fd;
1187 return area;
1188
1189 error:
1190 if (mem_prealloc) {
1191 error_report("%s\n", error_get_pretty(*errp));
1192 exit(1);
1193 }
1194 return NULL;
1195 }
1196 #endif
1197
1198 /* Called with the ramlist lock held. */
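/* Best fit: pick the smallest gap between existing blocks that can hold
 * @size bytes. */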
1199 static ram_addr_t find_ram_offset(ram_addr_t size)
1200 {
1201 RAMBlock *block, *next_block;
1202 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1203
1204 assert(size != 0); /* it would hand out same offset multiple times */
1205
1206 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1207 return 0;
1208 }
1209
1210 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1211 ram_addr_t end, next = RAM_ADDR_MAX;
1212
1213 end = block->offset + block->max_length;
1214
1215 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1216 if (next_block->offset >= end) {
1217 next = MIN(next, next_block->offset);
1218 }
1219 }
1220 if (next - end >= size && next - end < mingap) {
1221 offset = end;
1222 mingap = next - end;
1223 }
1224 }
1225
1226 if (offset == RAM_ADDR_MAX) {
1227 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1228 (uint64_t)size);
1229 abort();
1230 }
1231
1232 return offset;
1233 }
1234
1235 ram_addr_t last_ram_offset(void)
1236 {
1237 RAMBlock *block;
1238 ram_addr_t last = 0;
1239
1240 rcu_read_lock();
1241 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1242 last = MAX(last, block->offset + block->max_length);
1243 }
1244 rcu_read_unlock();
1245 return last;
1246 }
1247
1248 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1249 {
1250 int ret;
1251
1252 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1253 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1254 "dump-guest-core", true)) {
1255 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1256 if (ret) {
1257 perror("qemu_madvise");
1258 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1259 "but dump_guest_core=off specified\n");
1260 }
1261 }
1262 }
1263
1264 /* Called within an RCU critical section, or while the ramlist lock
1265 * is held.
1266 */
1267 static RAMBlock *find_ram_block(ram_addr_t addr)
1268 {
1269 RAMBlock *block;
1270
1271 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1272 if (block->offset == addr) {
1273 return block;
1274 }
1275 }
1276
1277 return NULL;
1278 }
1279
1280 /* Called with iothread lock held. */
1281 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1282 {
1283 RAMBlock *new_block, *block;
1284
1285 rcu_read_lock();
1286 new_block = find_ram_block(addr);
1287 assert(new_block);
1288 assert(!new_block->idstr[0]);
1289
1290 if (dev) {
1291 char *id = qdev_get_dev_path(dev);
1292 if (id) {
1293 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1294 g_free(id);
1295 }
1296 }
1297 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1298
1299 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1300 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1301 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1302 new_block->idstr);
1303 abort();
1304 }
1305 }
1306 rcu_read_unlock();
1307 }
1308
1309 /* Called with iothread lock held. */
1310 void qemu_ram_unset_idstr(ram_addr_t addr)
1311 {
1312 RAMBlock *block;
1313
1314 /* FIXME: arch_init.c assumes that this is not called throughout
1315 * migration. Ignore the problem since hot-unplug during migration
1316 * does not work anyway.
1317 */
1318
1319 rcu_read_lock();
1320 block = find_ram_block(addr);
1321 if (block) {
1322 memset(block->idstr, 0, sizeof(block->idstr));
1323 }
1324 rcu_read_unlock();
1325 }
1326
1327 static int memory_try_enable_merging(void *addr, size_t len)
1328 {
1329 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1330 /* disabled by the user */
1331 return 0;
1332 }
1333
1334 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1335 }
1336
1337 /* Only legal before guest might have detected the memory size: e.g. on
1338 * incoming migration, or right after reset.
1339 *
1340 * As the memory core doesn't know how memory is accessed, it is up to the
1341 * resize callback to update device state and/or add assertions to detect
1342 * misuse, if necessary.
1343 */
1344 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1345 {
1346 RAMBlock *block = find_ram_block(base);
1347
1348 assert(block);
1349
1350 if (block->used_length == newsize) {
1351 return 0;
1352 }
1353
1354 if (!(block->flags & RAM_RESIZEABLE)) {
1355 error_setg_errno(errp, EINVAL,
1356 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1357 " in != 0x" RAM_ADDR_FMT, block->idstr,
1358 newsize, block->used_length);
1359 return -EINVAL;
1360 }
1361
1362 if (block->max_length < newsize) {
1363 error_setg_errno(errp, EINVAL,
1364 "Length too large: %s: 0x" RAM_ADDR_FMT
1365 " > 0x" RAM_ADDR_FMT, block->idstr,
1366 newsize, block->max_length);
1367 return -EINVAL;
1368 }
1369
1370 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1371 block->used_length = newsize;
1372 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1373 memory_region_set_size(block->mr, newsize);
1374 if (block->resized) {
1375 block->resized(block->idstr, newsize, block->host);
1376 }
1377 return 0;
1378 }
1379
1380 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1381 {
1382 RAMBlock *block;
1383 RAMBlock *last_block = NULL;
1384 ram_addr_t old_ram_size, new_ram_size;
1385
1386 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1387
1388 qemu_mutex_lock_ramlist();
1389 new_block->offset = find_ram_offset(new_block->max_length);
1390
1391 if (!new_block->host) {
1392 if (xen_enabled()) {
1393 xen_ram_alloc(new_block->offset, new_block->max_length,
1394 new_block->mr);
1395 } else {
1396 new_block->host = phys_mem_alloc(new_block->max_length,
1397 &new_block->mr->align);
1398 if (!new_block->host) {
1399 error_setg_errno(errp, errno,
1400 "cannot set up guest memory '%s'",
1401 memory_region_name(new_block->mr));
1402 qemu_mutex_unlock_ramlist();
1403 return -1;
1404 }
1405 memory_try_enable_merging(new_block->host, new_block->max_length);
1406 }
1407 }
1408
1409 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1410 * QLIST (which has an RCU-friendly variant) does not have insertion at
1411 * tail, so save the last element in last_block.
1412 */
1413 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1414 last_block = block;
1415 if (block->max_length < new_block->max_length) {
1416 break;
1417 }
1418 }
1419 if (block) {
1420 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1421 } else if (last_block) {
1422 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1423 } else { /* list is empty */
1424 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1425 }
1426 ram_list.mru_block = NULL;
1427
1428 /* Write list before version */
1429 smp_wmb();
1430 ram_list.version++;
1431 qemu_mutex_unlock_ramlist();
1432
1433 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1434
1435 if (new_ram_size > old_ram_size) {
1436 int i;
1437
1438 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1439 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1440 ram_list.dirty_memory[i] =
1441 bitmap_zero_extend(ram_list.dirty_memory[i],
1442 old_ram_size, new_ram_size);
1443 }
1444 }
1445 cpu_physical_memory_set_dirty_range(new_block->offset,
1446 new_block->used_length);
1447
1448 if (new_block->host) {
1449 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1450 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1451 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1452 if (kvm_enabled()) {
1453 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1454 }
1455 }
1456
1457 return new_block->offset;
1458 }
1459
1460 #ifdef __linux__
1461 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1462 bool share, const char *mem_path,
1463 Error **errp)
1464 {
1465 RAMBlock *new_block;
1466 ram_addr_t addr;
1467 Error *local_err = NULL;
1468
1469 if (xen_enabled()) {
1470 error_setg(errp, "-mem-path not supported with Xen");
1471 return -1;
1472 }
1473
1474 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1475 /*
1476 * file_ram_alloc() needs to allocate just like
1477 * phys_mem_alloc, but we haven't bothered to provide
1478 * a hook there.
1479 */
1480 error_setg(errp,
1481 "-mem-path not supported with this accelerator");
1482 return -1;
1483 }
1484
1485 size = TARGET_PAGE_ALIGN(size);
1486 new_block = g_malloc0(sizeof(*new_block));
1487 new_block->mr = mr;
1488 new_block->used_length = size;
1489 new_block->max_length = size;
1490 new_block->flags = share ? RAM_SHARED : 0;
1491 new_block->host = file_ram_alloc(new_block, size,
1492 mem_path, errp);
1493 if (!new_block->host) {
1494 g_free(new_block);
1495 return -1;
1496 }
1497
1498 addr = ram_block_add(new_block, &local_err);
1499 if (local_err) {
1500 g_free(new_block);
1501 error_propagate(errp, local_err);
1502 return -1;
1503 }
1504 return addr;
1505 }
1506 #endif
1507
1508 static
1509 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1510 void (*resized)(const char*,
1511 uint64_t length,
1512 void *host),
1513 void *host, bool resizeable,
1514 MemoryRegion *mr, Error **errp)
1515 {
1516 RAMBlock *new_block;
1517 ram_addr_t addr;
1518 Error *local_err = NULL;
1519
1520 size = TARGET_PAGE_ALIGN(size);
1521 max_size = TARGET_PAGE_ALIGN(max_size);
1522 new_block = g_malloc0(sizeof(*new_block));
1523 new_block->mr = mr;
1524 new_block->resized = resized;
1525 new_block->used_length = size;
1526 new_block->max_length = max_size;
1527 assert(max_size >= size);
1528 new_block->fd = -1;
1529 new_block->host = host;
1530 if (host) {
1531 new_block->flags |= RAM_PREALLOC;
1532 }
1533 if (resizeable) {
1534 new_block->flags |= RAM_RESIZEABLE;
1535 }
1536 addr = ram_block_add(new_block, &local_err);
1537 if (local_err) {
1538 g_free(new_block);
1539 error_propagate(errp, local_err);
1540 return -1;
1541 }
1542 return addr;
1543 }
1544
1545 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1546 MemoryRegion *mr, Error **errp)
1547 {
1548 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1549 }
1550
1551 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1552 {
1553 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1554 }
1555
1556 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1557 void (*resized)(const char*,
1558 uint64_t length,
1559 void *host),
1560 MemoryRegion *mr, Error **errp)
1561 {
1562 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1563 }
1564
1565 void qemu_ram_free_from_ptr(ram_addr_t addr)
1566 {
1567 RAMBlock *block;
1568
1569 qemu_mutex_lock_ramlist();
1570 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1571 if (addr == block->offset) {
1572 QLIST_REMOVE_RCU(block, next);
1573 ram_list.mru_block = NULL;
1574 /* Write list before version */
1575 smp_wmb();
1576 ram_list.version++;
1577 g_free_rcu(block, rcu);
1578 break;
1579 }
1580 }
1581 qemu_mutex_unlock_ramlist();
1582 }
1583
1584 static void reclaim_ramblock(RAMBlock *block)
1585 {
1586 if (block->flags & RAM_PREALLOC) {
1587 ;
1588 } else if (xen_enabled()) {
1589 xen_invalidate_map_cache_entry(block->host);
1590 #ifndef _WIN32
1591 } else if (block->fd >= 0) {
1592 munmap(block->host, block->max_length);
1593 close(block->fd);
1594 #endif
1595 } else {
1596 qemu_anon_ram_free(block->host, block->max_length);
1597 }
1598 g_free(block);
1599 }
1600
1601 void qemu_ram_free(ram_addr_t addr)
1602 {
1603 RAMBlock *block;
1604
1605 qemu_mutex_lock_ramlist();
1606 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1607 if (addr == block->offset) {
1608 QLIST_REMOVE_RCU(block, next);
1609 ram_list.mru_block = NULL;
1610 /* Write list before version */
1611 smp_wmb();
1612 ram_list.version++;
1613 call_rcu(block, reclaim_ramblock, rcu);
1614 break;
1615 }
1616 }
1617 qemu_mutex_unlock_ramlist();
1618 }
1619
1620 #ifndef _WIN32
1621 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1622 {
1623 RAMBlock *block;
1624 ram_addr_t offset;
1625 int flags;
1626 void *area, *vaddr;
1627
1628 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1629 offset = addr - block->offset;
1630 if (offset < block->max_length) {
1631 vaddr = ramblock_ptr(block, offset);
1632 if (block->flags & RAM_PREALLOC) {
1633 ;
1634 } else if (xen_enabled()) {
1635 abort();
1636 } else {
1637 flags = MAP_FIXED;
1638 munmap(vaddr, length);
1639 if (block->fd >= 0) {
1640 flags |= (block->flags & RAM_SHARED ?
1641 MAP_SHARED : MAP_PRIVATE);
1642 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1643 flags, block->fd, offset);
1644 } else {
1645 /*
1646 * Remap needs to match alloc. Accelerators that
1647 * set phys_mem_alloc never remap. If they did,
1648 * we'd need a remap hook here.
1649 */
1650 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1651
1652 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1653 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1654 flags, -1, 0);
1655 }
1656 if (area != vaddr) {
1657 fprintf(stderr, "Could not remap addr: "
1658 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1659 length, addr);
1660 exit(1);
1661 }
1662 memory_try_enable_merging(vaddr, length);
1663 qemu_ram_setup_dump(vaddr, length);
1664 }
1665 }
1666 }
1667 }
1668 #endif /* !_WIN32 */
1669
1670 int qemu_get_ram_fd(ram_addr_t addr)
1671 {
1672 RAMBlock *block;
1673 int fd;
1674
1675 rcu_read_lock();
1676 block = qemu_get_ram_block(addr);
1677 fd = block->fd;
1678 rcu_read_unlock();
1679 return fd;
1680 }
1681
1682 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1683 {
1684 RAMBlock *block;
1685 void *ptr;
1686
1687 rcu_read_lock();
1688 block = qemu_get_ram_block(addr);
1689 ptr = ramblock_ptr(block, 0);
1690 rcu_read_unlock();
1691 return ptr;
1692 }
1693
1694 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1695 * This should not be used for general purpose DMA. Use address_space_map
1696 * or address_space_rw instead. For local memory (e.g. video ram) that the
1697 * device owns, use memory_region_get_ram_ptr.
1698 *
1699 * By the time this function returns, the returned pointer is not protected
1700 * by RCU anymore. If the caller is not within an RCU critical section and
1701 * does not hold the iothread lock, it must have other means of protecting the
1702 * pointer, such as a reference to the region that includes the incoming
1703 * ram_addr_t.
1704 */
1705 void *qemu_get_ram_ptr(ram_addr_t addr)
1706 {
1707 RAMBlock *block;
1708 void *ptr;
1709
1710 rcu_read_lock();
1711 block = qemu_get_ram_block(addr);
1712
1713 if (xen_enabled() && block->host == NULL) {
1714 /* We need to check if the requested address is in the RAM
1715 * because we don't want to map the entire memory in QEMU.
1716 * In that case just map until the end of the page.
1717 */
1718 if (block->offset == 0) {
1719 ptr = xen_map_cache(addr, 0, 0);
1720 goto unlock;
1721 }
1722
1723 block->host = xen_map_cache(block->offset, block->max_length, 1);
1724 }
1725 ptr = ramblock_ptr(block, addr - block->offset);
1726
1727 unlock:
1728 rcu_read_unlock();
1729 return ptr;
1730 }
1731
1732 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1733 * but takes a size argument.
1734 *
1735 * By the time this function returns, the returned pointer is not protected
1736 * by RCU anymore. If the caller is not within an RCU critical section and
1737 * does not hold the iothread lock, it must have other means of protecting the
1738 * pointer, such as a reference to the region that includes the incoming
1739 * ram_addr_t.
1740 */
1741 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1742 {
1743 void *ptr;
1744 if (*size == 0) {
1745 return NULL;
1746 }
1747 if (xen_enabled()) {
1748 return xen_map_cache(addr, *size, 1);
1749 } else {
1750 RAMBlock *block;
1751 rcu_read_lock();
1752 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1753 if (addr - block->offset < block->max_length) {
1754 if (addr - block->offset + *size > block->max_length)
1755 *size = block->max_length - addr + block->offset;
1756 ptr = ramblock_ptr(block, addr - block->offset);
1757 rcu_read_unlock();
1758 return ptr;
1759 }
1760 }
1761
1762 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1763 abort();
1764 }
1765 }
1766
1767 /* Some of the softmmu routines need to translate from a host pointer
1768 * (typically a TLB entry) back to a ram offset.
1769 *
1770 * By the time this function returns, the returned pointer is not protected
1771 * by RCU anymore. If the caller is not within an RCU critical section and
1772 * does not hold the iothread lock, it must have other means of protecting the
1773 * pointer, such as a reference to the region that includes the incoming
1774 * ram_addr_t.
1775 */
1776 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1777 {
1778 RAMBlock *block;
1779 uint8_t *host = ptr;
1780 MemoryRegion *mr;
1781
1782 if (xen_enabled()) {
1783 rcu_read_lock();
1784 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1785 mr = qemu_get_ram_block(*ram_addr)->mr;
1786 rcu_read_unlock();
1787 return mr;
1788 }
1789
1790 rcu_read_lock();
1791 block = atomic_rcu_read(&ram_list.mru_block);
1792 if (block && block->host && host - block->host < block->max_length) {
1793 goto found;
1794 }
1795
1796 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1797 /* This case appears when the block is not mapped. */
1798 if (block->host == NULL) {
1799 continue;
1800 }
1801 if (host - block->host < block->max_length) {
1802 goto found;
1803 }
1804 }
1805
1806 rcu_read_unlock();
1807 return NULL;
1808
1809 found:
1810 *ram_addr = block->offset + (host - block->host);
1811 mr = block->mr;
1812 rcu_read_unlock();
1813 return mr;
1814 }
1815
1816 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1817 uint64_t val, unsigned size)
1818 {
1819 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1820 tb_invalidate_phys_page_fast(ram_addr, size);
1821 }
1822 switch (size) {
1823 case 1:
1824 stb_p(qemu_get_ram_ptr(ram_addr), val);
1825 break;
1826 case 2:
1827 stw_p(qemu_get_ram_ptr(ram_addr), val);
1828 break;
1829 case 4:
1830 stl_p(qemu_get_ram_ptr(ram_addr), val);
1831 break;
1832 default:
1833 abort();
1834 }
1835 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1836 /* we remove the notdirty callback only if the code has been
1837 flushed */
1838 if (!cpu_physical_memory_is_clean(ram_addr)) {
1839 CPUArchState *env = current_cpu->env_ptr;
1840 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1841 }
1842 }
1843
1844 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1845 unsigned size, bool is_write)
1846 {
1847 return is_write;
1848 }
1849
1850 static const MemoryRegionOps notdirty_mem_ops = {
1851 .write = notdirty_mem_write,
1852 .valid.accepts = notdirty_mem_accepts,
1853 .endianness = DEVICE_NATIVE_ENDIAN,
1854 };
1855
1856 /* Generate a debug exception if a watchpoint has been hit. */
1857 static void check_watchpoint(int offset, int len, int flags)
1858 {
1859 CPUState *cpu = current_cpu;
1860 CPUArchState *env = cpu->env_ptr;
1861 target_ulong pc, cs_base;
1862 target_ulong vaddr;
1863 CPUWatchpoint *wp;
1864 int cpu_flags;
1865
1866 if (cpu->watchpoint_hit) {
1867 /* We re-entered the check after replacing the TB. Now raise
1868 * the debug interrupt so that it will trigger after the
1869 * current instruction. */
1870 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1871 return;
1872 }
1873 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1874 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1875 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1876 && (wp->flags & flags)) {
1877 if (flags == BP_MEM_READ) {
1878 wp->flags |= BP_WATCHPOINT_HIT_READ;
1879 } else {
1880 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1881 }
1882 wp->hitaddr = vaddr;
1883 if (!cpu->watchpoint_hit) {
1884 cpu->watchpoint_hit = wp;
1885 tb_check_watchpoint(cpu);
1886 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1887 cpu->exception_index = EXCP_DEBUG;
1888 cpu_loop_exit(cpu);
1889 } else {
1890 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1891 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1892 cpu_resume_from_signal(cpu, NULL);
1893 }
1894 }
1895 } else {
1896 wp->flags &= ~BP_WATCHPOINT_HIT;
1897 }
1898 }
1899 }
1900
1901 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1902 so these check for a hit then pass through to the normal out-of-line
1903 phys routines. */
1904 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1905 unsigned size)
1906 {
1907 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1908 switch (size) {
1909 case 1: return ldub_phys(&address_space_memory, addr);
1910 case 2: return lduw_phys(&address_space_memory, addr);
1911 case 4: return ldl_phys(&address_space_memory, addr);
1912 default: abort();
1913 }
1914 }
1915
1916 static void watch_mem_write(void *opaque, hwaddr addr,
1917 uint64_t val, unsigned size)
1918 {
1919 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1920 switch (size) {
1921 case 1:
1922 stb_phys(&address_space_memory, addr, val);
1923 break;
1924 case 2:
1925 stw_phys(&address_space_memory, addr, val);
1926 break;
1927 case 4:
1928 stl_phys(&address_space_memory, addr, val);
1929 break;
1930 default: abort();
1931 }
1932 }
1933
1934 static const MemoryRegionOps watch_mem_ops = {
1935 .read = watch_mem_read,
1936 .write = watch_mem_write,
1937 .endianness = DEVICE_NATIVE_ENDIAN,
1938 };
1939
1940 static uint64_t subpage_read(void *opaque, hwaddr addr,
1941 unsigned len)
1942 {
1943 subpage_t *subpage = opaque;
1944 uint8_t buf[8];
1945
1946 #if defined(DEBUG_SUBPAGE)
1947 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1948 subpage, len, addr);
1949 #endif
1950 address_space_read(subpage->as, addr + subpage->base, buf, len);
1951 switch (len) {
1952 case 1:
1953 return ldub_p(buf);
1954 case 2:
1955 return lduw_p(buf);
1956 case 4:
1957 return ldl_p(buf);
1958 case 8:
1959 return ldq_p(buf);
1960 default:
1961 abort();
1962 }
1963 }
1964
1965 static void subpage_write(void *opaque, hwaddr addr,
1966 uint64_t value, unsigned len)
1967 {
1968 subpage_t *subpage = opaque;
1969 uint8_t buf[8];
1970
1971 #if defined(DEBUG_SUBPAGE)
1972 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1973 " value %"PRIx64"\n",
1974 __func__, subpage, len, addr, value);
1975 #endif
1976 switch (len) {
1977 case 1:
1978 stb_p(buf, value);
1979 break;
1980 case 2:
1981 stw_p(buf, value);
1982 break;
1983 case 4:
1984 stl_p(buf, value);
1985 break;
1986 case 8:
1987 stq_p(buf, value);
1988 break;
1989 default:
1990 abort();
1991 }
1992 address_space_write(subpage->as, addr + subpage->base, buf, len);
1993 }
1994
1995 static bool subpage_accepts(void *opaque, hwaddr addr,
1996 unsigned len, bool is_write)
1997 {
1998 subpage_t *subpage = opaque;
1999 #if defined(DEBUG_SUBPAGE)
2000 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2001 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2002 #endif
2003
2004 return address_space_access_valid(subpage->as, addr + subpage->base,
2005 len, is_write);
2006 }
2007
2008 static const MemoryRegionOps subpage_ops = {
2009 .read = subpage_read,
2010 .write = subpage_write,
2011 .impl.min_access_size = 1,
2012 .impl.max_access_size = 8,
2013 .valid.min_access_size = 1,
2014 .valid.max_access_size = 8,
2015 .valid.accepts = subpage_accepts,
2016 .endianness = DEVICE_NATIVE_ENDIAN,
2017 };
2018
2019 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2020 uint16_t section)
2021 {
2022 int idx, eidx;
2023
2024 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2025 return -1;
2026 idx = SUBPAGE_IDX(start);
2027 eidx = SUBPAGE_IDX(end);
2028 #if defined(DEBUG_SUBPAGE)
2029 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2030 __func__, mmio, start, end, idx, eidx, section);
2031 #endif
2032 for (; idx <= eidx; idx++) {
2033 mmio->sub_section[idx] = section;
2034 }
2035
2036 return 0;
2037 }
2038
2039 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2040 {
2041 subpage_t *mmio;
2042
2043 mmio = g_malloc0(sizeof(subpage_t));
2044
2045 mmio->as = as;
2046 mmio->base = base;
2047 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2048 NULL, TARGET_PAGE_SIZE);
2049 mmio->iomem.subpage = true;
2050 #if defined(DEBUG_SUBPAGE)
2051 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2052 mmio, base, TARGET_PAGE_SIZE);
2053 #endif
2054 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2055
2056 return mmio;
2057 }
2058
2059 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2060 MemoryRegion *mr)
2061 {
2062 assert(as);
2063 MemoryRegionSection section = {
2064 .address_space = as,
2065 .mr = mr,
2066 .offset_within_address_space = 0,
2067 .offset_within_region = 0,
2068 .size = int128_2_64(),
2069 };
2070
2071 return phys_section_add(map, &section);
2072 }
2073
2074 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2075 {
2076 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2077 MemoryRegionSection *sections = d->map.sections;
2078
2079 return sections[index & ~TARGET_PAGE_MASK].mr;
2080 }
2081
2082 static void io_mem_init(void)
2083 {
2084 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2085 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2086 NULL, UINT64_MAX);
2087 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2088 NULL, UINT64_MAX);
2089 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2090 NULL, UINT64_MAX);
2091 }
2092
2093 static void mem_begin(MemoryListener *listener)
2094 {
2095 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2096 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2097 uint16_t n;
2098
2099 n = dummy_section(&d->map, as, &io_mem_unassigned);
2100 assert(n == PHYS_SECTION_UNASSIGNED);
2101 n = dummy_section(&d->map, as, &io_mem_notdirty);
2102 assert(n == PHYS_SECTION_NOTDIRTY);
2103 n = dummy_section(&d->map, as, &io_mem_rom);
2104 assert(n == PHYS_SECTION_ROM);
2105 n = dummy_section(&d->map, as, &io_mem_watch);
2106 assert(n == PHYS_SECTION_WATCH);
2107
2108 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2109 d->as = as;
2110 as->next_dispatch = d;
2111 }
2112
2113 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2114 {
2115 phys_sections_free(&d->map);
2116 g_free(d);
2117 }
2118
2119 static void mem_commit(MemoryListener *listener)
2120 {
2121 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2122 AddressSpaceDispatch *cur = as->dispatch;
2123 AddressSpaceDispatch *next = as->next_dispatch;
2124
2125 phys_page_compact_all(next, next->map.nodes_nb);
2126
2127 atomic_rcu_set(&as->dispatch, next);
2128 if (cur) {
2129 call_rcu(cur, address_space_dispatch_free, rcu);
2130 }
2131 }
2132
2133 static void tcg_commit(MemoryListener *listener)
2134 {
2135 CPUState *cpu;
2136
2137 /* Since each CPU stores RAM addresses in its TLB cache, we must
2138 reset the modified entries. */
2139 /* XXX: slow! */
2140 CPU_FOREACH(cpu) {
2141 /* FIXME: Disentangle the cpu.h circular file dependencies so we can
2142 get the right CPU directly from the listener. */
2143 if (cpu->tcg_as_listener != listener) {
2144 continue;
2145 }
2146 cpu_reload_memory_map(cpu);
2147 }
2148 }
2149
2150 static void core_log_global_start(MemoryListener *listener)
2151 {
2152 cpu_physical_memory_set_dirty_tracking(true);
2153 }
2154
2155 static void core_log_global_stop(MemoryListener *listener)
2156 {
2157 cpu_physical_memory_set_dirty_tracking(false);
2158 }
2159
2160 static MemoryListener core_memory_listener = {
2161 .log_global_start = core_log_global_start,
2162 .log_global_stop = core_log_global_stop,
2163 .priority = 1,
2164 };
2165
2166 void address_space_init_dispatch(AddressSpace *as)
2167 {
2168 as->dispatch = NULL;
2169 as->dispatch_listener = (MemoryListener) {
2170 .begin = mem_begin,
2171 .commit = mem_commit,
2172 .region_add = mem_add,
2173 .region_nop = mem_add,
2174 .priority = 0,
2175 };
2176 memory_listener_register(&as->dispatch_listener, as);
2177 }
2178
2179 void address_space_unregister(AddressSpace *as)
2180 {
2181 memory_listener_unregister(&as->dispatch_listener);
2182 }
2183
2184 void address_space_destroy_dispatch(AddressSpace *as)
2185 {
2186 AddressSpaceDispatch *d = as->dispatch;
2187
2188 atomic_rcu_set(&as->dispatch, NULL);
2189 if (d) {
2190 call_rcu(d, address_space_dispatch_free, rcu);
2191 }
2192 }
2193
2194 static void memory_map_init(void)
2195 {
2196 system_memory = g_malloc(sizeof(*system_memory));
2197
2198 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2199 address_space_init(&address_space_memory, system_memory, "memory");
2200
2201 system_io = g_malloc(sizeof(*system_io));
2202 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2203 65536);
2204 address_space_init(&address_space_io, system_io, "I/O");
2205
2206 memory_listener_register(&core_memory_listener, &address_space_memory);
2207 }
2208
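/*
 * Illustrative sketch: boards hook their memory into the system address
 * space by placing regions inside the region returned by
 * get_system_memory().  The container region, name and offset below are
 * hypothetical; a real board would usually initialize a RAM-backed
 * region rather than a plain container.
 */
static void example_add_board_region(void)
{
    MemoryRegion *board = g_new(MemoryRegion, 1);

    /* a 16 MB container placed at guest physical address 0x80000000 */
    memory_region_init(board, NULL, "example-board", 0x1000000);
    memory_region_add_subregion(get_system_memory(), 0x80000000, board);
}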
2209 MemoryRegion *get_system_memory(void)
2210 {
2211 return system_memory;
2212 }
2213
2214 MemoryRegion *get_system_io(void)
2215 {
2216 return system_io;
2217 }
2218
2219 #endif /* !defined(CONFIG_USER_ONLY) */
2220
2221 /* physical memory access (slow version, mainly for debug) */
2222 #if defined(CONFIG_USER_ONLY)
2223 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2224 uint8_t *buf, int len, int is_write)
2225 {
2226 int l, flags;
2227 target_ulong page;
2228 void * p;
2229
2230 while (len > 0) {
2231 page = addr & TARGET_PAGE_MASK;
2232 l = (page + TARGET_PAGE_SIZE) - addr;
2233 if (l > len)
2234 l = len;
2235 flags = page_get_flags(page);
2236 if (!(flags & PAGE_VALID))
2237 return -1;
2238 if (is_write) {
2239 if (!(flags & PAGE_WRITE))
2240 return -1;
2241 /* XXX: this code should not depend on lock_user */
2242 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2243 return -1;
2244 memcpy(p, buf, l);
2245 unlock_user(p, addr, l);
2246 } else {
2247 if (!(flags & PAGE_READ))
2248 return -1;
2249 /* XXX: this code should not depend on lock_user */
2250 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2251 return -1;
2252 memcpy(buf, p, l);
2253 unlock_user(p, addr, 0);
2254 }
2255 len -= l;
2256 buf += l;
2257 addr += l;
2258 }
2259 return 0;
2260 }
2261
2262 #else
2263
2264 static void invalidate_and_set_dirty(hwaddr addr,
2265 hwaddr length)
2266 {
2267 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2268 tb_invalidate_phys_range(addr, addr + length, 0);
2269 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2270 }
2271 xen_modified_memory(addr, length);
2272 }
2273
2274 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2275 {
2276 unsigned access_size_max = mr->ops->valid.max_access_size;
2277
2278 /* Regions are assumed to support 1-4 byte accesses unless
2279 otherwise specified. */
2280 if (access_size_max == 0) {
2281 access_size_max = 4;
2282 }
2283
2284 /* Bound the maximum access by the alignment of the address. */
2285 if (!mr->ops->impl.unaligned) {
2286 unsigned align_size_max = addr & -addr;
2287 if (align_size_max != 0 && align_size_max < access_size_max) {
2288 access_size_max = align_size_max;
2289 }
2290 }
2291
2292 /* Don't attempt accesses larger than the maximum. */
2293 if (l > access_size_max) {
2294 l = access_size_max;
2295 }
2296 if (l & (l - 1)) {
2297 l = 1 << (qemu_fls(l) - 1);
2298 }
2299
2300 return l;
2301 }
2302
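/*
 * Worked example for memory_access_size(): with valid.max_access_size
 * left at 0 (treated as 4) and impl.unaligned unset, a request of
 * l = 8 bytes at addr = 0x1006 is clamped as follows:
 *     addr & -addr  = 0x2   ->  alignment limits the access to 2 bytes
 *     l = MIN(8, 2) = 2     ->  already a power of two, kept as-is
 * so the 8-byte request is carried out as several smaller device
 * accesses by the address_space_rw() loop below.  The numbers here are
 * illustrative only.
 */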
2303 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2304 int len, bool is_write)
2305 {
2306 hwaddr l;
2307 uint8_t *ptr;
2308 uint64_t val;
2309 hwaddr addr1;
2310 MemoryRegion *mr;
2311 bool error = false;
2312
2313 while (len > 0) {
2314 l = len;
2315 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2316
2317 if (is_write) {
2318 if (!memory_access_is_direct(mr, is_write)) {
2319 l = memory_access_size(mr, l, addr1);
2320 /* XXX: could force current_cpu to NULL to avoid
2321 potential bugs */
2322 switch (l) {
2323 case 8:
2324 /* 64 bit write access */
2325 val = ldq_p(buf);
2326 error |= io_mem_write(mr, addr1, val, 8);
2327 break;
2328 case 4:
2329 /* 32 bit write access */
2330 val = ldl_p(buf);
2331 error |= io_mem_write(mr, addr1, val, 4);
2332 break;
2333 case 2:
2334 /* 16 bit write access */
2335 val = lduw_p(buf);
2336 error |= io_mem_write(mr, addr1, val, 2);
2337 break;
2338 case 1:
2339 /* 8 bit write access */
2340 val = ldub_p(buf);
2341 error |= io_mem_write(mr, addr1, val, 1);
2342 break;
2343 default:
2344 abort();
2345 }
2346 } else {
2347 addr1 += memory_region_get_ram_addr(mr);
2348 /* RAM case */
2349 ptr = qemu_get_ram_ptr(addr1);
2350 memcpy(ptr, buf, l);
2351 invalidate_and_set_dirty(addr1, l);
2352 }
2353 } else {
2354 if (!memory_access_is_direct(mr, is_write)) {
2355 /* I/O case */
2356 l = memory_access_size(mr, l, addr1);
2357 switch (l) {
2358 case 8:
2359 /* 64 bit read access */
2360 error |= io_mem_read(mr, addr1, &val, 8);
2361 stq_p(buf, val);
2362 break;
2363 case 4:
2364 /* 32 bit read access */
2365 error |= io_mem_read(mr, addr1, &val, 4);
2366 stl_p(buf, val);
2367 break;
2368 case 2:
2369 /* 16 bit read access */
2370 error |= io_mem_read(mr, addr1, &val, 2);
2371 stw_p(buf, val);
2372 break;
2373 case 1:
2374 /* 8 bit read access */
2375 error |= io_mem_read(mr, addr1, &val, 1);
2376 stb_p(buf, val);
2377 break;
2378 default:
2379 abort();
2380 }
2381 } else {
2382 /* RAM case */
2383 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2384 memcpy(buf, ptr, l);
2385 }
2386 }
2387 len -= l;
2388 buf += l;
2389 addr += l;
2390 }
2391
2392 return error;
2393 }
2394
2395 bool address_space_write(AddressSpace *as, hwaddr addr,
2396 const uint8_t *buf, int len)
2397 {
2398 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2399 }
2400
2401 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2402 {
2403 return address_space_rw(as, addr, buf, len, false);
2404 }
2405
2406
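/*
 * Illustrative sketch: a device model copying a small, fixed-size
 * command block out of guest memory and writing a one-byte status back
 * with the helpers above.  The 16-byte command size and the status
 * offset are hypothetical.
 */
static void example_handle_command(AddressSpace *as, hwaddr cmd_addr)
{
    uint8_t cmd[16];
    uint8_t status = 0;

    if (address_space_read(as, cmd_addr, cmd, sizeof(cmd))) {
        /* a true return value means part of the access failed */
        status = 1;
    }
    /* ... decode and execute the command here ... */
    address_space_write(as, cmd_addr + sizeof(cmd), &status, 1);
}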
2407 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2408 int len, int is_write)
2409 {
2410 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2411 }
2412
2413 enum write_rom_type {
2414 WRITE_DATA,
2415 FLUSH_CACHE,
2416 };
2417
2418 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2419 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2420 {
2421 hwaddr l;
2422 uint8_t *ptr;
2423 hwaddr addr1;
2424 MemoryRegion *mr;
2425
2426 while (len > 0) {
2427 l = len;
2428 mr = address_space_translate(as, addr, &addr1, &l, true);
2429
2430 if (!(memory_region_is_ram(mr) ||
2431 memory_region_is_romd(mr))) {
2432 /* do nothing */
2433 } else {
2434 addr1 += memory_region_get_ram_addr(mr);
2435 /* ROM/RAM case */
2436 ptr = qemu_get_ram_ptr(addr1);
2437 switch (type) {
2438 case WRITE_DATA:
2439 memcpy(ptr, buf, l);
2440 invalidate_and_set_dirty(addr1, l);
2441 break;
2442 case FLUSH_CACHE:
2443 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2444 break;
2445 }
2446 }
2447 len -= l;
2448 buf += l;
2449 addr += l;
2450 }
2451 }
2452
2453 /* used for ROM loading: can write to RAM and ROM */
2454 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2455 const uint8_t *buf, int len)
2456 {
2457 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2458 }
2459
2460 void cpu_flush_icache_range(hwaddr start, int len)
2461 {
2462 /*
2463 * This function should do the same thing as an icache flush that was
2464 * triggered from within the guest. For TCG we are always cache coherent,
2465 * so there is no need to flush anything. For KVM / Xen we need to flush
2466 * the host's instruction cache at least.
2467 */
2468 if (tcg_enabled()) {
2469 return;
2470 }
2471
2472 cpu_physical_memory_write_rom_internal(&address_space_memory,
2473 start, NULL, len, FLUSH_CACHE);
2474 }
2475
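/*
 * Illustrative sketch: loading a firmware blob the way a ROM loader
 * would, using the two helpers above.  'blob' and 'blob_size' are
 * hypothetical inputs.
 */
static void example_load_firmware(AddressSpace *as, hwaddr dest,
                                  const uint8_t *blob, int blob_size)
{
    /* can write to both RAM and ROM/ROMD regions */
    cpu_physical_memory_write_rom(as, dest, blob, blob_size);
    /* keep the host instruction cache coherent for KVM/Xen */
    cpu_flush_icache_range(dest, blob_size);
}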
2476 typedef struct {
2477 MemoryRegion *mr;
2478 void *buffer;
2479 hwaddr addr;
2480 hwaddr len;
2481 } BounceBuffer;
2482
2483 static BounceBuffer bounce;
2484
2485 typedef struct MapClient {
2486 void *opaque;
2487 void (*callback)(void *opaque);
2488 QLIST_ENTRY(MapClient) link;
2489 } MapClient;
2490
2491 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2492 = QLIST_HEAD_INITIALIZER(map_client_list);
2493
2494 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2495 {
2496 MapClient *client = g_malloc(sizeof(*client));
2497
2498 client->opaque = opaque;
2499 client->callback = callback;
2500 QLIST_INSERT_HEAD(&map_client_list, client, link);
2501 return client;
2502 }
2503
2504 static void cpu_unregister_map_client(void *_client)
2505 {
2506 MapClient *client = (MapClient *)_client;
2507
2508 QLIST_REMOVE(client, link);
2509 g_free(client);
2510 }
2511
2512 static void cpu_notify_map_clients(void)
2513 {
2514 MapClient *client;
2515
2516 while (!QLIST_EMPTY(&map_client_list)) {
2517 client = QLIST_FIRST(&map_client_list);
2518 client->callback(client->opaque);
2519 cpu_unregister_map_client(client);
2520 }
2521 }
2522
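/*
 * Illustrative sketch: a caller whose address_space_map() attempt fails
 * because the single bounce buffer is busy can register a callback and
 * retry once cpu_notify_map_clients() runs.  'ExampleMapReq' and
 * 'example_map_retry' are hypothetical names.
 */
typedef struct ExampleMapReq {
    hwaddr addr;
    hwaddr len;
} ExampleMapReq;

static void example_map_retry(void *opaque)
{
    ExampleMapReq *req = opaque;
    hwaddr plen = req->len;
    void *p = address_space_map(&address_space_memory, req->addr,
                                &plen, false);

    if (!p) {
        /* still busy: ask to be notified again */
        cpu_register_map_client(req, example_map_retry);
        return;
    }
    /* ... read up to plen bytes at p ... */
    address_space_unmap(&address_space_memory, p, plen, false, plen);
}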
2523 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2524 {
2525 MemoryRegion *mr;
2526 hwaddr l, xlat;
2527
2528 while (len > 0) {
2529 l = len;
2530 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2531 if (!memory_access_is_direct(mr, is_write)) {
2532 l = memory_access_size(mr, l, addr);
2533 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2534 return false;
2535 }
2536 }
2537
2538 len -= l;
2539 addr += l;
2540 }
2541 return true;
2542 }
2543
2544 /* Map a physical memory region into a host virtual address.
2545 * May map a subset of the requested range, given by and returned in *plen.
2546 * May return NULL if resources needed to perform the mapping are exhausted.
2547 * Use only for reads OR writes - not for read-modify-write operations.
2548 * Use cpu_register_map_client() to know when retrying the map operation is
2549 * likely to succeed.
2550 */
2551 void *address_space_map(AddressSpace *as,
2552 hwaddr addr,
2553 hwaddr *plen,
2554 bool is_write)
2555 {
2556 hwaddr len = *plen;
2557 hwaddr done = 0;
2558 hwaddr l, xlat, base;
2559 MemoryRegion *mr, *this_mr;
2560 ram_addr_t raddr;
2561
2562 if (len == 0) {
2563 return NULL;
2564 }
2565
2566 l = len;
2567 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2568 if (!memory_access_is_direct(mr, is_write)) {
2569 if (bounce.buffer) {
2570 return NULL;
2571 }
2572 /* Avoid unbounded allocations */
2573 l = MIN(l, TARGET_PAGE_SIZE);
2574 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2575 bounce.addr = addr;
2576 bounce.len = l;
2577
2578 memory_region_ref(mr);
2579 bounce.mr = mr;
2580 if (!is_write) {
2581 address_space_read(as, addr, bounce.buffer, l);
2582 }
2583
2584 *plen = l;
2585 return bounce.buffer;
2586 }
2587
2588 base = xlat;
2589 raddr = memory_region_get_ram_addr(mr);
2590
2591 for (;;) {
2592 len -= l;
2593 addr += l;
2594 done += l;
2595 if (len == 0) {
2596 break;
2597 }
2598
2599 l = len;
2600 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2601 if (this_mr != mr || xlat != base + done) {
2602 break;
2603 }
2604 }
2605
2606 memory_region_ref(mr);
2607 *plen = done;
2608 return qemu_ram_ptr_length(raddr + base, plen);
2609 }
2610
2611 /* Unmaps a memory region previously mapped by address_space_map().
2612 * Will also mark the memory as dirty if is_write == 1. access_len gives
2613 * the amount of memory that was actually read or written by the caller.
2614 */
2615 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2616 int is_write, hwaddr access_len)
2617 {
2618 if (buffer != bounce.buffer) {
2619 MemoryRegion *mr;
2620 ram_addr_t addr1;
2621
2622 mr = qemu_ram_addr_from_host(buffer, &addr1);
2623 assert(mr != NULL);
2624 if (is_write) {
2625 invalidate_and_set_dirty(addr1, access_len);
2626 }
2627 if (xen_enabled()) {
2628 xen_invalidate_map_cache_entry(buffer);
2629 }
2630 memory_region_unref(mr);
2631 return;
2632 }
2633 if (is_write) {
2634 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2635 }
2636 qemu_vfree(bounce.buffer);
2637 bounce.buffer = NULL;
2638 memory_region_unref(bounce.mr);
2639 cpu_notify_map_clients();
2640 }
2641
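/*
 * Illustrative sketch: address_space_map() may return a mapping that is
 * shorter than requested, so callers that need to touch a whole range
 * typically loop over it chunk by chunk.  'example_fill' is a
 * hypothetical helper.
 */
static void example_fill(AddressSpace *as, hwaddr addr, hwaddr len,
                         uint8_t pattern)
{
    while (len > 0) {
        hwaddr plen = len;
        void *p = address_space_map(as, addr, &plen, true);

        if (!p) {
            /* mapping resources exhausted; see cpu_register_map_client() */
            return;
        }
        memset(p, pattern, plen);
        /* is_write == 1, so the touched range is marked dirty */
        address_space_unmap(as, p, plen, true, plen);
        addr += plen;
        len -= plen;
    }
}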
2642 void *cpu_physical_memory_map(hwaddr addr,
2643 hwaddr *plen,
2644 int is_write)
2645 {
2646 return address_space_map(&address_space_memory, addr, plen, is_write);
2647 }
2648
2649 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2650 int is_write, hwaddr access_len)
2651 {
2652 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2653 }
2654
2655 /* warning: addr must be aligned */
2656 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2657 enum device_endian endian)
2658 {
2659 uint8_t *ptr;
2660 uint64_t val;
2661 MemoryRegion *mr;
2662 hwaddr l = 4;
2663 hwaddr addr1;
2664
2665 mr = address_space_translate(as, addr, &addr1, &l, false);
2666 if (l < 4 || !memory_access_is_direct(mr, false)) {
2667 /* I/O case */
2668 io_mem_read(mr, addr1, &val, 4);
2669 #if defined(TARGET_WORDS_BIGENDIAN)
2670 if (endian == DEVICE_LITTLE_ENDIAN) {
2671 val = bswap32(val);
2672 }
2673 #else
2674 if (endian == DEVICE_BIG_ENDIAN) {
2675 val = bswap32(val);
2676 }
2677 #endif
2678 } else {
2679 /* RAM case */
2680 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2681 & TARGET_PAGE_MASK)
2682 + addr1);
2683 switch (endian) {
2684 case DEVICE_LITTLE_ENDIAN:
2685 val = ldl_le_p(ptr);
2686 break;
2687 case DEVICE_BIG_ENDIAN:
2688 val = ldl_be_p(ptr);
2689 break;
2690 default:
2691 val = ldl_p(ptr);
2692 break;
2693 }
2694 }
2695 return val;
2696 }
2697
2698 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2699 {
2700 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2701 }
2702
2703 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2704 {
2705 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2706 }
2707
2708 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2709 {
2710 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2711 }
2712
2713 /* warning: addr must be aligned */
2714 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2715 enum device_endian endian)
2716 {
2717 uint8_t *ptr;
2718 uint64_t val;
2719 MemoryRegion *mr;
2720 hwaddr l = 8;
2721 hwaddr addr1;
2722
2723 mr = address_space_translate(as, addr, &addr1, &l,
2724 false);
2725 if (l < 8 || !memory_access_is_direct(mr, false)) {
2726 /* I/O case */
2727 io_mem_read(mr, addr1, &val, 8);
2728 #if defined(TARGET_WORDS_BIGENDIAN)
2729 if (endian == DEVICE_LITTLE_ENDIAN) {
2730 val = bswap64(val);
2731 }
2732 #else
2733 if (endian == DEVICE_BIG_ENDIAN) {
2734 val = bswap64(val);
2735 }
2736 #endif
2737 } else {
2738 /* RAM case */
2739 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2740 & TARGET_PAGE_MASK)
2741 + addr1);
2742 switch (endian) {
2743 case DEVICE_LITTLE_ENDIAN:
2744 val = ldq_le_p(ptr);
2745 break;
2746 case DEVICE_BIG_ENDIAN:
2747 val = ldq_be_p(ptr);
2748 break;
2749 default:
2750 val = ldq_p(ptr);
2751 break;
2752 }
2753 }
2754 return val;
2755 }
2756
2757 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2758 {
2759 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2760 }
2761
2762 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2763 {
2764 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2765 }
2766
2767 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2768 {
2769 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2770 }
2771
2772 /* XXX: optimize */
2773 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2774 {
2775 uint8_t val;
2776 address_space_rw(as, addr, &val, 1, 0);
2777 return val;
2778 }
2779
2780 /* warning: addr must be aligned */
2781 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2782 enum device_endian endian)
2783 {
2784 uint8_t *ptr;
2785 uint64_t val;
2786 MemoryRegion *mr;
2787 hwaddr l = 2;
2788 hwaddr addr1;
2789
2790 mr = address_space_translate(as, addr, &addr1, &l,
2791 false);
2792 if (l < 2 || !memory_access_is_direct(mr, false)) {
2793 /* I/O case */
2794 io_mem_read(mr, addr1, &val, 2);
2795 #if defined(TARGET_WORDS_BIGENDIAN)
2796 if (endian == DEVICE_LITTLE_ENDIAN) {
2797 val = bswap16(val);
2798 }
2799 #else
2800 if (endian == DEVICE_BIG_ENDIAN) {
2801 val = bswap16(val);
2802 }
2803 #endif
2804 } else {
2805 /* RAM case */
2806 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2807 & TARGET_PAGE_MASK)
2808 + addr1);
2809 switch (endian) {
2810 case DEVICE_LITTLE_ENDIAN:
2811 val = lduw_le_p(ptr);
2812 break;
2813 case DEVICE_BIG_ENDIAN:
2814 val = lduw_be_p(ptr);
2815 break;
2816 default:
2817 val = lduw_p(ptr);
2818 break;
2819 }
2820 }
2821 return val;
2822 }
2823
2824 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2825 {
2826 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2827 }
2828
2829 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2830 {
2831 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2832 }
2833
2834 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2835 {
2836 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2837 }
2838
2839 /* warning: addr must be aligned. The RAM page is not marked as dirty
2840 and the code inside is not invalidated. This is useful if the dirty
2841 bits are used to track modified PTEs */
2842 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2843 {
2844 uint8_t *ptr;
2845 MemoryRegion *mr;
2846 hwaddr l = 4;
2847 hwaddr addr1;
2848
2849 mr = address_space_translate(as, addr, &addr1, &l,
2850 true);
2851 if (l < 4 || !memory_access_is_direct(mr, true)) {
2852 io_mem_write(mr, addr1, val, 4);
2853 } else {
2854 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2855 ptr = qemu_get_ram_ptr(addr1);
2856 stl_p(ptr, val);
2857
2858 if (unlikely(in_migration)) {
2859 if (cpu_physical_memory_is_clean(addr1)) {
2860 /* invalidate code */
2861 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2862 /* set dirty bit */
2863 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2864 }
2865 }
2866 }
2867 }
2868
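/*
 * Illustrative sketch: the typical user of stl_phys_notdirty() is a
 * target MMU helper updating a page table entry without disturbing the
 * dirty tracking described above.  The 32-bit PTE layout and the
 * EXAMPLE_PTE_ACCESSED bit are hypothetical.
 */
#define EXAMPLE_PTE_ACCESSED 0x20

static void example_set_pte_accessed(AddressSpace *as, hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(as, pte_addr);

    if (!(pte & EXAMPLE_PTE_ACCESSED)) {
        stl_phys_notdirty(as, pte_addr, pte | EXAMPLE_PTE_ACCESSED);
    }
}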
2869 /* warning: addr must be aligned */
2870 static inline void stl_phys_internal(AddressSpace *as,
2871 hwaddr addr, uint32_t val,
2872 enum device_endian endian)
2873 {
2874 uint8_t *ptr;
2875 MemoryRegion *mr;
2876 hwaddr l = 4;
2877 hwaddr addr1;
2878
2879 mr = address_space_translate(as, addr, &addr1, &l,
2880 true);
2881 if (l < 4 || !memory_access_is_direct(mr, true)) {
2882 #if defined(TARGET_WORDS_BIGENDIAN)
2883 if (endian == DEVICE_LITTLE_ENDIAN) {
2884 val = bswap32(val);
2885 }
2886 #else
2887 if (endian == DEVICE_BIG_ENDIAN) {
2888 val = bswap32(val);
2889 }
2890 #endif
2891 io_mem_write(mr, addr1, val, 4);
2892 } else {
2893 /* RAM case */
2894 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2895 ptr = qemu_get_ram_ptr(addr1);
2896 switch (endian) {
2897 case DEVICE_LITTLE_ENDIAN:
2898 stl_le_p(ptr, val);
2899 break;
2900 case DEVICE_BIG_ENDIAN:
2901 stl_be_p(ptr, val);
2902 break;
2903 default:
2904 stl_p(ptr, val);
2905 break;
2906 }
2907 invalidate_and_set_dirty(addr1, 4);
2908 }
2909 }
2910
2911 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2912 {
2913 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2914 }
2915
2916 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2917 {
2918 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2919 }
2920
2921 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2922 {
2923 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2924 }
2925
2926 /* XXX: optimize */
2927 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2928 {
2929 uint8_t v = val;
2930 address_space_rw(as, addr, &v, 1, 1);
2931 }
2932
2933 /* warning: addr must be aligned */
2934 static inline void stw_phys_internal(AddressSpace *as,
2935 hwaddr addr, uint32_t val,
2936 enum device_endian endian)
2937 {
2938 uint8_t *ptr;
2939 MemoryRegion *mr;
2940 hwaddr l = 2;
2941 hwaddr addr1;
2942
2943 mr = address_space_translate(as, addr, &addr1, &l, true);
2944 if (l < 2 || !memory_access_is_direct(mr, true)) {
2945 #if defined(TARGET_WORDS_BIGENDIAN)
2946 if (endian == DEVICE_LITTLE_ENDIAN) {
2947 val = bswap16(val);
2948 }
2949 #else
2950 if (endian == DEVICE_BIG_ENDIAN) {
2951 val = bswap16(val);
2952 }
2953 #endif
2954 io_mem_write(mr, addr1, val, 2);
2955 } else {
2956 /* RAM case */
2957 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2958 ptr = qemu_get_ram_ptr(addr1);
2959 switch (endian) {
2960 case DEVICE_LITTLE_ENDIAN:
2961 stw_le_p(ptr, val);
2962 break;
2963 case DEVICE_BIG_ENDIAN:
2964 stw_be_p(ptr, val);
2965 break;
2966 default:
2967 stw_p(ptr, val);
2968 break;
2969 }
2970 invalidate_and_set_dirty(addr1, 2);
2971 }
2972 }
2973
2974 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2975 {
2976 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2977 }
2978
2979 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2980 {
2981 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2982 }
2983
2984 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2985 {
2986 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2987 }
2988
2989 /* XXX: optimize */
2990 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2991 {
2992 val = tswap64(val);
2993 address_space_rw(as, addr, (void *) &val, 8, 1);
2994 }
2995
2996 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2997 {
2998 val = cpu_to_le64(val);
2999 address_space_rw(as, addr, (void *) &val, 8, 1);
3000 }
3001
3002 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3003 {
3004 val = cpu_to_be64(val);
3005 address_space_rw(as, addr, (void *) &val, 8, 1);
3006 }
3007
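/*
 * Illustrative sketch: a device model reading a guest-written,
 * little-endian 32-bit flags field and storing a 16-bit status word
 * back with the helpers above.  The descriptor layout is hypothetical.
 */
static void example_complete_descriptor(AddressSpace *as, hwaddr desc,
                                        hwaddr status)
{
    uint32_t flags = ldl_le_phys(as, desc);

    if (flags & 1) {
        stw_le_phys(as, status, 0x0001);
    }
}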
3008 /* virtual memory access for debug (includes writing to ROM) */
3009 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3010 uint8_t *buf, int len, int is_write)
3011 {
3012 int l;
3013 hwaddr phys_addr;
3014 target_ulong page;
3015
3016 while (len > 0) {
3017 page = addr & TARGET_PAGE_MASK;
3018 phys_addr = cpu_get_phys_page_debug(cpu, page);
3019 /* if no physical page mapped, return an error */
3020 if (phys_addr == -1)
3021 return -1;
3022 l = (page + TARGET_PAGE_SIZE) - addr;
3023 if (l > len)
3024 l = len;
3025 phys_addr += (addr & ~TARGET_PAGE_MASK);
3026 if (is_write) {
3027 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3028 } else {
3029 address_space_rw(cpu->as, phys_addr, buf, l, 0);
3030 }
3031 len -= l;
3032 buf += l;
3033 addr += l;
3034 }
3035 return 0;
3036 }
3037 #endif
3038
3039 /*
3040 * A helper function for the _utterly broken_ virtio device model to find out if
3041 * it's running on a big endian machine. Don't do this at home kids!
3042 */
3043 bool target_words_bigendian(void);
3044 bool target_words_bigendian(void)
3045 {
3046 #if defined(TARGET_WORDS_BIGENDIAN)
3047 return true;
3048 #else
3049 return false;
3050 #endif
3051 }
3052
3053 #ifndef CONFIG_USER_ONLY
3054 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3055 {
3056 MemoryRegion *mr;
3057 hwaddr l = 1;
3058
3059 mr = address_space_translate(&address_space_memory,
3060 phys_addr, &phys_addr, &l, false);
3061
3062 return !(memory_region_is_ram(mr) ||
3063 memory_region_is_romd(mr));
3064 }
3065
3066 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3067 {
3068 RAMBlock *block;
3069
3070 rcu_read_lock();
3071 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3072 func(block->host, block->offset, block->used_length, opaque);
3073 }
3074 rcu_read_unlock();
3075 }
3076 #endif