exec.c: Make address_space_rw take transaction attributes
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "exec/cputlb.h"
52 #include "translate-all.h"
53
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
56
57 #include "qemu/range.h"
58
59 //#define DEBUG_SUBPAGE
60
61 #if !defined(CONFIG_USER_ONLY)
62 static bool in_migration;
63
64 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
65 * are protected by the ramlist lock.
66 */
67 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
68
69 static MemoryRegion *system_memory;
70 static MemoryRegion *system_io;
71
72 AddressSpace address_space_io;
73 AddressSpace address_space_memory;
74
75 MemoryRegion io_mem_rom, io_mem_notdirty;
76 static MemoryRegion io_mem_unassigned;
77
78 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
79 #define RAM_PREALLOC (1 << 0)
80
81 /* RAM is mmap-ed with MAP_SHARED */
82 #define RAM_SHARED (1 << 1)
83
84 /* Only a portion of RAM (used_length) is actually used and migrated.
85  * This used_length can change across reboots.
86 */
87 #define RAM_RESIZEABLE (1 << 2)
88
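/* Illustrative sketch (hypothetical helper, not part of the original file):
 * the RAM_* values above are independent bits in RAMBlock::flags, so callers
 * test them with simple bitmask checks like this one.
 */
static inline bool ram_block_is_shared_example(const RAMBlock *rb)
{
    return (rb->flags & RAM_SHARED) != 0;
}
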
89 #endif
90
91 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
92 /* current CPU in the current thread. It is only valid inside
93 cpu_exec() */
94 DEFINE_TLS(CPUState *, current_cpu);
95 /* 0 = Do not count executed instructions.
96 1 = Precise instruction counting.
97 2 = Adaptive rate instruction counting. */
98 int use_icount;
99
100 #if !defined(CONFIG_USER_ONLY)
101
102 typedef struct PhysPageEntry PhysPageEntry;
103
104 struct PhysPageEntry {
105 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
106 uint32_t skip : 6;
107 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
108 uint32_t ptr : 26;
109 };
110
111 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
112
113 /* Size of the L2 (and L3, etc) page tables. */
114 #define ADDR_SPACE_BITS 64
115
116 #define P_L2_BITS 9
117 #define P_L2_SIZE (1 << P_L2_BITS)
118
119 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
120
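/*
 * Worked example (illustrative, assuming the common TARGET_PAGE_BITS of 12):
 * P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6, i.e. six levels of 512-entry
 * (P_L2_SIZE) nodes are enough to cover the full 64-bit ADDR_SPACE_BITS range.
 */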
121 typedef PhysPageEntry Node[P_L2_SIZE];
122
123 typedef struct PhysPageMap {
124 struct rcu_head rcu;
125
126 unsigned sections_nb;
127 unsigned sections_nb_alloc;
128 unsigned nodes_nb;
129 unsigned nodes_nb_alloc;
130 Node *nodes;
131 MemoryRegionSection *sections;
132 } PhysPageMap;
133
134 struct AddressSpaceDispatch {
135 struct rcu_head rcu;
136
137 /* This is a multi-level map on the physical address space.
138 * The bottom level has pointers to MemoryRegionSections.
139 */
140 PhysPageEntry phys_map;
141 PhysPageMap map;
142 AddressSpace *as;
143 };
144
145 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
146 typedef struct subpage_t {
147 MemoryRegion iomem;
148 AddressSpace *as;
149 hwaddr base;
150 uint16_t sub_section[TARGET_PAGE_SIZE];
151 } subpage_t;
152
153 #define PHYS_SECTION_UNASSIGNED 0
154 #define PHYS_SECTION_NOTDIRTY 1
155 #define PHYS_SECTION_ROM 2
156 #define PHYS_SECTION_WATCH 3
157
158 static void io_mem_init(void);
159 static void memory_map_init(void);
160 static void tcg_commit(MemoryListener *listener);
161
162 static MemoryRegion io_mem_watch;
163 #endif
164
165 #if !defined(CONFIG_USER_ONLY)
166
167 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
168 {
169 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
170 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
171 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
172 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
173 }
174 }
175
176 static uint32_t phys_map_node_alloc(PhysPageMap *map)
177 {
178 unsigned i;
179 uint32_t ret;
180
181 ret = map->nodes_nb++;
182 assert(ret != PHYS_MAP_NODE_NIL);
183 assert(ret != map->nodes_nb_alloc);
184 for (i = 0; i < P_L2_SIZE; ++i) {
185 map->nodes[ret][i].skip = 1;
186 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
187 }
188 return ret;
189 }
190
191 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
192 hwaddr *index, hwaddr *nb, uint16_t leaf,
193 int level)
194 {
195 PhysPageEntry *p;
196 int i;
197 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
198
199 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
200 lp->ptr = phys_map_node_alloc(map);
201 p = map->nodes[lp->ptr];
202 if (level == 0) {
203 for (i = 0; i < P_L2_SIZE; i++) {
204 p[i].skip = 0;
205 p[i].ptr = PHYS_SECTION_UNASSIGNED;
206 }
207 }
208 } else {
209 p = map->nodes[lp->ptr];
210 }
211 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
212
213 while (*nb && lp < &p[P_L2_SIZE]) {
214 if ((*index & (step - 1)) == 0 && *nb >= step) {
215 lp->skip = 0;
216 lp->ptr = leaf;
217 *index += step;
218 *nb -= step;
219 } else {
220 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
221 }
222 ++lp;
223 }
224 }
225
226 static void phys_page_set(AddressSpaceDispatch *d,
227 hwaddr index, hwaddr nb,
228 uint16_t leaf)
229 {
230 /* Wildly overreserve - it doesn't matter much. */
231 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
232
233 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
234 }
235
236 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
237 * and update our entry so we can skip it and go directly to the destination.
238 */
239 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
240 {
241 unsigned valid_ptr = P_L2_SIZE;
242 int valid = 0;
243 PhysPageEntry *p;
244 int i;
245
246 if (lp->ptr == PHYS_MAP_NODE_NIL) {
247 return;
248 }
249
250 p = nodes[lp->ptr];
251 for (i = 0; i < P_L2_SIZE; i++) {
252 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
253 continue;
254 }
255
256 valid_ptr = i;
257 valid++;
258 if (p[i].skip) {
259 phys_page_compact(&p[i], nodes, compacted);
260 }
261 }
262
263 /* We can only compress if there's only one child. */
264 if (valid != 1) {
265 return;
266 }
267
268 assert(valid_ptr < P_L2_SIZE);
269
270 /* Don't compress if it won't fit in the # of bits we have. */
271 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
272 return;
273 }
274
275 lp->ptr = p[valid_ptr].ptr;
276 if (!p[valid_ptr].skip) {
277 /* If our only child is a leaf, make this a leaf. */
278 /* By design, we should have made this node a leaf to begin with so we
279 * should never reach here.
280 * But since it's so simple to handle this, let's do it just in case we
281 * change this rule.
282 */
283 lp->skip = 0;
284 } else {
285 lp->skip += p[valid_ptr].skip;
286 }
287 }
288
289 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
290 {
291 DECLARE_BITMAP(compacted, nodes_nb);
292
293 if (d->phys_map.skip) {
294 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
295 }
296 }
297
298 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
299 Node *nodes, MemoryRegionSection *sections)
300 {
301 PhysPageEntry *p;
302 hwaddr index = addr >> TARGET_PAGE_BITS;
303 int i;
304
305 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
306 if (lp.ptr == PHYS_MAP_NODE_NIL) {
307 return &sections[PHYS_SECTION_UNASSIGNED];
308 }
309 p = nodes[lp.ptr];
310 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
311 }
312
313 if (sections[lp.ptr].size.hi ||
314 range_covers_byte(sections[lp.ptr].offset_within_address_space,
315 sections[lp.ptr].size.lo, addr)) {
316 return &sections[lp.ptr];
317 } else {
318 return &sections[PHYS_SECTION_UNASSIGNED];
319 }
320 }
321
322 bool memory_region_is_unassigned(MemoryRegion *mr)
323 {
324 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
325 && mr != &io_mem_watch;
326 }
327
328 /* Called from RCU critical section */
329 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
330 hwaddr addr,
331 bool resolve_subpage)
332 {
333 MemoryRegionSection *section;
334 subpage_t *subpage;
335
336 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
337 if (resolve_subpage && section->mr->subpage) {
338 subpage = container_of(section->mr, subpage_t, iomem);
339 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
340 }
341 return section;
342 }
343
344 /* Called from RCU critical section */
345 static MemoryRegionSection *
346 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
347 hwaddr *plen, bool resolve_subpage)
348 {
349 MemoryRegionSection *section;
350 Int128 diff;
351
352 section = address_space_lookup_region(d, addr, resolve_subpage);
353 /* Compute offset within MemoryRegionSection */
354 addr -= section->offset_within_address_space;
355
356 /* Compute offset within MemoryRegion */
357 *xlat = addr + section->offset_within_region;
358
359 diff = int128_sub(section->mr->size, int128_make64(addr));
360 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
361 return section;
362 }
363
364 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
365 {
366 if (memory_region_is_ram(mr)) {
367 return !(is_write && mr->readonly);
368 }
369 if (memory_region_is_romd(mr)) {
370 return !is_write;
371 }
372
373 return false;
374 }
375
376 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
377 hwaddr *xlat, hwaddr *plen,
378 bool is_write)
379 {
380 IOMMUTLBEntry iotlb;
381 MemoryRegionSection *section;
382 MemoryRegion *mr;
383 hwaddr len = *plen;
384
385 rcu_read_lock();
386 for (;;) {
387 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
388 section = address_space_translate_internal(d, addr, &addr, plen, true);
389 mr = section->mr;
390
391 if (!mr->iommu_ops) {
392 break;
393 }
394
395 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
396 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
397 | (addr & iotlb.addr_mask));
398 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
399 if (!(iotlb.perm & (1 << is_write))) {
400 mr = &io_mem_unassigned;
401 break;
402 }
403
404 as = iotlb.target_as;
405 }
406
407 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
408 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
409 len = MIN(page, len);
410 }
411
412 *plen = len;
413 *xlat = addr;
414 rcu_read_unlock();
415 return mr;
416 }
417
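/* Illustrative caller sketch (hypothetical helper, not code from this file):
 * the usual pattern is to translate first and only take a host pointer when
 * the resolved region allows direct access; anything else has to go through
 * the MMIO accessors instead.
 */
static void *address_space_translate_example(AddressSpace *as, hwaddr addr,
                                              hwaddr *len, bool is_write)
{
    hwaddr xlat;
    MemoryRegion *mr = address_space_translate(as, addr, &xlat, len, is_write);

    if (!memory_access_is_direct(mr, is_write)) {
        return NULL;    /* not plain RAM/ROMD: use the MMIO accessors instead */
    }
    return qemu_get_ram_ptr(memory_region_get_ram_addr(mr) + xlat);
}
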
418 /* Called from RCU critical section */
419 MemoryRegionSection *
420 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
421 hwaddr *xlat, hwaddr *plen)
422 {
423 MemoryRegionSection *section;
424 section = address_space_translate_internal(cpu->memory_dispatch,
425 addr, xlat, plen, false);
426
427 assert(!section->mr->iommu_ops);
428 return section;
429 }
430 #endif
431
432 void cpu_exec_init_all(void)
433 {
434 #if !defined(CONFIG_USER_ONLY)
435 qemu_mutex_init(&ram_list.mutex);
436 memory_map_init();
437 io_mem_init();
438 #endif
439 }
440
441 #if !defined(CONFIG_USER_ONLY)
442
443 static int cpu_common_post_load(void *opaque, int version_id)
444 {
445 CPUState *cpu = opaque;
446
447 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
448 version_id is increased. */
449 cpu->interrupt_request &= ~0x01;
450 tlb_flush(cpu, 1);
451
452 return 0;
453 }
454
455 static int cpu_common_pre_load(void *opaque)
456 {
457 CPUState *cpu = opaque;
458
459 cpu->exception_index = -1;
460
461 return 0;
462 }
463
464 static bool cpu_common_exception_index_needed(void *opaque)
465 {
466 CPUState *cpu = opaque;
467
468 return tcg_enabled() && cpu->exception_index != -1;
469 }
470
471 static const VMStateDescription vmstate_cpu_common_exception_index = {
472 .name = "cpu_common/exception_index",
473 .version_id = 1,
474 .minimum_version_id = 1,
475 .fields = (VMStateField[]) {
476 VMSTATE_INT32(exception_index, CPUState),
477 VMSTATE_END_OF_LIST()
478 }
479 };
480
481 const VMStateDescription vmstate_cpu_common = {
482 .name = "cpu_common",
483 .version_id = 1,
484 .minimum_version_id = 1,
485 .pre_load = cpu_common_pre_load,
486 .post_load = cpu_common_post_load,
487 .fields = (VMStateField[]) {
488 VMSTATE_UINT32(halted, CPUState),
489 VMSTATE_UINT32(interrupt_request, CPUState),
490 VMSTATE_END_OF_LIST()
491 },
492 .subsections = (VMStateSubsection[]) {
493 {
494 .vmsd = &vmstate_cpu_common_exception_index,
495 .needed = cpu_common_exception_index_needed,
496 } , {
497 /* empty */
498 }
499 }
500 };
501
502 #endif
503
504 CPUState *qemu_get_cpu(int index)
505 {
506 CPUState *cpu;
507
508 CPU_FOREACH(cpu) {
509 if (cpu->cpu_index == index) {
510 return cpu;
511 }
512 }
513
514 return NULL;
515 }
516
517 #if !defined(CONFIG_USER_ONLY)
518 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
519 {
520 /* We only support one address space per cpu at the moment. */
521 assert(cpu->as == as);
522
523 if (cpu->tcg_as_listener) {
524 memory_listener_unregister(cpu->tcg_as_listener);
525 } else {
526 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
527 }
528 cpu->tcg_as_listener->commit = tcg_commit;
529 memory_listener_register(cpu->tcg_as_listener, as);
530 }
531 #endif
532
533 void cpu_exec_init(CPUArchState *env)
534 {
535 CPUState *cpu = ENV_GET_CPU(env);
536 CPUClass *cc = CPU_GET_CLASS(cpu);
537 CPUState *some_cpu;
538 int cpu_index;
539
540 #if defined(CONFIG_USER_ONLY)
541 cpu_list_lock();
542 #endif
543 cpu_index = 0;
544 CPU_FOREACH(some_cpu) {
545 cpu_index++;
546 }
547 cpu->cpu_index = cpu_index;
548 cpu->numa_node = 0;
549 QTAILQ_INIT(&cpu->breakpoints);
550 QTAILQ_INIT(&cpu->watchpoints);
551 #ifndef CONFIG_USER_ONLY
552 cpu->as = &address_space_memory;
553 cpu->thread_id = qemu_get_thread_id();
554 cpu_reload_memory_map(cpu);
555 #endif
556 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
557 #if defined(CONFIG_USER_ONLY)
558 cpu_list_unlock();
559 #endif
560 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
561 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
562 }
563 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
564 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
565 cpu_save, cpu_load, env);
566 assert(cc->vmsd == NULL);
567 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
568 #endif
569 if (cc->vmsd != NULL) {
570 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
571 }
572 }
573
574 #if defined(CONFIG_USER_ONLY)
575 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
576 {
577 tb_invalidate_phys_page_range(pc, pc + 1, 0);
578 }
579 #else
580 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
581 {
582 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
583 if (phys != -1) {
584 tb_invalidate_phys_addr(cpu->as,
585 phys | (pc & ~TARGET_PAGE_MASK));
586 }
587 }
588 #endif
589
590 #if defined(CONFIG_USER_ONLY)
591 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
592
593 {
594 }
595
596 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
597 int flags)
598 {
599 return -ENOSYS;
600 }
601
602 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
603 {
604 }
605
606 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
607 int flags, CPUWatchpoint **watchpoint)
608 {
609 return -ENOSYS;
610 }
611 #else
612 /* Add a watchpoint. */
613 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
614 int flags, CPUWatchpoint **watchpoint)
615 {
616 CPUWatchpoint *wp;
617
618 /* forbid ranges which are empty or run off the end of the address space */
619 if (len == 0 || (addr + len - 1) < addr) {
620 error_report("tried to set invalid watchpoint at %"
621 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
622 return -EINVAL;
623 }
624 wp = g_malloc(sizeof(*wp));
625
626 wp->vaddr = addr;
627 wp->len = len;
628 wp->flags = flags;
629
630 /* keep all GDB-injected watchpoints in front */
631 if (flags & BP_GDB) {
632 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
633 } else {
634 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
635 }
636
637 tlb_flush_page(cpu, addr);
638
639 if (watchpoint)
640 *watchpoint = wp;
641 return 0;
642 }
643
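/* Illustrative usage (hypothetical helper and length, not from this file):
 * a debugger front end such as the gdbstub would request a 4-byte write
 * watchpoint roughly like this.
 */
static int cpu_watchpoint_insert_example(CPUState *cpu, vaddr addr)
{
    CPUWatchpoint *wp;

    return cpu_watchpoint_insert(cpu, addr, 4, BP_GDB | BP_MEM_WRITE, &wp);
}
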
644 /* Remove a specific watchpoint. */
645 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
646 int flags)
647 {
648 CPUWatchpoint *wp;
649
650 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
651 if (addr == wp->vaddr && len == wp->len
652 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
653 cpu_watchpoint_remove_by_ref(cpu, wp);
654 return 0;
655 }
656 }
657 return -ENOENT;
658 }
659
660 /* Remove a specific watchpoint by reference. */
661 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
662 {
663 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
664
665 tlb_flush_page(cpu, watchpoint->vaddr);
666
667 g_free(watchpoint);
668 }
669
670 /* Remove all matching watchpoints. */
671 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
672 {
673 CPUWatchpoint *wp, *next;
674
675 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
676 if (wp->flags & mask) {
677 cpu_watchpoint_remove_by_ref(cpu, wp);
678 }
679 }
680 }
681
682 /* Return true if this watchpoint address matches the specified
683 * access (ie the address range covered by the watchpoint overlaps
684 * partially or completely with the address range covered by the
685 * access).
686 */
687 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
688 vaddr addr,
689 vaddr len)
690 {
691 /* We know the lengths are non-zero, but a little caution is
692 * required to avoid errors in the case where the range ends
693 * exactly at the top of the address space and so addr + len
694 * wraps round to zero.
695 */
696 vaddr wpend = wp->vaddr + wp->len - 1;
697 vaddr addrend = addr + len - 1;
698
699 return !(addr > wpend || wp->vaddr > addrend);
700 }
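
/*
 * Worked example (illustrative, assuming a 64-bit vaddr): a watchpoint on the
 * last four bytes of the address space, vaddr = UINT64_MAX - 3 with len = 4,
 * yields wpend = UINT64_MAX instead of wrapping around to 3, so an access at
 * UINT64_MAX still matches while an access at address 0 correctly does not.
 */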
701
702 #endif
703
704 /* Add a breakpoint. */
705 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
706 CPUBreakpoint **breakpoint)
707 {
708 CPUBreakpoint *bp;
709
710 bp = g_malloc(sizeof(*bp));
711
712 bp->pc = pc;
713 bp->flags = flags;
714
715 /* keep all GDB-injected breakpoints in front */
716 if (flags & BP_GDB) {
717 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
718 } else {
719 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
720 }
721
722 breakpoint_invalidate(cpu, pc);
723
724 if (breakpoint) {
725 *breakpoint = bp;
726 }
727 return 0;
728 }
729
730 /* Remove a specific breakpoint. */
731 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
732 {
733 CPUBreakpoint *bp;
734
735 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
736 if (bp->pc == pc && bp->flags == flags) {
737 cpu_breakpoint_remove_by_ref(cpu, bp);
738 return 0;
739 }
740 }
741 return -ENOENT;
742 }
743
744 /* Remove a specific breakpoint by reference. */
745 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
746 {
747 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
748
749 breakpoint_invalidate(cpu, breakpoint->pc);
750
751 g_free(breakpoint);
752 }
753
754 /* Remove all matching breakpoints. */
755 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
756 {
757 CPUBreakpoint *bp, *next;
758
759 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
760 if (bp->flags & mask) {
761 cpu_breakpoint_remove_by_ref(cpu, bp);
762 }
763 }
764 }
765
766 /* enable or disable single step mode. EXCP_DEBUG is returned by the
767 CPU loop after each instruction */
768 void cpu_single_step(CPUState *cpu, int enabled)
769 {
770 if (cpu->singlestep_enabled != enabled) {
771 cpu->singlestep_enabled = enabled;
772 if (kvm_enabled()) {
773 kvm_update_guest_debug(cpu, 0);
774 } else {
775 /* must flush all the translated code to avoid inconsistencies */
776 /* XXX: only flush what is necessary */
777 CPUArchState *env = cpu->env_ptr;
778 tb_flush(env);
779 }
780 }
781 }
782
783 void cpu_abort(CPUState *cpu, const char *fmt, ...)
784 {
785 va_list ap;
786 va_list ap2;
787
788 va_start(ap, fmt);
789 va_copy(ap2, ap);
790 fprintf(stderr, "qemu: fatal: ");
791 vfprintf(stderr, fmt, ap);
792 fprintf(stderr, "\n");
793 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
794 if (qemu_log_enabled()) {
795 qemu_log("qemu: fatal: ");
796 qemu_log_vprintf(fmt, ap2);
797 qemu_log("\n");
798 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
799 qemu_log_flush();
800 qemu_log_close();
801 }
802 va_end(ap2);
803 va_end(ap);
804 #if defined(CONFIG_USER_ONLY)
805 {
806 struct sigaction act;
807 sigfillset(&act.sa_mask);
808 act.sa_handler = SIG_DFL;
809 sigaction(SIGABRT, &act, NULL);
810 }
811 #endif
812 abort();
813 }
814
815 #if !defined(CONFIG_USER_ONLY)
816 /* Called from RCU critical section */
817 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
818 {
819 RAMBlock *block;
820
821 block = atomic_rcu_read(&ram_list.mru_block);
822 if (block && addr - block->offset < block->max_length) {
823 goto found;
824 }
825 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
826 if (addr - block->offset < block->max_length) {
827 goto found;
828 }
829 }
830
831 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
832 abort();
833
834 found:
835 /* It is safe to write mru_block outside the iothread lock. This
836 * is what happens:
837 *
838 * mru_block = xxx
839 * rcu_read_unlock()
840 * xxx removed from list
841 * rcu_read_lock()
842 * read mru_block
843 * mru_block = NULL;
844 * call_rcu(reclaim_ramblock, xxx);
845 * rcu_read_unlock()
846 *
847 * atomic_rcu_set is not needed here. The block was already published
848 * when it was placed into the list. Here we're just making an extra
849 * copy of the pointer.
850 */
851 ram_list.mru_block = block;
852 return block;
853 }
854
855 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
856 {
857 ram_addr_t start1;
858 RAMBlock *block;
859 ram_addr_t end;
860
861 end = TARGET_PAGE_ALIGN(start + length);
862 start &= TARGET_PAGE_MASK;
863
864 rcu_read_lock();
865 block = qemu_get_ram_block(start);
866 assert(block == qemu_get_ram_block(end - 1));
867 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
868 cpu_tlb_reset_dirty_all(start1, length);
869 rcu_read_unlock();
870 }
871
872 /* Note: start and end must be within the same ram block. */
873 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
874 unsigned client)
875 {
876 if (length == 0)
877 return;
878 cpu_physical_memory_clear_dirty_range_type(start, length, client);
879
880 if (tcg_enabled()) {
881 tlb_reset_dirty_range_all(start, length);
882 }
883 }
884
885 static void cpu_physical_memory_set_dirty_tracking(bool enable)
886 {
887 in_migration = enable;
888 }
889
890 /* Called from RCU critical section */
891 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
892 MemoryRegionSection *section,
893 target_ulong vaddr,
894 hwaddr paddr, hwaddr xlat,
895 int prot,
896 target_ulong *address)
897 {
898 hwaddr iotlb;
899 CPUWatchpoint *wp;
900
901 if (memory_region_is_ram(section->mr)) {
902 /* Normal RAM. */
903 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
904 + xlat;
905 if (!section->readonly) {
906 iotlb |= PHYS_SECTION_NOTDIRTY;
907 } else {
908 iotlb |= PHYS_SECTION_ROM;
909 }
910 } else {
911 iotlb = section - section->address_space->dispatch->map.sections;
912 iotlb += xlat;
913 }
914
915 /* Make accesses to pages with watchpoints go via the
916 watchpoint trap routines. */
917 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
918 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
919 /* Avoid trapping reads of pages with a write breakpoint. */
920 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
921 iotlb = PHYS_SECTION_WATCH + paddr;
922 *address |= TLB_MMIO;
923 break;
924 }
925 }
926 }
927
928 return iotlb;
929 }
930 #endif /* defined(CONFIG_USER_ONLY) */
931
932 #if !defined(CONFIG_USER_ONLY)
933
934 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
935 uint16_t section);
936 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
937
938 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
939 qemu_anon_ram_alloc;
940
941 /*
942  * Set a custom physical guest memory allocator.
943 * Accelerators with unusual needs may need this. Hopefully, we can
944 * get rid of it eventually.
945 */
946 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
947 {
948 phys_mem_alloc = alloc;
949 }
950
951 static uint16_t phys_section_add(PhysPageMap *map,
952 MemoryRegionSection *section)
953 {
954 /* The physical section number is ORed with a page-aligned
955 * pointer to produce the iotlb entries. Thus it should
956 * never overflow into the page-aligned value.
957 */
958 assert(map->sections_nb < TARGET_PAGE_SIZE);
959
960 if (map->sections_nb == map->sections_nb_alloc) {
961 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
962 map->sections = g_renew(MemoryRegionSection, map->sections,
963 map->sections_nb_alloc);
964 }
965 map->sections[map->sections_nb] = *section;
966 memory_region_ref(section->mr);
967 return map->sections_nb++;
968 }
969
970 static void phys_section_destroy(MemoryRegion *mr)
971 {
972 memory_region_unref(mr);
973
974 if (mr->subpage) {
975 subpage_t *subpage = container_of(mr, subpage_t, iomem);
976 object_unref(OBJECT(&subpage->iomem));
977 g_free(subpage);
978 }
979 }
980
981 static void phys_sections_free(PhysPageMap *map)
982 {
983 while (map->sections_nb > 0) {
984 MemoryRegionSection *section = &map->sections[--map->sections_nb];
985 phys_section_destroy(section->mr);
986 }
987 g_free(map->sections);
988 g_free(map->nodes);
989 }
990
991 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
992 {
993 subpage_t *subpage;
994 hwaddr base = section->offset_within_address_space
995 & TARGET_PAGE_MASK;
996 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
997 d->map.nodes, d->map.sections);
998 MemoryRegionSection subsection = {
999 .offset_within_address_space = base,
1000 .size = int128_make64(TARGET_PAGE_SIZE),
1001 };
1002 hwaddr start, end;
1003
1004 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1005
1006 if (!(existing->mr->subpage)) {
1007 subpage = subpage_init(d->as, base);
1008 subsection.address_space = d->as;
1009 subsection.mr = &subpage->iomem;
1010 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1011 phys_section_add(&d->map, &subsection));
1012 } else {
1013 subpage = container_of(existing->mr, subpage_t, iomem);
1014 }
1015 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1016 end = start + int128_get64(section->size) - 1;
1017 subpage_register(subpage, start, end,
1018 phys_section_add(&d->map, section));
1019 }
1020
1021
1022 static void register_multipage(AddressSpaceDispatch *d,
1023 MemoryRegionSection *section)
1024 {
1025 hwaddr start_addr = section->offset_within_address_space;
1026 uint16_t section_index = phys_section_add(&d->map, section);
1027 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1028 TARGET_PAGE_BITS));
1029
1030 assert(num_pages);
1031 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1032 }
1033
1034 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1035 {
1036 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1037 AddressSpaceDispatch *d = as->next_dispatch;
1038 MemoryRegionSection now = *section, remain = *section;
1039 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1040
1041 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1042 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1043 - now.offset_within_address_space;
1044
1045 now.size = int128_min(int128_make64(left), now.size);
1046 register_subpage(d, &now);
1047 } else {
1048 now.size = int128_zero();
1049 }
1050 while (int128_ne(remain.size, now.size)) {
1051 remain.size = int128_sub(remain.size, now.size);
1052 remain.offset_within_address_space += int128_get64(now.size);
1053 remain.offset_within_region += int128_get64(now.size);
1054 now = remain;
1055 if (int128_lt(remain.size, page_size)) {
1056 register_subpage(d, &now);
1057 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1058 now.size = page_size;
1059 register_subpage(d, &now);
1060 } else {
1061 now.size = int128_and(now.size, int128_neg(page_size));
1062 register_multipage(d, &now);
1063 }
1064 }
1065 }
1066
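/*
 * Worked example (illustrative, assuming 4 KiB target pages): a section at
 * offset 0x1800 with size 0x2000 is split by mem_add() into a subpage for
 * [0x1800, 0x2000), one full page registered via register_multipage() for
 * [0x2000, 0x3000), and a trailing subpage for [0x3000, 0x3800).
 */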
1067 void qemu_flush_coalesced_mmio_buffer(void)
1068 {
1069 if (kvm_enabled())
1070 kvm_flush_coalesced_mmio_buffer();
1071 }
1072
1073 void qemu_mutex_lock_ramlist(void)
1074 {
1075 qemu_mutex_lock(&ram_list.mutex);
1076 }
1077
1078 void qemu_mutex_unlock_ramlist(void)
1079 {
1080 qemu_mutex_unlock(&ram_list.mutex);
1081 }
1082
1083 #ifdef __linux__
1084
1085 #include <sys/vfs.h>
1086
1087 #define HUGETLBFS_MAGIC 0x958458f6
1088
1089 static long gethugepagesize(const char *path, Error **errp)
1090 {
1091 struct statfs fs;
1092 int ret;
1093
1094 do {
1095 ret = statfs(path, &fs);
1096 } while (ret != 0 && errno == EINTR);
1097
1098 if (ret != 0) {
1099 error_setg_errno(errp, errno, "failed to get page size of file %s",
1100 path);
1101 return 0;
1102 }
1103
1104 if (fs.f_type != HUGETLBFS_MAGIC)
1105 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1106
1107 return fs.f_bsize;
1108 }
1109
1110 static void *file_ram_alloc(RAMBlock *block,
1111 ram_addr_t memory,
1112 const char *path,
1113 Error **errp)
1114 {
1115 char *filename;
1116 char *sanitized_name;
1117 char *c;
1118 void *area = NULL;
1119 int fd;
1120 uint64_t hpagesize;
1121 Error *local_err = NULL;
1122
1123 hpagesize = gethugepagesize(path, &local_err);
1124 if (local_err) {
1125 error_propagate(errp, local_err);
1126 goto error;
1127 }
1128 block->mr->align = hpagesize;
1129
1130 if (memory < hpagesize) {
1131 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1132 "or larger than huge page size 0x%" PRIx64,
1133 memory, hpagesize);
1134 goto error;
1135 }
1136
1137 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1138 error_setg(errp,
1139 "host lacks kvm mmu notifiers, -mem-path unsupported");
1140 goto error;
1141 }
1142
1143 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1144 sanitized_name = g_strdup(memory_region_name(block->mr));
1145 for (c = sanitized_name; *c != '\0'; c++) {
1146 if (*c == '/')
1147 *c = '_';
1148 }
1149
1150 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1151 sanitized_name);
1152 g_free(sanitized_name);
1153
1154 fd = mkstemp(filename);
1155 if (fd < 0) {
1156 error_setg_errno(errp, errno,
1157 "unable to create backing store for hugepages");
1158 g_free(filename);
1159 goto error;
1160 }
1161 unlink(filename);
1162 g_free(filename);
1163
1164 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1165
1166 /*
1167 * ftruncate is not supported by hugetlbfs in older
1168 * hosts, so don't bother bailing out on errors.
1169 * If anything goes wrong with it under other filesystems,
1170 * mmap will fail.
1171 */
1172 if (ftruncate(fd, memory)) {
1173 perror("ftruncate");
1174 }
1175
1176 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1177 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1178 fd, 0);
1179 if (area == MAP_FAILED) {
1180 error_setg_errno(errp, errno,
1181 "unable to map backing store for hugepages");
1182 close(fd);
1183 goto error;
1184 }
1185
1186 if (mem_prealloc) {
1187 os_mem_prealloc(fd, area, memory);
1188 }
1189
1190 block->fd = fd;
1191 return area;
1192
1193 error:
1194 if (mem_prealloc) {
1195 error_report("%s", error_get_pretty(*errp));
1196 exit(1);
1197 }
1198 return NULL;
1199 }
1200 #endif
1201
1202 /* Called with the ramlist lock held. */
1203 static ram_addr_t find_ram_offset(ram_addr_t size)
1204 {
1205 RAMBlock *block, *next_block;
1206 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1207
1208 assert(size != 0); /* it would hand out same offset multiple times */
1209
1210 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1211 return 0;
1212 }
1213
1214 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1215 ram_addr_t end, next = RAM_ADDR_MAX;
1216
1217 end = block->offset + block->max_length;
1218
1219 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1220 if (next_block->offset >= end) {
1221 next = MIN(next, next_block->offset);
1222 }
1223 }
1224 if (next - end >= size && next - end < mingap) {
1225 offset = end;
1226 mingap = next - end;
1227 }
1228 }
1229
1230 if (offset == RAM_ADDR_MAX) {
1231 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1232 (uint64_t)size);
1233 abort();
1234 }
1235
1236 return offset;
1237 }
1238
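/*
 * Worked example (illustrative): with blocks occupying [0x0, 0x10000000) and
 * [0x18000000, 0x20000000), a request for 0x4000000 bytes sees two candidate
 * gaps: 0x8000000 bytes after the first block and an unbounded gap after the
 * second. Best fit picks the smallest gap that still fits, so the new block
 * is placed at offset 0x10000000.
 */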
1239 ram_addr_t last_ram_offset(void)
1240 {
1241 RAMBlock *block;
1242 ram_addr_t last = 0;
1243
1244 rcu_read_lock();
1245 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1246 last = MAX(last, block->offset + block->max_length);
1247 }
1248 rcu_read_unlock();
1249 return last;
1250 }
1251
1252 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1253 {
1254 int ret;
1255
1256 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1257 if (!machine_dump_guest_core(current_machine)) {
1258 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1259 if (ret) {
1260 perror("qemu_madvise");
1261 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1262 "but dump_guest_core=off specified\n");
1263 }
1264 }
1265 }
1266
1267 /* Called within an RCU critical section, or while the ramlist lock
1268 * is held.
1269 */
1270 static RAMBlock *find_ram_block(ram_addr_t addr)
1271 {
1272 RAMBlock *block;
1273
1274 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1275 if (block->offset == addr) {
1276 return block;
1277 }
1278 }
1279
1280 return NULL;
1281 }
1282
1283 /* Called with iothread lock held. */
1284 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1285 {
1286 RAMBlock *new_block, *block;
1287
1288 rcu_read_lock();
1289 new_block = find_ram_block(addr);
1290 assert(new_block);
1291 assert(!new_block->idstr[0]);
1292
1293 if (dev) {
1294 char *id = qdev_get_dev_path(dev);
1295 if (id) {
1296 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1297 g_free(id);
1298 }
1299 }
1300 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1301
1302 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1303 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1304 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1305 new_block->idstr);
1306 abort();
1307 }
1308 }
1309 rcu_read_unlock();
1310 }
1311
1312 /* Called with iothread lock held. */
1313 void qemu_ram_unset_idstr(ram_addr_t addr)
1314 {
1315 RAMBlock *block;
1316
1317 /* FIXME: arch_init.c assumes that this is not called throughout
1318 * migration. Ignore the problem since hot-unplug during migration
1319 * does not work anyway.
1320 */
1321
1322 rcu_read_lock();
1323 block = find_ram_block(addr);
1324 if (block) {
1325 memset(block->idstr, 0, sizeof(block->idstr));
1326 }
1327 rcu_read_unlock();
1328 }
1329
1330 static int memory_try_enable_merging(void *addr, size_t len)
1331 {
1332 if (!machine_mem_merge(current_machine)) {
1333 /* disabled by the user */
1334 return 0;
1335 }
1336
1337 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1338 }
1339
1340 /* Only legal before the guest might have detected the memory size: e.g. on
1341  * incoming migration, or right after reset.
1342  *
1343  * As the memory core doesn't know how memory is accessed, it is up to the
1344  * resize callback to update device state and/or add assertions to detect
1345 * misuse, if necessary.
1346 */
1347 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1348 {
1349 RAMBlock *block = find_ram_block(base);
1350
1351 assert(block);
1352
1353 newsize = TARGET_PAGE_ALIGN(newsize);
1354
1355 if (block->used_length == newsize) {
1356 return 0;
1357 }
1358
1359 if (!(block->flags & RAM_RESIZEABLE)) {
1360 error_setg_errno(errp, EINVAL,
1361 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1362 " in != 0x" RAM_ADDR_FMT, block->idstr,
1363 newsize, block->used_length);
1364 return -EINVAL;
1365 }
1366
1367 if (block->max_length < newsize) {
1368 error_setg_errno(errp, EINVAL,
1369 "Length too large: %s: 0x" RAM_ADDR_FMT
1370 " > 0x" RAM_ADDR_FMT, block->idstr,
1371 newsize, block->max_length);
1372 return -EINVAL;
1373 }
1374
1375 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1376 block->used_length = newsize;
1377 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1378 memory_region_set_size(block->mr, newsize);
1379 if (block->resized) {
1380 block->resized(block->idstr, newsize, block->host);
1381 }
1382 return 0;
1383 }
1384
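/* Illustrative usage sketch (hypothetical caller, not code from this file):
 * growing a RAM_RESIZEABLE block, e.g. on incoming migration, and reporting
 * the error if the request is rejected.
 */
static int qemu_ram_resize_example(ram_addr_t base, ram_addr_t newsize)
{
    Error *err = NULL;

    if (qemu_ram_resize(base, newsize, &err) < 0) {
        error_report("%s", error_get_pretty(err));
        error_free(err);
        return -1;
    }
    return 0;
}
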
1385 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1386 {
1387 RAMBlock *block;
1388 RAMBlock *last_block = NULL;
1389 ram_addr_t old_ram_size, new_ram_size;
1390
1391 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1392
1393 qemu_mutex_lock_ramlist();
1394 new_block->offset = find_ram_offset(new_block->max_length);
1395
1396 if (!new_block->host) {
1397 if (xen_enabled()) {
1398 xen_ram_alloc(new_block->offset, new_block->max_length,
1399 new_block->mr);
1400 } else {
1401 new_block->host = phys_mem_alloc(new_block->max_length,
1402 &new_block->mr->align);
1403 if (!new_block->host) {
1404 error_setg_errno(errp, errno,
1405 "cannot set up guest memory '%s'",
1406 memory_region_name(new_block->mr));
1407 qemu_mutex_unlock_ramlist();
1408 return -1;
1409 }
1410 memory_try_enable_merging(new_block->host, new_block->max_length);
1411 }
1412 }
1413
1414 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1415 * QLIST (which has an RCU-friendly variant) does not have insertion at
1416 * tail, so save the last element in last_block.
1417 */
1418 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1419 last_block = block;
1420 if (block->max_length < new_block->max_length) {
1421 break;
1422 }
1423 }
1424 if (block) {
1425 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1426 } else if (last_block) {
1427 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1428 } else { /* list is empty */
1429 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1430 }
1431 ram_list.mru_block = NULL;
1432
1433 /* Write list before version */
1434 smp_wmb();
1435 ram_list.version++;
1436 qemu_mutex_unlock_ramlist();
1437
1438 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1439
1440 if (new_ram_size > old_ram_size) {
1441 int i;
1442
1443 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1444 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1445 ram_list.dirty_memory[i] =
1446 bitmap_zero_extend(ram_list.dirty_memory[i],
1447 old_ram_size, new_ram_size);
1448 }
1449 }
1450 cpu_physical_memory_set_dirty_range(new_block->offset,
1451 new_block->used_length);
1452
1453 if (new_block->host) {
1454 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1455 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1456 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1457 if (kvm_enabled()) {
1458 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1459 }
1460 }
1461
1462 return new_block->offset;
1463 }
1464
1465 #ifdef __linux__
1466 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1467 bool share, const char *mem_path,
1468 Error **errp)
1469 {
1470 RAMBlock *new_block;
1471 ram_addr_t addr;
1472 Error *local_err = NULL;
1473
1474 if (xen_enabled()) {
1475 error_setg(errp, "-mem-path not supported with Xen");
1476 return -1;
1477 }
1478
1479 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1480 /*
1481 * file_ram_alloc() needs to allocate just like
1482 * phys_mem_alloc, but we haven't bothered to provide
1483 * a hook there.
1484 */
1485 error_setg(errp,
1486 "-mem-path not supported with this accelerator");
1487 return -1;
1488 }
1489
1490 size = TARGET_PAGE_ALIGN(size);
1491 new_block = g_malloc0(sizeof(*new_block));
1492 new_block->mr = mr;
1493 new_block->used_length = size;
1494 new_block->max_length = size;
1495 new_block->flags = share ? RAM_SHARED : 0;
1496 new_block->host = file_ram_alloc(new_block, size,
1497 mem_path, errp);
1498 if (!new_block->host) {
1499 g_free(new_block);
1500 return -1;
1501 }
1502
1503 addr = ram_block_add(new_block, &local_err);
1504 if (local_err) {
1505 g_free(new_block);
1506 error_propagate(errp, local_err);
1507 return -1;
1508 }
1509 return addr;
1510 }
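
/* Illustrative usage sketch (hypothetical helper and path, not code from this
 * file): backing a region with hugetlbfs the way the -mem-path machinery does.
 * "/dev/hugepages" is only an assumed mount point.
 */
static ram_addr_t qemu_ram_alloc_from_file_example(ram_addr_t size,
                                                   MemoryRegion *mr,
                                                   Error **errp)
{
    return qemu_ram_alloc_from_file(size, mr, true /* share */,
                                    "/dev/hugepages", errp);
}
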
1511 #endif
1512
1513 static
1514 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1515 void (*resized)(const char*,
1516 uint64_t length,
1517 void *host),
1518 void *host, bool resizeable,
1519 MemoryRegion *mr, Error **errp)
1520 {
1521 RAMBlock *new_block;
1522 ram_addr_t addr;
1523 Error *local_err = NULL;
1524
1525 size = TARGET_PAGE_ALIGN(size);
1526 max_size = TARGET_PAGE_ALIGN(max_size);
1527 new_block = g_malloc0(sizeof(*new_block));
1528 new_block->mr = mr;
1529 new_block->resized = resized;
1530 new_block->used_length = size;
1531 new_block->max_length = max_size;
1532 assert(max_size >= size);
1533 new_block->fd = -1;
1534 new_block->host = host;
1535 if (host) {
1536 new_block->flags |= RAM_PREALLOC;
1537 }
1538 if (resizeable) {
1539 new_block->flags |= RAM_RESIZEABLE;
1540 }
1541 addr = ram_block_add(new_block, &local_err);
1542 if (local_err) {
1543 g_free(new_block);
1544 error_propagate(errp, local_err);
1545 return -1;
1546 }
1547 return addr;
1548 }
1549
1550 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1551 MemoryRegion *mr, Error **errp)
1552 {
1553 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1554 }
1555
1556 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1557 {
1558 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1559 }
1560
1561 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1562 void (*resized)(const char*,
1563 uint64_t length,
1564 void *host),
1565 MemoryRegion *mr, Error **errp)
1566 {
1567 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1568 }
1569
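/* Illustrative sketch (hypothetical callback and sizes, not code from this
 * file): a device that wants its RAM to grow later registers a resize
 * notifier when allocating the block.
 */
static void ram_resized_example_cb(const char *id, uint64_t new_len, void *host)
{
    /* A real device would update its own view of the usable size here. */
}

static ram_addr_t qemu_ram_alloc_resizeable_example(MemoryRegion *mr,
                                                    Error **errp)
{
    return qemu_ram_alloc_resizeable(16 * 1024 * 1024 /* initial */,
                                     64 * 1024 * 1024 /* maximum */,
                                     ram_resized_example_cb, mr, errp);
}
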
1570 void qemu_ram_free_from_ptr(ram_addr_t addr)
1571 {
1572 RAMBlock *block;
1573
1574 qemu_mutex_lock_ramlist();
1575 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1576 if (addr == block->offset) {
1577 QLIST_REMOVE_RCU(block, next);
1578 ram_list.mru_block = NULL;
1579 /* Write list before version */
1580 smp_wmb();
1581 ram_list.version++;
1582 g_free_rcu(block, rcu);
1583 break;
1584 }
1585 }
1586 qemu_mutex_unlock_ramlist();
1587 }
1588
1589 static void reclaim_ramblock(RAMBlock *block)
1590 {
1591 if (block->flags & RAM_PREALLOC) {
1592 ;
1593 } else if (xen_enabled()) {
1594 xen_invalidate_map_cache_entry(block->host);
1595 #ifndef _WIN32
1596 } else if (block->fd >= 0) {
1597 munmap(block->host, block->max_length);
1598 close(block->fd);
1599 #endif
1600 } else {
1601 qemu_anon_ram_free(block->host, block->max_length);
1602 }
1603 g_free(block);
1604 }
1605
1606 void qemu_ram_free(ram_addr_t addr)
1607 {
1608 RAMBlock *block;
1609
1610 qemu_mutex_lock_ramlist();
1611 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1612 if (addr == block->offset) {
1613 QLIST_REMOVE_RCU(block, next);
1614 ram_list.mru_block = NULL;
1615 /* Write list before version */
1616 smp_wmb();
1617 ram_list.version++;
1618 call_rcu(block, reclaim_ramblock, rcu);
1619 break;
1620 }
1621 }
1622 qemu_mutex_unlock_ramlist();
1623 }
1624
1625 #ifndef _WIN32
1626 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1627 {
1628 RAMBlock *block;
1629 ram_addr_t offset;
1630 int flags;
1631 void *area, *vaddr;
1632
1633 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1634 offset = addr - block->offset;
1635 if (offset < block->max_length) {
1636 vaddr = ramblock_ptr(block, offset);
1637 if (block->flags & RAM_PREALLOC) {
1638 ;
1639 } else if (xen_enabled()) {
1640 abort();
1641 } else {
1642 flags = MAP_FIXED;
1643 if (block->fd >= 0) {
1644 flags |= (block->flags & RAM_SHARED ?
1645 MAP_SHARED : MAP_PRIVATE);
1646 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1647 flags, block->fd, offset);
1648 } else {
1649 /*
1650 * Remap needs to match alloc. Accelerators that
1651 * set phys_mem_alloc never remap. If they did,
1652 * we'd need a remap hook here.
1653 */
1654 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1655
1656 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1657 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1658 flags, -1, 0);
1659 }
1660 if (area != vaddr) {
1661 fprintf(stderr, "Could not remap addr: "
1662 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1663 length, addr);
1664 exit(1);
1665 }
1666 memory_try_enable_merging(vaddr, length);
1667 qemu_ram_setup_dump(vaddr, length);
1668 }
1669 }
1670 }
1671 }
1672 #endif /* !_WIN32 */
1673
1674 int qemu_get_ram_fd(ram_addr_t addr)
1675 {
1676 RAMBlock *block;
1677 int fd;
1678
1679 rcu_read_lock();
1680 block = qemu_get_ram_block(addr);
1681 fd = block->fd;
1682 rcu_read_unlock();
1683 return fd;
1684 }
1685
1686 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1687 {
1688 RAMBlock *block;
1689 void *ptr;
1690
1691 rcu_read_lock();
1692 block = qemu_get_ram_block(addr);
1693 ptr = ramblock_ptr(block, 0);
1694 rcu_read_unlock();
1695 return ptr;
1696 }
1697
1698 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1699 * This should not be used for general purpose DMA. Use address_space_map
1700 * or address_space_rw instead. For local memory (e.g. video ram) that the
1701 * device owns, use memory_region_get_ram_ptr.
1702 *
1703 * By the time this function returns, the returned pointer is not protected
1704 * by RCU anymore. If the caller is not within an RCU critical section and
1705 * does not hold the iothread lock, it must have other means of protecting the
1706 * pointer, such as a reference to the region that includes the incoming
1707 * ram_addr_t.
1708 */
1709 void *qemu_get_ram_ptr(ram_addr_t addr)
1710 {
1711 RAMBlock *block;
1712 void *ptr;
1713
1714 rcu_read_lock();
1715 block = qemu_get_ram_block(addr);
1716
1717 if (xen_enabled() && block->host == NULL) {
1718 /* We need to check if the requested address is in the RAM
1719 * because we don't want to map the entire memory in QEMU.
1720 * In that case just map until the end of the page.
1721 */
1722 if (block->offset == 0) {
1723 ptr = xen_map_cache(addr, 0, 0);
1724 goto unlock;
1725 }
1726
1727 block->host = xen_map_cache(block->offset, block->max_length, 1);
1728 }
1729 ptr = ramblock_ptr(block, addr - block->offset);
1730
1731 unlock:
1732 rcu_read_unlock();
1733 return ptr;
1734 }
1735
1736 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1737 * but takes a size argument.
1738 *
1739 * By the time this function returns, the returned pointer is not protected
1740 * by RCU anymore. If the caller is not within an RCU critical section and
1741 * does not hold the iothread lock, it must have other means of protecting the
1742 * pointer, such as a reference to the region that includes the incoming
1743 * ram_addr_t.
1744 */
1745 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1746 {
1747 void *ptr;
1748 if (*size == 0) {
1749 return NULL;
1750 }
1751 if (xen_enabled()) {
1752 return xen_map_cache(addr, *size, 1);
1753 } else {
1754 RAMBlock *block;
1755 rcu_read_lock();
1756 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1757 if (addr - block->offset < block->max_length) {
1758 if (addr - block->offset + *size > block->max_length)
1759 *size = block->max_length - addr + block->offset;
1760 ptr = ramblock_ptr(block, addr - block->offset);
1761 rcu_read_unlock();
1762 return ptr;
1763 }
1764 }
1765
1766 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1767 abort();
1768 }
1769 }
1770
1771 /* Some of the softmmu routines need to translate from a host pointer
1772 * (typically a TLB entry) back to a ram offset.
1773 *
1774 * By the time this function returns, the returned pointer is not protected
1775 * by RCU anymore. If the caller is not within an RCU critical section and
1776 * does not hold the iothread lock, it must have other means of protecting the
1777 * pointer, such as a reference to the region that includes the incoming
1778 * ram_addr_t.
1779 */
1780 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1781 {
1782 RAMBlock *block;
1783 uint8_t *host = ptr;
1784 MemoryRegion *mr;
1785
1786 if (xen_enabled()) {
1787 rcu_read_lock();
1788 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1789 mr = qemu_get_ram_block(*ram_addr)->mr;
1790 rcu_read_unlock();
1791 return mr;
1792 }
1793
1794 rcu_read_lock();
1795 block = atomic_rcu_read(&ram_list.mru_block);
1796 if (block && block->host && host - block->host < block->max_length) {
1797 goto found;
1798 }
1799
1800 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1801 /* This case happens when the block is not mapped. */
1802 if (block->host == NULL) {
1803 continue;
1804 }
1805 if (host - block->host < block->max_length) {
1806 goto found;
1807 }
1808 }
1809
1810 rcu_read_unlock();
1811 return NULL;
1812
1813 found:
1814 *ram_addr = block->offset + (host - block->host);
1815 mr = block->mr;
1816 rcu_read_unlock();
1817 return mr;
1818 }
1819
1820 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1821 uint64_t val, unsigned size)
1822 {
1823 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1824 tb_invalidate_phys_page_fast(ram_addr, size);
1825 }
1826 switch (size) {
1827 case 1:
1828 stb_p(qemu_get_ram_ptr(ram_addr), val);
1829 break;
1830 case 2:
1831 stw_p(qemu_get_ram_ptr(ram_addr), val);
1832 break;
1833 case 4:
1834 stl_p(qemu_get_ram_ptr(ram_addr), val);
1835 break;
1836 default:
1837 abort();
1838 }
1839 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1840 /* we remove the notdirty callback only if the code has been
1841 flushed */
1842 if (!cpu_physical_memory_is_clean(ram_addr)) {
1843 CPUArchState *env = current_cpu->env_ptr;
1844 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1845 }
1846 }
1847
1848 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1849 unsigned size, bool is_write)
1850 {
1851 return is_write;
1852 }
1853
1854 static const MemoryRegionOps notdirty_mem_ops = {
1855 .write = notdirty_mem_write,
1856 .valid.accepts = notdirty_mem_accepts,
1857 .endianness = DEVICE_NATIVE_ENDIAN,
1858 };
1859
1860 /* Generate a debug exception if a watchpoint has been hit. */
1861 static void check_watchpoint(int offset, int len, int flags)
1862 {
1863 CPUState *cpu = current_cpu;
1864 CPUArchState *env = cpu->env_ptr;
1865 target_ulong pc, cs_base;
1866 target_ulong vaddr;
1867 CPUWatchpoint *wp;
1868 int cpu_flags;
1869
1870 if (cpu->watchpoint_hit) {
1871 /* We re-entered the check after replacing the TB. Now raise
1872  * the debug interrupt so that it will trigger after the
1873 * current instruction. */
1874 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1875 return;
1876 }
1877 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1878 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1879 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1880 && (wp->flags & flags)) {
1881 if (flags == BP_MEM_READ) {
1882 wp->flags |= BP_WATCHPOINT_HIT_READ;
1883 } else {
1884 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1885 }
1886 wp->hitaddr = vaddr;
1887 if (!cpu->watchpoint_hit) {
1888 cpu->watchpoint_hit = wp;
1889 tb_check_watchpoint(cpu);
1890 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1891 cpu->exception_index = EXCP_DEBUG;
1892 cpu_loop_exit(cpu);
1893 } else {
1894 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1895 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1896 cpu_resume_from_signal(cpu, NULL);
1897 }
1898 }
1899 } else {
1900 wp->flags &= ~BP_WATCHPOINT_HIT;
1901 }
1902 }
1903 }
1904
1905 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1906 so these check for a hit then pass through to the normal out-of-line
1907 phys routines. */
1908 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1909 unsigned size)
1910 {
1911 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1912 switch (size) {
1913 case 1: return ldub_phys(&address_space_memory, addr);
1914 case 2: return lduw_phys(&address_space_memory, addr);
1915 case 4: return ldl_phys(&address_space_memory, addr);
1916 default: abort();
1917 }
1918 }
1919
1920 static void watch_mem_write(void *opaque, hwaddr addr,
1921 uint64_t val, unsigned size)
1922 {
1923 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1924 switch (size) {
1925 case 1:
1926 stb_phys(&address_space_memory, addr, val);
1927 break;
1928 case 2:
1929 stw_phys(&address_space_memory, addr, val);
1930 break;
1931 case 4:
1932 stl_phys(&address_space_memory, addr, val);
1933 break;
1934 default: abort();
1935 }
1936 }
1937
1938 static const MemoryRegionOps watch_mem_ops = {
1939 .read = watch_mem_read,
1940 .write = watch_mem_write,
1941 .endianness = DEVICE_NATIVE_ENDIAN,
1942 };
1943
1944 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
1945 unsigned len, MemTxAttrs attrs)
1946 {
1947 subpage_t *subpage = opaque;
1948 uint8_t buf[8];
1949 MemTxResult res;
1950
1951 #if defined(DEBUG_SUBPAGE)
1952 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1953 subpage, len, addr);
1954 #endif
1955 res = address_space_read(subpage->as, addr + subpage->base,
1956 attrs, buf, len);
1957 if (res) {
1958 return res;
1959 }
1960 switch (len) {
1961 case 1:
1962 *data = ldub_p(buf);
1963 return MEMTX_OK;
1964 case 2:
1965 *data = lduw_p(buf);
1966 return MEMTX_OK;
1967 case 4:
1968 *data = ldl_p(buf);
1969 return MEMTX_OK;
1970 case 8:
1971 *data = ldq_p(buf);
1972 return MEMTX_OK;
1973 default:
1974 abort();
1975 }
1976 }
1977
1978 static MemTxResult subpage_write(void *opaque, hwaddr addr,
1979 uint64_t value, unsigned len, MemTxAttrs attrs)
1980 {
1981 subpage_t *subpage = opaque;
1982 uint8_t buf[8];
1983
1984 #if defined(DEBUG_SUBPAGE)
1985 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1986 " value %"PRIx64"\n",
1987 __func__, subpage, len, addr, value);
1988 #endif
1989 switch (len) {
1990 case 1:
1991 stb_p(buf, value);
1992 break;
1993 case 2:
1994 stw_p(buf, value);
1995 break;
1996 case 4:
1997 stl_p(buf, value);
1998 break;
1999 case 8:
2000 stq_p(buf, value);
2001 break;
2002 default:
2003 abort();
2004 }
2005 return address_space_write(subpage->as, addr + subpage->base,
2006 attrs, buf, len);
2007 }
2008
2009 static bool subpage_accepts(void *opaque, hwaddr addr,
2010 unsigned len, bool is_write)
2011 {
2012 subpage_t *subpage = opaque;
2013 #if defined(DEBUG_SUBPAGE)
2014 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2015 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2016 #endif
2017
2018 return address_space_access_valid(subpage->as, addr + subpage->base,
2019 len, is_write);
2020 }
2021
2022 static const MemoryRegionOps subpage_ops = {
2023 .read_with_attrs = subpage_read,
2024 .write_with_attrs = subpage_write,
2025 .impl.min_access_size = 1,
2026 .impl.max_access_size = 8,
2027 .valid.min_access_size = 1,
2028 .valid.max_access_size = 8,
2029 .valid.accepts = subpage_accepts,
2030 .endianness = DEVICE_NATIVE_ENDIAN,
2031 };
2032
2033 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2034 uint16_t section)
2035 {
2036 int idx, eidx;
2037
2038 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2039 return -1;
2040 idx = SUBPAGE_IDX(start);
2041 eidx = SUBPAGE_IDX(end);
2042 #if defined(DEBUG_SUBPAGE)
2043 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2044 __func__, mmio, start, end, idx, eidx, section);
2045 #endif
2046 for (; idx <= eidx; idx++) {
2047 mmio->sub_section[idx] = section;
2048 }
2049
2050 return 0;
2051 }
2052
2053 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2054 {
2055 subpage_t *mmio;
2056
2057 mmio = g_malloc0(sizeof(subpage_t));
2058
2059 mmio->as = as;
2060 mmio->base = base;
2061 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2062 NULL, TARGET_PAGE_SIZE);
2063 mmio->iomem.subpage = true;
2064 #if defined(DEBUG_SUBPAGE)
2065 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2066 mmio, base, TARGET_PAGE_SIZE);
2067 #endif
2068 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2069
2070 return mmio;
2071 }
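/* For illustration: a subpage is used when one TARGET_PAGE_SIZE page is
 * shared by more than one section, e.g. a small MMIO region that does not
 * fill a whole page.  Assuming hypothetical values page_base, mmio_section
 * and ram_section, registration would look like this sketch:
 *
 *     subpage_t *sp = subpage_init(&address_space_memory, page_base);
 *     subpage_register(sp, 0x000, 0x0ff, mmio_section);
 *     subpage_register(sp, 0x100, TARGET_PAGE_SIZE - 1, ram_section);
 *
 * Accesses landing in that page are routed through subpage_ops above, which
 * re-dispatches each access to the owning section via address_space_read()
 * and address_space_write().
 */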
2072
2073 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2074 MemoryRegion *mr)
2075 {
2076 assert(as);
2077 MemoryRegionSection section = {
2078 .address_space = as,
2079 .mr = mr,
2080 .offset_within_address_space = 0,
2081 .offset_within_region = 0,
2082 .size = int128_2_64(),
2083 };
2084
2085 return phys_section_add(map, &section);
2086 }
2087
2088 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2089 {
2090 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2091 MemoryRegionSection *sections = d->map.sections;
2092
2093 return sections[index & ~TARGET_PAGE_MASK].mr;
2094 }
2095
2096 static void io_mem_init(void)
2097 {
2098 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2099 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2100 NULL, UINT64_MAX);
2101 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2102 NULL, UINT64_MAX);
2103 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2104 NULL, UINT64_MAX);
2105 }
2106
2107 static void mem_begin(MemoryListener *listener)
2108 {
2109 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2110 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2111 uint16_t n;
2112
2113 n = dummy_section(&d->map, as, &io_mem_unassigned);
2114 assert(n == PHYS_SECTION_UNASSIGNED);
2115 n = dummy_section(&d->map, as, &io_mem_notdirty);
2116 assert(n == PHYS_SECTION_NOTDIRTY);
2117 n = dummy_section(&d->map, as, &io_mem_rom);
2118 assert(n == PHYS_SECTION_ROM);
2119 n = dummy_section(&d->map, as, &io_mem_watch);
2120 assert(n == PHYS_SECTION_WATCH);
2121
2122 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2123 d->as = as;
2124 as->next_dispatch = d;
2125 }
2126
2127 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2128 {
2129 phys_sections_free(&d->map);
2130 g_free(d);
2131 }
2132
2133 static void mem_commit(MemoryListener *listener)
2134 {
2135 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2136 AddressSpaceDispatch *cur = as->dispatch;
2137 AddressSpaceDispatch *next = as->next_dispatch;
2138
2139 phys_page_compact_all(next, next->map.nodes_nb);
2140
2141 atomic_rcu_set(&as->dispatch, next);
2142 if (cur) {
2143 call_rcu(cur, address_space_dispatch_free, rcu);
2144 }
2145 }
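/* A minimal sketch of the RCU publish-and-reclaim pattern implemented by
 * mem_begin()/mem_commit() above; build_next_dispatch() is a hypothetical
 * stand-in for the listener callbacks that populate the new table:
 *
 *     AddressSpaceDispatch *next = build_next_dispatch(as);
 *     AddressSpaceDispatch *old = as->dispatch;
 *
 *     atomic_rcu_set(&as->dispatch, next);                 // publish atomically
 *     if (old) {
 *         call_rcu(old, address_space_dispatch_free, rcu); // free after grace period
 *     }
 *
 * Readers walk as->dispatch inside an RCU read-side critical section, so the
 * old table is only destroyed once no reader can still be using it.
 */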
2146
2147 static void tcg_commit(MemoryListener *listener)
2148 {
2149 CPUState *cpu;
2150
2151 /* since each CPU stores ram addresses in its TLB cache, we must
2152 reset the modified entries */
2153 /* XXX: slow! */
2154 CPU_FOREACH(cpu) {
2155 /* FIXME: Disentangle the cpu.h circular files deps so we can
2156 directly get the right CPU from listener. */
2157 if (cpu->tcg_as_listener != listener) {
2158 continue;
2159 }
2160 cpu_reload_memory_map(cpu);
2161 }
2162 }
2163
2164 static void core_log_global_start(MemoryListener *listener)
2165 {
2166 cpu_physical_memory_set_dirty_tracking(true);
2167 }
2168
2169 static void core_log_global_stop(MemoryListener *listener)
2170 {
2171 cpu_physical_memory_set_dirty_tracking(false);
2172 }
2173
2174 static MemoryListener core_memory_listener = {
2175 .log_global_start = core_log_global_start,
2176 .log_global_stop = core_log_global_stop,
2177 .priority = 1,
2178 };
2179
2180 void address_space_init_dispatch(AddressSpace *as)
2181 {
2182 as->dispatch = NULL;
2183 as->dispatch_listener = (MemoryListener) {
2184 .begin = mem_begin,
2185 .commit = mem_commit,
2186 .region_add = mem_add,
2187 .region_nop = mem_add,
2188 .priority = 0,
2189 };
2190 memory_listener_register(&as->dispatch_listener, as);
2191 }
2192
2193 void address_space_unregister(AddressSpace *as)
2194 {
2195 memory_listener_unregister(&as->dispatch_listener);
2196 }
2197
2198 void address_space_destroy_dispatch(AddressSpace *as)
2199 {
2200 AddressSpaceDispatch *d = as->dispatch;
2201
2202 atomic_rcu_set(&as->dispatch, NULL);
2203 if (d) {
2204 call_rcu(d, address_space_dispatch_free, rcu);
2205 }
2206 }
2207
2208 static void memory_map_init(void)
2209 {
2210 system_memory = g_malloc(sizeof(*system_memory));
2211
2212 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2213 address_space_init(&address_space_memory, system_memory, "memory");
2214
2215 system_io = g_malloc(sizeof(*system_io));
2216 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2217 65536);
2218 address_space_init(&address_space_io, system_io, "I/O");
2219
2220 memory_listener_register(&core_memory_listener, &address_space_memory);
2221 }
2222
2223 MemoryRegion *get_system_memory(void)
2224 {
2225 return system_memory;
2226 }
2227
2228 MemoryRegion *get_system_io(void)
2229 {
2230 return system_io;
2231 }
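/* Illustrative usage (hypothetical device region and offset): board code
 * obtains the root region created by memory_map_init() and maps device
 * regions into it with memory_region_add_subregion() from
 * include/exec/memory.h:
 *
 *     MemoryRegion *sysmem = get_system_memory();
 *     memory_region_add_subregion(sysmem, 0x10000000, &mydev->iomem);
 */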
2232
2233 #endif /* !defined(CONFIG_USER_ONLY) */
2234
2235 /* physical memory access (slow version, mainly for debug) */
2236 #if defined(CONFIG_USER_ONLY)
2237 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2238 uint8_t *buf, int len, int is_write)
2239 {
2240 int l, flags;
2241 target_ulong page;
2242 void *p;
2243
2244 while (len > 0) {
2245 page = addr & TARGET_PAGE_MASK;
2246 l = (page + TARGET_PAGE_SIZE) - addr;
2247 if (l > len)
2248 l = len;
2249 flags = page_get_flags(page);
2250 if (!(flags & PAGE_VALID))
2251 return -1;
2252 if (is_write) {
2253 if (!(flags & PAGE_WRITE))
2254 return -1;
2255 /* XXX: this code should not depend on lock_user */
2256 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2257 return -1;
2258 memcpy(p, buf, l);
2259 unlock_user(p, addr, l);
2260 } else {
2261 if (!(flags & PAGE_READ))
2262 return -1;
2263 /* XXX: this code should not depend on lock_user */
2264 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2265 return -1;
2266 memcpy(buf, p, l);
2267 unlock_user(p, addr, 0);
2268 }
2269 len -= l;
2270 buf += l;
2271 addr += l;
2272 }
2273 return 0;
2274 }
2275
2276 #else
2277
2278 static void invalidate_and_set_dirty(hwaddr addr,
2279 hwaddr length)
2280 {
2281 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2282 tb_invalidate_phys_range(addr, addr + length, 0);
2283 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2284 }
2285 xen_modified_memory(addr, length);
2286 }
2287
2288 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2289 {
2290 unsigned access_size_max = mr->ops->valid.max_access_size;
2291
2292 /* Regions are assumed to support 1-4 byte accesses unless
2293 otherwise specified. */
2294 if (access_size_max == 0) {
2295 access_size_max = 4;
2296 }
2297
2298 /* Bound the maximum access by the alignment of the address. */
2299 if (!mr->ops->impl.unaligned) {
2300 unsigned align_size_max = addr & -addr;
2301 if (align_size_max != 0 && align_size_max < access_size_max) {
2302 access_size_max = align_size_max;
2303 }
2304 }
2305
2306 /* Don't attempt accesses larger than the maximum. */
2307 if (l > access_size_max) {
2308 l = access_size_max;
2309 }
2310 if (l & (l - 1)) {
2311 l = 1 << (qemu_fls(l) - 1);
2312 }
2313
2314 return l;
2315 }
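/* Worked example of the clamping above (illustrative only).  For a region
 * with valid.max_access_size == 4 and impl.unaligned == false, a request of
 * l == 8 bytes at addr == 0x1006 is reduced as follows:
 *   - access_size_max starts at 4;
 *   - addr & -addr == 2, so alignment caps the access at 2 bytes;
 *   - l becomes 2, and the remaining bytes are picked up by later iterations
 *     of the caller's loop (see address_space_rw() below).
 */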
2316
2317 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2318 uint8_t *buf, int len, bool is_write)
2319 {
2320 hwaddr l;
2321 uint8_t *ptr;
2322 uint64_t val;
2323 hwaddr addr1;
2324 MemoryRegion *mr;
2325 MemTxResult result = MEMTX_OK;
2326
2327 while (len > 0) {
2328 l = len;
2329 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2330
2331 if (is_write) {
2332 if (!memory_access_is_direct(mr, is_write)) {
2333 l = memory_access_size(mr, l, addr1);
2334 /* XXX: could force current_cpu to NULL to avoid
2335 potential bugs */
2336 switch (l) {
2337 case 8:
2338 /* 64 bit write access */
2339 val = ldq_p(buf);
2340 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2341 attrs);
2342 break;
2343 case 4:
2344 /* 32 bit write access */
2345 val = ldl_p(buf);
2346 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2347 attrs);
2348 break;
2349 case 2:
2350 /* 16 bit write access */
2351 val = lduw_p(buf);
2352 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2353 attrs);
2354 break;
2355 case 1:
2356 /* 8 bit write access */
2357 val = ldub_p(buf);
2358 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2359 attrs);
2360 break;
2361 default:
2362 abort();
2363 }
2364 } else {
2365 addr1 += memory_region_get_ram_addr(mr);
2366 /* RAM case */
2367 ptr = qemu_get_ram_ptr(addr1);
2368 memcpy(ptr, buf, l);
2369 invalidate_and_set_dirty(addr1, l);
2370 }
2371 } else {
2372 if (!memory_access_is_direct(mr, is_write)) {
2373 /* I/O case */
2374 l = memory_access_size(mr, l, addr1);
2375 switch (l) {
2376 case 8:
2377 /* 64 bit read access */
2378 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2379 attrs);
2380 stq_p(buf, val);
2381 break;
2382 case 4:
2383 /* 32 bit read access */
2384 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2385 attrs);
2386 stl_p(buf, val);
2387 break;
2388 case 2:
2389 /* 16 bit read access */
2390 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2391 attrs);
2392 stw_p(buf, val);
2393 break;
2394 case 1:
2395 /* 8 bit read access */
2396 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2397 attrs);
2398 stb_p(buf, val);
2399 break;
2400 default:
2401 abort();
2402 }
2403 } else {
2404 /* RAM case */
2405 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2406 memcpy(buf, ptr, l);
2407 }
2408 }
2409 len -= l;
2410 buf += l;
2411 addr += l;
2412 }
2413
2414 return result;
2415 }
2416
2417 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2418 const uint8_t *buf, int len)
2419 {
2420 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2421 }
2422
2423 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2424 uint8_t *buf, int len)
2425 {
2426 return address_space_rw(as, addr, attrs, buf, len, false);
2427 }
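/* Minimal usage sketch for the attribute-carrying API above; the guest
 * physical address 0x1000 and the buffer are hypothetical.  Callers with no
 * special transaction attributes pass MEMTXATTRS_UNSPECIFIED and can check
 * the returned MemTxResult:
 *
 *     uint8_t data[4] = { 0x12, 0x34, 0x56, 0x78 };
 *     MemTxResult r;
 *
 *     r = address_space_write(&address_space_memory, 0x1000,
 *                             MEMTXATTRS_UNSPECIFIED, data, sizeof(data));
 *     if (r != MEMTX_OK) {
 *         // the target region reported a decode or device error
 *     }
 *     r = address_space_read(&address_space_memory, 0x1000,
 *                            MEMTXATTRS_UNSPECIFIED, data, sizeof(data));
 */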
2428
2429
2430 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2431 int len, int is_write)
2432 {
2433 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2434 buf, len, is_write);
2435 }
2436
2437 enum write_rom_type {
2438 WRITE_DATA,
2439 FLUSH_CACHE,
2440 };
2441
2442 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2443 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2444 {
2445 hwaddr l;
2446 uint8_t *ptr;
2447 hwaddr addr1;
2448 MemoryRegion *mr;
2449
2450 while (len > 0) {
2451 l = len;
2452 mr = address_space_translate(as, addr, &addr1, &l, true);
2453
2454 if (!(memory_region_is_ram(mr) ||
2455 memory_region_is_romd(mr))) {
2456 /* do nothing */
2457 } else {
2458 addr1 += memory_region_get_ram_addr(mr);
2459 /* ROM/RAM case */
2460 ptr = qemu_get_ram_ptr(addr1);
2461 switch (type) {
2462 case WRITE_DATA:
2463 memcpy(ptr, buf, l);
2464 invalidate_and_set_dirty(addr1, l);
2465 break;
2466 case FLUSH_CACHE:
2467 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2468 break;
2469 }
2470 }
2471 len -= l;
2472 buf += l;
2473 addr += l;
2474 }
2475 }
2476
2477 /* used for ROM loading: can write to RAM and ROM */
2478 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2479 const uint8_t *buf, int len)
2480 {
2481 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2482 }
2483
2484 void cpu_flush_icache_range(hwaddr start, int len)
2485 {
2486 /*
2487 * This function should do the same thing as an icache flush that was
2488 * triggered from within the guest. For TCG we are always cache coherent,
2489 * so there is no need to flush anything. For KVM / Xen we need to flush
2490 * the host's instruction cache at least.
2491 */
2492 if (tcg_enabled()) {
2493 return;
2494 }
2495
2496 cpu_physical_memory_write_rom_internal(&address_space_memory,
2497 start, NULL, len, FLUSH_CACHE);
2498 }
2499
2500 typedef struct {
2501 MemoryRegion *mr;
2502 void *buffer;
2503 hwaddr addr;
2504 hwaddr len;
2505 } BounceBuffer;
2506
2507 static BounceBuffer bounce;
2508
2509 typedef struct MapClient {
2510 void *opaque;
2511 void (*callback)(void *opaque);
2512 QLIST_ENTRY(MapClient) link;
2513 } MapClient;
2514
2515 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2516 = QLIST_HEAD_INITIALIZER(map_client_list);
2517
2518 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2519 {
2520 MapClient *client = g_malloc(sizeof(*client));
2521
2522 client->opaque = opaque;
2523 client->callback = callback;
2524 QLIST_INSERT_HEAD(&map_client_list, client, link);
2525 return client;
2526 }
2527
2528 static void cpu_unregister_map_client(void *_client)
2529 {
2530 MapClient *client = (MapClient *)_client;
2531
2532 QLIST_REMOVE(client, link);
2533 g_free(client);
2534 }
2535
2536 static void cpu_notify_map_clients(void)
2537 {
2538 MapClient *client;
2539
2540 while (!QLIST_EMPTY(&map_client_list)) {
2541 client = QLIST_FIRST(&map_client_list);
2542 client->callback(client->opaque);
2543 cpu_unregister_map_client(client);
2544 }
2545 }
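/* Illustrative sketch of the map-client mechanism above; retry_dma() and the
 * opaque device-state pointer s are hypothetical.  A caller whose
 * address_space_map() attempt failed because the single bounce buffer was
 * busy can register a callback and retry once notified:
 *
 *     static void retry_dma(void *opaque)
 *     {
 *         // bounce buffer released: try address_space_map() again
 *     }
 *     ...
 *     cpu_register_map_client(s, retry_dma);
 */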
2546
2547 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2548 {
2549 MemoryRegion *mr;
2550 hwaddr l, xlat;
2551
2552 while (len > 0) {
2553 l = len;
2554 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2555 if (!memory_access_is_direct(mr, is_write)) {
2556 l = memory_access_size(mr, l, addr);
2557 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2558 return false;
2559 }
2560 }
2561
2562 len -= l;
2563 addr += l;
2564 }
2565 return true;
2566 }
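/* Illustrative check (hypothetical descriptor address and length): a device
 * model can probe whether a DMA transfer would succeed before issuing it:
 *
 *     if (!address_space_access_valid(&address_space_memory, desc_addr,
 *                                     16, false)) {
 *         // raise a DMA error instead of attempting the read
 *     }
 */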
2567
2568 /* Map a physical memory region into a host virtual address.
2569 * May map a subset of the requested range, given by and returned in *plen.
2570 * May return NULL if resources needed to perform the mapping are exhausted.
2571 * Use only for reads OR writes - not for read-modify-write operations.
2572 * Use cpu_register_map_client() to know when retrying the map operation is
2573 * likely to succeed.
2574 */
2575 void *address_space_map(AddressSpace *as,
2576 hwaddr addr,
2577 hwaddr *plen,
2578 bool is_write)
2579 {
2580 hwaddr len = *plen;
2581 hwaddr done = 0;
2582 hwaddr l, xlat, base;
2583 MemoryRegion *mr, *this_mr;
2584 ram_addr_t raddr;
2585
2586 if (len == 0) {
2587 return NULL;
2588 }
2589
2590 l = len;
2591 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2592 if (!memory_access_is_direct(mr, is_write)) {
2593 if (bounce.buffer) {
2594 return NULL;
2595 }
2596 /* Avoid unbounded allocations */
2597 l = MIN(l, TARGET_PAGE_SIZE);
2598 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2599 bounce.addr = addr;
2600 bounce.len = l;
2601
2602 memory_region_ref(mr);
2603 bounce.mr = mr;
2604 if (!is_write) {
2605 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2606 bounce.buffer, l);
2607 }
2608
2609 *plen = l;
2610 return bounce.buffer;
2611 }
2612
2613 base = xlat;
2614 raddr = memory_region_get_ram_addr(mr);
2615
2616 for (;;) {
2617 len -= l;
2618 addr += l;
2619 done += l;
2620 if (len == 0) {
2621 break;
2622 }
2623
2624 l = len;
2625 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2626 if (this_mr != mr || xlat != base + done) {
2627 break;
2628 }
2629 }
2630
2631 memory_region_ref(mr);
2632 *plen = done;
2633 return qemu_ram_ptr_length(raddr + base, plen);
2634 }
2635
2636 /* Unmaps a memory region previously mapped by address_space_map().
2637 * Will also mark the memory as dirty if is_write == 1. access_len gives
2638 * the amount of memory that was actually read or written by the caller.
2639 */
2640 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2641 int is_write, hwaddr access_len)
2642 {
2643 if (buffer != bounce.buffer) {
2644 MemoryRegion *mr;
2645 ram_addr_t addr1;
2646
2647 mr = qemu_ram_addr_from_host(buffer, &addr1);
2648 assert(mr != NULL);
2649 if (is_write) {
2650 invalidate_and_set_dirty(addr1, access_len);
2651 }
2652 if (xen_enabled()) {
2653 xen_invalidate_map_cache_entry(buffer);
2654 }
2655 memory_region_unref(mr);
2656 return;
2657 }
2658 if (is_write) {
2659 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2660 bounce.buffer, access_len);
2661 }
2662 qemu_vfree(bounce.buffer);
2663 bounce.buffer = NULL;
2664 memory_region_unref(bounce.mr);
2665 cpu_notify_map_clients();
2666 }
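/* Minimal map/unmap sketch (the address and the use of the mapping are
 * hypothetical).  The usual zero-copy pattern is: map, access at most *plen
 * bytes through the returned host pointer, then unmap with the length
 * actually touched so dirty tracking stays accurate:
 *
 *     hwaddr len = 4096;
 *     void *host = address_space_map(&address_space_memory, 0x2000, &len, true);
 *
 *     if (host) {
 *         memset(host, 0, len);
 *         address_space_unmap(&address_space_memory, host, len, true, len);
 *     } else {
 *         // fall back to address_space_rw(), or wait via cpu_register_map_client()
 *     }
 */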
2667
2668 void *cpu_physical_memory_map(hwaddr addr,
2669 hwaddr *plen,
2670 int is_write)
2671 {
2672 return address_space_map(&address_space_memory, addr, plen, is_write);
2673 }
2674
2675 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2676 int is_write, hwaddr access_len)
2677 {
2678 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2679 }
2680
2681 /* warning: addr must be aligned */
2682 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2683 enum device_endian endian)
2684 {
2685 uint8_t *ptr;
2686 uint64_t val;
2687 MemoryRegion *mr;
2688 hwaddr l = 4;
2689 hwaddr addr1;
2690
2691 mr = address_space_translate(as, addr, &addr1, &l, false);
2692 if (l < 4 || !memory_access_is_direct(mr, false)) {
2693 /* I/O case */
2694 memory_region_dispatch_read(mr, addr1, &val, 4,
2695 MEMTXATTRS_UNSPECIFIED);
2696 #if defined(TARGET_WORDS_BIGENDIAN)
2697 if (endian == DEVICE_LITTLE_ENDIAN) {
2698 val = bswap32(val);
2699 }
2700 #else
2701 if (endian == DEVICE_BIG_ENDIAN) {
2702 val = bswap32(val);
2703 }
2704 #endif
2705 } else {
2706 /* RAM case */
2707 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2708 & TARGET_PAGE_MASK)
2709 + addr1);
2710 switch (endian) {
2711 case DEVICE_LITTLE_ENDIAN:
2712 val = ldl_le_p(ptr);
2713 break;
2714 case DEVICE_BIG_ENDIAN:
2715 val = ldl_be_p(ptr);
2716 break;
2717 default:
2718 val = ldl_p(ptr);
2719 break;
2720 }
2721 }
2722 return val;
2723 }
2724
2725 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2726 {
2727 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2728 }
2729
2730 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2731 {
2732 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2733 }
2734
2735 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2736 {
2737 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2738 }
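/* Usage sketch for the fixed-endian load helpers (desc_addr is hypothetical).
 * A device model reading a 32-bit little-endian field from a guest descriptor
 * uses the _le_ variant instead of byte-swapping by hand:
 *
 *     uint32_t flags = ldl_le_phys(&address_space_memory, desc_addr + 4);
 */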
2739
2740 /* warning: addr must be aligned */
2741 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2742 enum device_endian endian)
2743 {
2744 uint8_t *ptr;
2745 uint64_t val;
2746 MemoryRegion *mr;
2747 hwaddr l = 8;
2748 hwaddr addr1;
2749
2750 mr = address_space_translate(as, addr, &addr1, &l,
2751 false);
2752 if (l < 8 || !memory_access_is_direct(mr, false)) {
2753 /* I/O case */
2754 memory_region_dispatch_read(mr, addr1, &val, 8,
2755 MEMTXATTRS_UNSPECIFIED);
2756 #if defined(TARGET_WORDS_BIGENDIAN)
2757 if (endian == DEVICE_LITTLE_ENDIAN) {
2758 val = bswap64(val);
2759 }
2760 #else
2761 if (endian == DEVICE_BIG_ENDIAN) {
2762 val = bswap64(val);
2763 }
2764 #endif
2765 } else {
2766 /* RAM case */
2767 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2768 & TARGET_PAGE_MASK)
2769 + addr1);
2770 switch (endian) {
2771 case DEVICE_LITTLE_ENDIAN:
2772 val = ldq_le_p(ptr);
2773 break;
2774 case DEVICE_BIG_ENDIAN:
2775 val = ldq_be_p(ptr);
2776 break;
2777 default:
2778 val = ldq_p(ptr);
2779 break;
2780 }
2781 }
2782 return val;
2783 }
2784
2785 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2786 {
2787 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2788 }
2789
2790 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2791 {
2792 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2793 }
2794
2795 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2796 {
2797 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2798 }
2799
2800 /* XXX: optimize */
2801 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2802 {
2803 uint8_t val;
2804 address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED, &val, 1, 0);
2805 return val;
2806 }
2807
2808 /* warning: addr must be aligned */
2809 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2810 enum device_endian endian)
2811 {
2812 uint8_t *ptr;
2813 uint64_t val;
2814 MemoryRegion *mr;
2815 hwaddr l = 2;
2816 hwaddr addr1;
2817
2818 mr = address_space_translate(as, addr, &addr1, &l,
2819 false);
2820 if (l < 2 || !memory_access_is_direct(mr, false)) {
2821 /* I/O case */
2822 memory_region_dispatch_read(mr, addr1, &val, 2,
2823 MEMTXATTRS_UNSPECIFIED);
2824 #if defined(TARGET_WORDS_BIGENDIAN)
2825 if (endian == DEVICE_LITTLE_ENDIAN) {
2826 val = bswap16(val);
2827 }
2828 #else
2829 if (endian == DEVICE_BIG_ENDIAN) {
2830 val = bswap16(val);
2831 }
2832 #endif
2833 } else {
2834 /* RAM case */
2835 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2836 & TARGET_PAGE_MASK)
2837 + addr1);
2838 switch (endian) {
2839 case DEVICE_LITTLE_ENDIAN:
2840 val = lduw_le_p(ptr);
2841 break;
2842 case DEVICE_BIG_ENDIAN:
2843 val = lduw_be_p(ptr);
2844 break;
2845 default:
2846 val = lduw_p(ptr);
2847 break;
2848 }
2849 }
2850 return val;
2851 }
2852
2853 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2854 {
2855 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2856 }
2857
2858 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2859 {
2860 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2861 }
2862
2863 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2864 {
2865 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2866 }
2867
2868 /* warning: addr must be aligned. The RAM page is not marked as dirty
2869 and the code inside is not invalidated. It is useful if the dirty
2870 bits are used to track modified PTEs */
2871 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2872 {
2873 uint8_t *ptr;
2874 MemoryRegion *mr;
2875 hwaddr l = 4;
2876 hwaddr addr1;
2877
2878 mr = address_space_translate(as, addr, &addr1, &l,
2879 true);
2880 if (l < 4 || !memory_access_is_direct(mr, true)) {
2881 memory_region_dispatch_write(mr, addr1, val, 4,
2882 MEMTXATTRS_UNSPECIFIED);
2883 } else {
2884 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2885 ptr = qemu_get_ram_ptr(addr1);
2886 stl_p(ptr, val);
2887
2888 if (unlikely(in_migration)) {
2889 if (cpu_physical_memory_is_clean(addr1)) {
2890 /* invalidate code */
2891 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2892 /* set dirty bit */
2893 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2894 }
2895 }
2896 }
2897 }
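/* Illustrative sketch (the PTE address, the CPUState cs and accessed_bit are
 * hypothetical).  Target MMU helpers that update accessed/dirty bits in guest
 * page tables use this variant so the write does not itself dirty the page or
 * invalidate translated code:
 *
 *     uint32_t pte = ldl_phys(cs->as, pte_addr);
 *     stl_phys_notdirty(cs->as, pte_addr, pte | accessed_bit);
 */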
2898
2899 /* warning: addr must be aligned */
2900 static inline void stl_phys_internal(AddressSpace *as,
2901 hwaddr addr, uint32_t val,
2902 enum device_endian endian)
2903 {
2904 uint8_t *ptr;
2905 MemoryRegion *mr;
2906 hwaddr l = 4;
2907 hwaddr addr1;
2908
2909 mr = address_space_translate(as, addr, &addr1, &l,
2910 true);
2911 if (l < 4 || !memory_access_is_direct(mr, true)) {
2912 #if defined(TARGET_WORDS_BIGENDIAN)
2913 if (endian == DEVICE_LITTLE_ENDIAN) {
2914 val = bswap32(val);
2915 }
2916 #else
2917 if (endian == DEVICE_BIG_ENDIAN) {
2918 val = bswap32(val);
2919 }
2920 #endif
2921 memory_region_dispatch_write(mr, addr1, val, 4,
2922 MEMTXATTRS_UNSPECIFIED);
2923 } else {
2924 /* RAM case */
2925 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2926 ptr = qemu_get_ram_ptr(addr1);
2927 switch (endian) {
2928 case DEVICE_LITTLE_ENDIAN:
2929 stl_le_p(ptr, val);
2930 break;
2931 case DEVICE_BIG_ENDIAN:
2932 stl_be_p(ptr, val);
2933 break;
2934 default:
2935 stl_p(ptr, val);
2936 break;
2937 }
2938 invalidate_and_set_dirty(addr1, 4);
2939 }
2940 }
2941
2942 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2943 {
2944 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2945 }
2946
2947 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2948 {
2949 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2950 }
2951
2952 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2953 {
2954 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2955 }
2956
2957 /* XXX: optimize */
2958 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2959 {
2960 uint8_t v = val;
2961 address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED, &v, 1, 1);
2962 }
2963
2964 /* warning: addr must be aligned */
2965 static inline void stw_phys_internal(AddressSpace *as,
2966 hwaddr addr, uint32_t val,
2967 enum device_endian endian)
2968 {
2969 uint8_t *ptr;
2970 MemoryRegion *mr;
2971 hwaddr l = 2;
2972 hwaddr addr1;
2973
2974 mr = address_space_translate(as, addr, &addr1, &l, true);
2975 if (l < 2 || !memory_access_is_direct(mr, true)) {
2976 #if defined(TARGET_WORDS_BIGENDIAN)
2977 if (endian == DEVICE_LITTLE_ENDIAN) {
2978 val = bswap16(val);
2979 }
2980 #else
2981 if (endian == DEVICE_BIG_ENDIAN) {
2982 val = bswap16(val);
2983 }
2984 #endif
2985 memory_region_dispatch_write(mr, addr1, val, 2,
2986 MEMTXATTRS_UNSPECIFIED);
2987 } else {
2988 /* RAM case */
2989 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2990 ptr = qemu_get_ram_ptr(addr1);
2991 switch (endian) {
2992 case DEVICE_LITTLE_ENDIAN:
2993 stw_le_p(ptr, val);
2994 break;
2995 case DEVICE_BIG_ENDIAN:
2996 stw_be_p(ptr, val);
2997 break;
2998 default:
2999 stw_p(ptr, val);
3000 break;
3001 }
3002 invalidate_and_set_dirty(addr1, 2);
3003 }
3004 }
3005
3006 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3007 {
3008 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
3009 }
3010
3011 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3012 {
3013 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
3014 }
3015
3016 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3017 {
3018 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
3019 }
3020
3021 /* XXX: optimize */
3022 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3023 {
3024 val = tswap64(val);
3025 address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED, (void *) &val, 8, 1);
3026 }
3027
3028 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3029 {
3030 val = cpu_to_le64(val);
3031 address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED, (void *) &val, 8, 1);
3032 }
3033
3034 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3035 {
3036 val = cpu_to_be64(val);
3037 address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED, (void *) &val, 8, 1);
3038 }
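/* Usage sketch for the store helpers above (ring_addr and the values are
 * hypothetical).  As with the loads, the _le_/_be_ variants store in a fixed
 * guest-visible byte order regardless of the target's native endianness:
 *
 *     stl_le_phys(&address_space_memory, ring_addr, 0xdeadbeef);
 *     stq_be_phys(&address_space_memory, ring_addr + 8,
 *                 UINT64_C(0x1122334455667788));
 */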
3039
3040 /* virtual memory access for debug (includes writing to ROM) */
3041 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3042 uint8_t *buf, int len, int is_write)
3043 {
3044 int l;
3045 hwaddr phys_addr;
3046 target_ulong page;
3047
3048 while (len > 0) {
3049 page = addr & TARGET_PAGE_MASK;
3050 phys_addr = cpu_get_phys_page_debug(cpu, page);
3051 /* if no physical page mapped, return an error */
3052 if (phys_addr == -1)
3053 return -1;
3054 l = (page + TARGET_PAGE_SIZE) - addr;
3055 if (l > len)
3056 l = len;
3057 phys_addr += (addr & ~TARGET_PAGE_MASK);
3058 if (is_write) {
3059 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3060 } else {
3061 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3062 buf, l, 0);
3063 }
3064 len -= l;
3065 buf += l;
3066 addr += l;
3067 }
3068 return 0;
3069 }
3070 #endif
3071
3072 /*
3073 * A helper function for the _utterly broken_ virtio device model to find out if
3074 * it's running on a big endian machine. Don't do this at home kids!
3075 */
3076 bool target_words_bigendian(void);
3077 bool target_words_bigendian(void)
3078 {
3079 #if defined(TARGET_WORDS_BIGENDIAN)
3080 return true;
3081 #else
3082 return false;
3083 #endif
3084 }
3085
3086 #ifndef CONFIG_USER_ONLY
3087 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3088 {
3089 MemoryRegion *mr;
3090 hwaddr l = 1;
3091
3092 mr = address_space_translate(&address_space_memory,
3093 phys_addr, &phys_addr, &l, false);
3094
3095 return !(memory_region_is_ram(mr) ||
3096 memory_region_is_romd(mr));
3097 }
3098
3099 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3100 {
3101 RAMBlock *block;
3102
3103 rcu_read_lock();
3104 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3105 func(block->host, block->offset, block->used_length, opaque);
3106 }
3107 rcu_read_unlock();
3108 }
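/* Illustrative callback for qemu_ram_foreach_block(); the function below is
 * hypothetical and only demonstrates the signature implied by the call above
 * (host pointer, offset, used length, opaque):
 *
 *     static void dump_block(void *host, ram_addr_t offset, ram_addr_t length,
 *                            void *opaque)
 *     {
 *         fprintf(stderr, "block host %p offset " RAM_ADDR_FMT
 *                 " length " RAM_ADDR_FMT "\n", host, offset, length);
 *     }
 *     ...
 *     qemu_ram_foreach_block(dump_block, NULL);
 */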
3109 #endif