1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "exec/cputlb.h"
52 #include "translate-all.h"
53
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
56
57 #include "qemu/range.h"
58
59 //#define DEBUG_SUBPAGE
60
61 #if !defined(CONFIG_USER_ONLY)
62 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
63 * are protected by the ramlist lock.
64 */
65 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
66
67 static MemoryRegion *system_memory;
68 static MemoryRegion *system_io;
69
70 AddressSpace address_space_io;
71 AddressSpace address_space_memory;
72
73 MemoryRegion io_mem_rom, io_mem_notdirty;
74 static MemoryRegion io_mem_unassigned;
75
76 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
77 #define RAM_PREALLOC (1 << 0)
78
79 /* RAM is mmap-ed with MAP_SHARED */
80 #define RAM_SHARED (1 << 1)
81
82 /* Only a portion of RAM (used_length) is actually used, and migrated.
83 * This used_length size can change across reboots.
84 */
85 #define RAM_RESIZEABLE (1 << 2)
86
87 #endif
88
89 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
90 /* current CPU in the current thread. It is only valid inside
91 cpu_exec() */
92 DEFINE_TLS(CPUState *, current_cpu);
93 /* 0 = Do not count executed instructions.
94 1 = Precise instruction counting.
95 2 = Adaptive rate instruction counting. */
96 int use_icount;
97
98 #if !defined(CONFIG_USER_ONLY)
99
100 typedef struct PhysPageEntry PhysPageEntry;
101
102 struct PhysPageEntry {
103     /* How many bits to skip to the next level (in units of P_L2_SIZE). 0 for a leaf. */
104 uint32_t skip : 6;
105 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
106 uint32_t ptr : 26;
107 };
108
109 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
110
111 /* Size of the L2 (and L3, etc) page tables. */
112 #define ADDR_SPACE_BITS 64
113
114 #define P_L2_BITS 9
115 #define P_L2_SIZE (1 << P_L2_BITS)
116
117 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
118
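/* For a target with 4 KiB pages (TARGET_PAGE_BITS == 12) this works out to
 * ((64 - 12 - 1) / 9) + 1 = 6 levels, each level indexed by 9 bits of the
 * page number.
 */
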
119 typedef PhysPageEntry Node[P_L2_SIZE];
120
121 typedef struct PhysPageMap {
122 struct rcu_head rcu;
123
124 unsigned sections_nb;
125 unsigned sections_nb_alloc;
126 unsigned nodes_nb;
127 unsigned nodes_nb_alloc;
128 Node *nodes;
129 MemoryRegionSection *sections;
130 } PhysPageMap;
131
132 struct AddressSpaceDispatch {
133 struct rcu_head rcu;
134
135 /* This is a multi-level map on the physical address space.
136 * The bottom level has pointers to MemoryRegionSections.
137 */
138 PhysPageEntry phys_map;
139 PhysPageMap map;
140 AddressSpace *as;
141 };
142
143 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
144 typedef struct subpage_t {
145 MemoryRegion iomem;
146 AddressSpace *as;
147 hwaddr base;
148 uint16_t sub_section[TARGET_PAGE_SIZE];
149 } subpage_t;
150
151 #define PHYS_SECTION_UNASSIGNED 0
152 #define PHYS_SECTION_NOTDIRTY 1
153 #define PHYS_SECTION_ROM 2
154 #define PHYS_SECTION_WATCH 3
155
156 static void io_mem_init(void);
157 static void memory_map_init(void);
158 static void tcg_commit(MemoryListener *listener);
159
160 static MemoryRegion io_mem_watch;
161 #endif
162
163 #if !defined(CONFIG_USER_ONLY)
164
165 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
166 {
167 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
168 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
169 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
170 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
171 }
172 }
173
174 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
175 {
176 unsigned i;
177 uint32_t ret;
178 PhysPageEntry e;
179 PhysPageEntry *p;
180
181 ret = map->nodes_nb++;
182 p = map->nodes[ret];
183 assert(ret != PHYS_MAP_NODE_NIL);
184 assert(ret != map->nodes_nb_alloc);
185
186 e.skip = leaf ? 0 : 1;
187 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
188 for (i = 0; i < P_L2_SIZE; ++i) {
189 memcpy(&p[i], &e, sizeof(e));
190 }
191 return ret;
192 }
193
194 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
195 hwaddr *index, hwaddr *nb, uint16_t leaf,
196 int level)
197 {
198 PhysPageEntry *p;
199 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
200
201 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
202 lp->ptr = phys_map_node_alloc(map, level == 0);
203 }
204 p = map->nodes[lp->ptr];
205 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
206
207 while (*nb && lp < &p[P_L2_SIZE]) {
208 if ((*index & (step - 1)) == 0 && *nb >= step) {
209 lp->skip = 0;
210 lp->ptr = leaf;
211 *index += step;
212 *nb -= step;
213 } else {
214 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
215 }
216 ++lp;
217 }
218 }
219
220 static void phys_page_set(AddressSpaceDispatch *d,
221 hwaddr index, hwaddr nb,
222 uint16_t leaf)
223 {
224 /* Wildly overreserve - it doesn't matter much. */
225 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
226
227 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
228 }
229
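/* A worked example, assuming 4 KiB target pages: registering a 2 MiB section
 * at guest physical address 0x100000 boils down to
 *
 *     phys_page_set(d, 0x100, 0x200, section_index);
 *
 * i.e. index 0x100 (= 0x100000 >> TARGET_PAGE_BITS) and nb 0x200 pages, and
 * phys_page_set_level() fills every covered entry with section_index,
 * collapsing aligned power-of-two runs into a single higher-level leaf.
 */
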
230 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
231 * and update our entry so we can skip it and go directly to the destination.
232 */
233 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
234 {
235 unsigned valid_ptr = P_L2_SIZE;
236 int valid = 0;
237 PhysPageEntry *p;
238 int i;
239
240 if (lp->ptr == PHYS_MAP_NODE_NIL) {
241 return;
242 }
243
244 p = nodes[lp->ptr];
245 for (i = 0; i < P_L2_SIZE; i++) {
246 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
247 continue;
248 }
249
250 valid_ptr = i;
251 valid++;
252 if (p[i].skip) {
253 phys_page_compact(&p[i], nodes, compacted);
254 }
255 }
256
257 /* We can only compress if there's only one child. */
258 if (valid != 1) {
259 return;
260 }
261
262 assert(valid_ptr < P_L2_SIZE);
263
264 /* Don't compress if it won't fit in the # of bits we have. */
265 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
266 return;
267 }
268
269 lp->ptr = p[valid_ptr].ptr;
270 if (!p[valid_ptr].skip) {
271 /* If our only child is a leaf, make this a leaf. */
272 /* By design, we should have made this node a leaf to begin with so we
273 * should never reach here.
274 * But since it's so simple to handle this, let's do it just in case we
275 * change this rule.
276 */
277 lp->skip = 0;
278 } else {
279 lp->skip += p[valid_ptr].skip;
280 }
281 }
282
283 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
284 {
285 DECLARE_BITMAP(compacted, nodes_nb);
286
287 if (d->phys_map.skip) {
288 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
289 }
290 }
291
292 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
293 Node *nodes, MemoryRegionSection *sections)
294 {
295 PhysPageEntry *p;
296 hwaddr index = addr >> TARGET_PAGE_BITS;
297 int i;
298
299 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
300 if (lp.ptr == PHYS_MAP_NODE_NIL) {
301 return &sections[PHYS_SECTION_UNASSIGNED];
302 }
303 p = nodes[lp.ptr];
304 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
305 }
306
307 if (sections[lp.ptr].size.hi ||
308 range_covers_byte(sections[lp.ptr].offset_within_address_space,
309 sections[lp.ptr].size.lo, addr)) {
310 return &sections[lp.ptr];
311 } else {
312 return &sections[PHYS_SECTION_UNASSIGNED];
313 }
314 }
315
316 bool memory_region_is_unassigned(MemoryRegion *mr)
317 {
318 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
319 && mr != &io_mem_watch;
320 }
321
322 /* Called from RCU critical section */
323 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
324 hwaddr addr,
325 bool resolve_subpage)
326 {
327 MemoryRegionSection *section;
328 subpage_t *subpage;
329
330 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
331 if (resolve_subpage && section->mr->subpage) {
332 subpage = container_of(section->mr, subpage_t, iomem);
333 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
334 }
335 return section;
336 }
337
338 /* Called from RCU critical section */
339 static MemoryRegionSection *
340 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
341 hwaddr *plen, bool resolve_subpage)
342 {
343 MemoryRegionSection *section;
344 Int128 diff;
345
346 section = address_space_lookup_region(d, addr, resolve_subpage);
347 /* Compute offset within MemoryRegionSection */
348 addr -= section->offset_within_address_space;
349
350 /* Compute offset within MemoryRegion */
351 *xlat = addr + section->offset_within_region;
352
353 diff = int128_sub(section->mr->size, int128_make64(addr));
354 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
355 return section;
356 }
357
358 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
359 {
360 if (memory_region_is_ram(mr)) {
361 return !(is_write && mr->readonly);
362 }
363 if (memory_region_is_romd(mr)) {
364 return !is_write;
365 }
366
367 return false;
368 }
369
370 /* Called from RCU critical section */
371 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
372 hwaddr *xlat, hwaddr *plen,
373 bool is_write)
374 {
375 IOMMUTLBEntry iotlb;
376 MemoryRegionSection *section;
377 MemoryRegion *mr;
378
379 for (;;) {
380 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
381 section = address_space_translate_internal(d, addr, &addr, plen, true);
382 mr = section->mr;
383
384 if (!mr->iommu_ops) {
385 break;
386 }
387
388 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
389 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
390 | (addr & iotlb.addr_mask));
391 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
392 if (!(iotlb.perm & (1 << is_write))) {
393 mr = &io_mem_unassigned;
394 break;
395 }
396
397 as = iotlb.target_as;
398 }
399
400 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
401 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
402 *plen = MIN(page, *plen);
403 }
404
405 *xlat = addr;
406 return mr;
407 }
408
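/* A minimal sketch of how a caller might use the translation result for a
 * direct read; real code should normally go through address_space_rw() or
 * address_space_read(), which also handle MMIO and access checks:
 *
 *     hwaddr xlat, len = size;
 *     MemoryRegion *mr;
 *
 *     rcu_read_lock();
 *     mr = address_space_translate(&address_space_memory, gpa,
 *                                  &xlat, &len, false);
 *     if (memory_access_is_direct(mr, false)) {
 *         memcpy(buf, qemu_get_ram_ptr(memory_region_get_ram_addr(mr) + xlat),
 *                len);
 *     }
 *     rcu_read_unlock();
 *
 * where gpa, size and buf are supplied by the (hypothetical) caller.
 */
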
409 /* Called from RCU critical section */
410 MemoryRegionSection *
411 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
412 hwaddr *xlat, hwaddr *plen)
413 {
414 MemoryRegionSection *section;
415 section = address_space_translate_internal(cpu->memory_dispatch,
416 addr, xlat, plen, false);
417
418 assert(!section->mr->iommu_ops);
419 return section;
420 }
421 #endif
422
423 #if !defined(CONFIG_USER_ONLY)
424
425 static int cpu_common_post_load(void *opaque, int version_id)
426 {
427 CPUState *cpu = opaque;
428
429 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
430 version_id is increased. */
431 cpu->interrupt_request &= ~0x01;
432 tlb_flush(cpu, 1);
433
434 return 0;
435 }
436
437 static int cpu_common_pre_load(void *opaque)
438 {
439 CPUState *cpu = opaque;
440
441 cpu->exception_index = -1;
442
443 return 0;
444 }
445
446 static bool cpu_common_exception_index_needed(void *opaque)
447 {
448 CPUState *cpu = opaque;
449
450 return tcg_enabled() && cpu->exception_index != -1;
451 }
452
453 static const VMStateDescription vmstate_cpu_common_exception_index = {
454 .name = "cpu_common/exception_index",
455 .version_id = 1,
456 .minimum_version_id = 1,
457 .fields = (VMStateField[]) {
458 VMSTATE_INT32(exception_index, CPUState),
459 VMSTATE_END_OF_LIST()
460 }
461 };
462
463 const VMStateDescription vmstate_cpu_common = {
464 .name = "cpu_common",
465 .version_id = 1,
466 .minimum_version_id = 1,
467 .pre_load = cpu_common_pre_load,
468 .post_load = cpu_common_post_load,
469 .fields = (VMStateField[]) {
470 VMSTATE_UINT32(halted, CPUState),
471 VMSTATE_UINT32(interrupt_request, CPUState),
472 VMSTATE_END_OF_LIST()
473 },
474 .subsections = (VMStateSubsection[]) {
475 {
476 .vmsd = &vmstate_cpu_common_exception_index,
477 .needed = cpu_common_exception_index_needed,
478 } , {
479 /* empty */
480 }
481 }
482 };
483
484 #endif
485
486 CPUState *qemu_get_cpu(int index)
487 {
488 CPUState *cpu;
489
490 CPU_FOREACH(cpu) {
491 if (cpu->cpu_index == index) {
492 return cpu;
493 }
494 }
495
496 return NULL;
497 }
498
499 #if !defined(CONFIG_USER_ONLY)
500 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
501 {
502 /* We only support one address space per cpu at the moment. */
503 assert(cpu->as == as);
504
505 if (cpu->tcg_as_listener) {
506 memory_listener_unregister(cpu->tcg_as_listener);
507 } else {
508 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
509 }
510 cpu->tcg_as_listener->commit = tcg_commit;
511 memory_listener_register(cpu->tcg_as_listener, as);
512 }
513 #endif
514
515 void cpu_exec_init(CPUArchState *env)
516 {
517 CPUState *cpu = ENV_GET_CPU(env);
518 CPUClass *cc = CPU_GET_CLASS(cpu);
519 CPUState *some_cpu;
520 int cpu_index;
521
522 #if defined(CONFIG_USER_ONLY)
523 cpu_list_lock();
524 #endif
525 cpu_index = 0;
526 CPU_FOREACH(some_cpu) {
527 cpu_index++;
528 }
529 cpu->cpu_index = cpu_index;
530 cpu->numa_node = 0;
531 QTAILQ_INIT(&cpu->breakpoints);
532 QTAILQ_INIT(&cpu->watchpoints);
533 #ifndef CONFIG_USER_ONLY
534 cpu->as = &address_space_memory;
535 cpu->thread_id = qemu_get_thread_id();
536 cpu_reload_memory_map(cpu);
537 #endif
538 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
539 #if defined(CONFIG_USER_ONLY)
540 cpu_list_unlock();
541 #endif
542 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
543 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
544 }
545 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
546 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
547 cpu_save, cpu_load, env);
548 assert(cc->vmsd == NULL);
549 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
550 #endif
551 if (cc->vmsd != NULL) {
552 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
553 }
554 }
555
556 #if defined(CONFIG_USER_ONLY)
557 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
558 {
559 tb_invalidate_phys_page_range(pc, pc + 1, 0);
560 }
561 #else
562 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
563 {
564 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
565 if (phys != -1) {
566 tb_invalidate_phys_addr(cpu->as,
567 phys | (pc & ~TARGET_PAGE_MASK));
568 }
569 }
570 #endif
571
572 #if defined(CONFIG_USER_ONLY)
573 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
574
575 {
576 }
577
578 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
579 int flags)
580 {
581 return -ENOSYS;
582 }
583
584 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
585 {
586 }
587
588 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
589 int flags, CPUWatchpoint **watchpoint)
590 {
591 return -ENOSYS;
592 }
593 #else
594 /* Add a watchpoint. */
595 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
596 int flags, CPUWatchpoint **watchpoint)
597 {
598 CPUWatchpoint *wp;
599
600 /* forbid ranges which are empty or run off the end of the address space */
601 if (len == 0 || (addr + len - 1) < addr) {
602 error_report("tried to set invalid watchpoint at %"
603 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
604 return -EINVAL;
605 }
606 wp = g_malloc(sizeof(*wp));
607
608 wp->vaddr = addr;
609 wp->len = len;
610 wp->flags = flags;
611
612 /* keep all GDB-injected watchpoints in front */
613 if (flags & BP_GDB) {
614 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
615 } else {
616 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
617 }
618
619 tlb_flush_page(cpu, addr);
620
621 if (watchpoint)
622 *watchpoint = wp;
623 return 0;
624 }
625
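/* For instance, a hypothetical debug front end would arm a 4-byte write
 * watchpoint like this:
 *
 *     CPUWatchpoint *wp;
 *
 *     if (cpu_watchpoint_insert(cpu, addr, 4, BP_MEM_WRITE | BP_GDB, &wp) < 0) {
 *         return -EINVAL;
 *     }
 *
 * and later drop it again with cpu_watchpoint_remove_by_ref(cpu, wp).
 */
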
626 /* Remove a specific watchpoint. */
627 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
628 int flags)
629 {
630 CPUWatchpoint *wp;
631
632 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
633 if (addr == wp->vaddr && len == wp->len
634 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
635 cpu_watchpoint_remove_by_ref(cpu, wp);
636 return 0;
637 }
638 }
639 return -ENOENT;
640 }
641
642 /* Remove a specific watchpoint by reference. */
643 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
644 {
645 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
646
647 tlb_flush_page(cpu, watchpoint->vaddr);
648
649 g_free(watchpoint);
650 }
651
652 /* Remove all matching watchpoints. */
653 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
654 {
655 CPUWatchpoint *wp, *next;
656
657 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
658 if (wp->flags & mask) {
659 cpu_watchpoint_remove_by_ref(cpu, wp);
660 }
661 }
662 }
663
664 /* Return true if this watchpoint address matches the specified
665  * access (i.e. the address range covered by the watchpoint overlaps
666 * partially or completely with the address range covered by the
667 * access).
668 */
669 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
670 vaddr addr,
671 vaddr len)
672 {
673 /* We know the lengths are non-zero, but a little caution is
674 * required to avoid errors in the case where the range ends
675 * exactly at the top of the address space and so addr + len
676 * wraps round to zero.
677 */
678 vaddr wpend = wp->vaddr + wp->len - 1;
679 vaddr addrend = addr + len - 1;
680
681 return !(addr > wpend || wp->vaddr > addrend);
682 }
683
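/* A concrete example of the wrap-around this guards against: a watchpoint at
 * vaddr 0xfffffffffffffffc with len 4 has wpend == 0xffffffffffffffff, while
 * a naive addr + len comparison would wrap to 0 and give the wrong answer.
 */
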
684 #endif
685
686 /* Add a breakpoint. */
687 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
688 CPUBreakpoint **breakpoint)
689 {
690 CPUBreakpoint *bp;
691
692 bp = g_malloc(sizeof(*bp));
693
694 bp->pc = pc;
695 bp->flags = flags;
696
697 /* keep all GDB-injected breakpoints in front */
698 if (flags & BP_GDB) {
699 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
700 } else {
701 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
702 }
703
704 breakpoint_invalidate(cpu, pc);
705
706 if (breakpoint) {
707 *breakpoint = bp;
708 }
709 return 0;
710 }
711
712 /* Remove a specific breakpoint. */
713 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
714 {
715 CPUBreakpoint *bp;
716
717 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
718 if (bp->pc == pc && bp->flags == flags) {
719 cpu_breakpoint_remove_by_ref(cpu, bp);
720 return 0;
721 }
722 }
723 return -ENOENT;
724 }
725
726 /* Remove a specific breakpoint by reference. */
727 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
728 {
729 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
730
731 breakpoint_invalidate(cpu, breakpoint->pc);
732
733 g_free(breakpoint);
734 }
735
736 /* Remove all matching breakpoints. */
737 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
738 {
739 CPUBreakpoint *bp, *next;
740
741 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
742 if (bp->flags & mask) {
743 cpu_breakpoint_remove_by_ref(cpu, bp);
744 }
745 }
746 }
747
748 /* enable or disable single step mode. EXCP_DEBUG is returned by the
749 CPU loop after each instruction */
750 void cpu_single_step(CPUState *cpu, int enabled)
751 {
752 if (cpu->singlestep_enabled != enabled) {
753 cpu->singlestep_enabled = enabled;
754 if (kvm_enabled()) {
755 kvm_update_guest_debug(cpu, 0);
756 } else {
757 /* must flush all the translated code to avoid inconsistencies */
758 /* XXX: only flush what is necessary */
759 CPUArchState *env = cpu->env_ptr;
760 tb_flush(env);
761 }
762 }
763 }
764
765 void cpu_abort(CPUState *cpu, const char *fmt, ...)
766 {
767 va_list ap;
768 va_list ap2;
769
770 va_start(ap, fmt);
771 va_copy(ap2, ap);
772 fprintf(stderr, "qemu: fatal: ");
773 vfprintf(stderr, fmt, ap);
774 fprintf(stderr, "\n");
775 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
776 if (qemu_log_enabled()) {
777 qemu_log("qemu: fatal: ");
778 qemu_log_vprintf(fmt, ap2);
779 qemu_log("\n");
780 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
781 qemu_log_flush();
782 qemu_log_close();
783 }
784 va_end(ap2);
785 va_end(ap);
786 #if defined(CONFIG_USER_ONLY)
787 {
788 struct sigaction act;
789 sigfillset(&act.sa_mask);
790 act.sa_handler = SIG_DFL;
791 sigaction(SIGABRT, &act, NULL);
792 }
793 #endif
794 abort();
795 }
796
797 #if !defined(CONFIG_USER_ONLY)
798 /* Called from RCU critical section */
799 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
800 {
801 RAMBlock *block;
802
803 block = atomic_rcu_read(&ram_list.mru_block);
804 if (block && addr - block->offset < block->max_length) {
805 goto found;
806 }
807 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
808 if (addr - block->offset < block->max_length) {
809 goto found;
810 }
811 }
812
813 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
814 abort();
815
816 found:
817 /* It is safe to write mru_block outside the iothread lock. This
818 * is what happens:
819 *
820 * mru_block = xxx
821 * rcu_read_unlock()
822 * xxx removed from list
823 * rcu_read_lock()
824 * read mru_block
825 * mru_block = NULL;
826 * call_rcu(reclaim_ramblock, xxx);
827 * rcu_read_unlock()
828 *
829 * atomic_rcu_set is not needed here. The block was already published
830 * when it was placed into the list. Here we're just making an extra
831 * copy of the pointer.
832 */
833 ram_list.mru_block = block;
834 return block;
835 }
836
837 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
838 {
839 ram_addr_t start1;
840 RAMBlock *block;
841 ram_addr_t end;
842
843 end = TARGET_PAGE_ALIGN(start + length);
844 start &= TARGET_PAGE_MASK;
845
846 rcu_read_lock();
847 block = qemu_get_ram_block(start);
848 assert(block == qemu_get_ram_block(end - 1));
849 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
850 cpu_tlb_reset_dirty_all(start1, length);
851 rcu_read_unlock();
852 }
853
854 /* Note: start and end must be within the same ram block. */
855 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
856 unsigned client)
857 {
858 if (length == 0)
859 return;
860 cpu_physical_memory_clear_dirty_range_type(start, length, client);
861
862 if (tcg_enabled()) {
863 tlb_reset_dirty_range_all(start, length);
864 }
865 }
866
867 /* Called from RCU critical section */
868 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
869 MemoryRegionSection *section,
870 target_ulong vaddr,
871 hwaddr paddr, hwaddr xlat,
872 int prot,
873 target_ulong *address)
874 {
875 hwaddr iotlb;
876 CPUWatchpoint *wp;
877
878 if (memory_region_is_ram(section->mr)) {
879 /* Normal RAM. */
880 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
881 + xlat;
882 if (!section->readonly) {
883 iotlb |= PHYS_SECTION_NOTDIRTY;
884 } else {
885 iotlb |= PHYS_SECTION_ROM;
886 }
887 } else {
888 iotlb = section - section->address_space->dispatch->map.sections;
889 iotlb += xlat;
890 }
891
892 /* Make accesses to pages with watchpoints go via the
893 watchpoint trap routines. */
894 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
895 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
896 /* Avoid trapping reads of pages with a write breakpoint. */
897 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
898 iotlb = PHYS_SECTION_WATCH + paddr;
899 *address |= TLB_MMIO;
900 break;
901 }
902 }
903 }
904
905 return iotlb;
906 }
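/* Example, assuming 4 KiB pages: for ordinary RAM whose block starts at
 * ram_addr 0x40000000, an access with xlat 0x2000 yields
 * iotlb == 0x40002000 | PHYS_SECTION_NOTDIRTY (or PHYS_SECTION_ROM for a
 * read-only section); for MMIO the sub-page bits instead carry the section
 * number, which iotlb_to_region() recovers with index & ~TARGET_PAGE_MASK.
 */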
907 #endif /* defined(CONFIG_USER_ONLY) */
908
909 #if !defined(CONFIG_USER_ONLY)
910
911 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
912 uint16_t section);
913 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
914
915 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
916 qemu_anon_ram_alloc;
917
918 /*
919  * Set a custom physical guest memory allocator.
920 * Accelerators with unusual needs may need this. Hopefully, we can
921 * get rid of it eventually.
922 */
923 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
924 {
925 phys_mem_alloc = alloc;
926 }
927
928 static uint16_t phys_section_add(PhysPageMap *map,
929 MemoryRegionSection *section)
930 {
931 /* The physical section number is ORed with a page-aligned
932 * pointer to produce the iotlb entries. Thus it should
933 * never overflow into the page-aligned value.
934 */
935 assert(map->sections_nb < TARGET_PAGE_SIZE);
936
937 if (map->sections_nb == map->sections_nb_alloc) {
938 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
939 map->sections = g_renew(MemoryRegionSection, map->sections,
940 map->sections_nb_alloc);
941 }
942 map->sections[map->sections_nb] = *section;
943 memory_region_ref(section->mr);
944 return map->sections_nb++;
945 }
946
947 static void phys_section_destroy(MemoryRegion *mr)
948 {
949 memory_region_unref(mr);
950
951 if (mr->subpage) {
952 subpage_t *subpage = container_of(mr, subpage_t, iomem);
953 object_unref(OBJECT(&subpage->iomem));
954 g_free(subpage);
955 }
956 }
957
958 static void phys_sections_free(PhysPageMap *map)
959 {
960 while (map->sections_nb > 0) {
961 MemoryRegionSection *section = &map->sections[--map->sections_nb];
962 phys_section_destroy(section->mr);
963 }
964 g_free(map->sections);
965 g_free(map->nodes);
966 }
967
968 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
969 {
970 subpage_t *subpage;
971 hwaddr base = section->offset_within_address_space
972 & TARGET_PAGE_MASK;
973 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
974 d->map.nodes, d->map.sections);
975 MemoryRegionSection subsection = {
976 .offset_within_address_space = base,
977 .size = int128_make64(TARGET_PAGE_SIZE),
978 };
979 hwaddr start, end;
980
981 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
982
983 if (!(existing->mr->subpage)) {
984 subpage = subpage_init(d->as, base);
985 subsection.address_space = d->as;
986 subsection.mr = &subpage->iomem;
987 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
988 phys_section_add(&d->map, &subsection));
989 } else {
990 subpage = container_of(existing->mr, subpage_t, iomem);
991 }
992 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
993 end = start + int128_get64(section->size) - 1;
994 subpage_register(subpage, start, end,
995 phys_section_add(&d->map, section));
996 }
997
998
999 static void register_multipage(AddressSpaceDispatch *d,
1000 MemoryRegionSection *section)
1001 {
1002 hwaddr start_addr = section->offset_within_address_space;
1003 uint16_t section_index = phys_section_add(&d->map, section);
1004 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1005 TARGET_PAGE_BITS));
1006
1007 assert(num_pages);
1008 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1009 }
1010
1011 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1012 {
1013 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1014 AddressSpaceDispatch *d = as->next_dispatch;
1015 MemoryRegionSection now = *section, remain = *section;
1016 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1017
1018 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1019 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1020 - now.offset_within_address_space;
1021
1022 now.size = int128_min(int128_make64(left), now.size);
1023 register_subpage(d, &now);
1024 } else {
1025 now.size = int128_zero();
1026 }
1027 while (int128_ne(remain.size, now.size)) {
1028 remain.size = int128_sub(remain.size, now.size);
1029 remain.offset_within_address_space += int128_get64(now.size);
1030 remain.offset_within_region += int128_get64(now.size);
1031 now = remain;
1032 if (int128_lt(remain.size, page_size)) {
1033 register_subpage(d, &now);
1034 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1035 now.size = page_size;
1036 register_subpage(d, &now);
1037 } else {
1038 now.size = int128_and(now.size, int128_neg(page_size));
1039 register_multipage(d, &now);
1040 }
1041 }
1042 }
1043
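/* For example, with 4 KiB pages a section covering [0x1800, 0x5400) is split
 * into a head subpage for [0x1800, 0x2000), one multipage run for
 * [0x2000, 0x5000), and a tail subpage for [0x5000, 0x5400).
 */
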
1044 void qemu_flush_coalesced_mmio_buffer(void)
1045 {
1046 if (kvm_enabled())
1047 kvm_flush_coalesced_mmio_buffer();
1048 }
1049
1050 void qemu_mutex_lock_ramlist(void)
1051 {
1052 qemu_mutex_lock(&ram_list.mutex);
1053 }
1054
1055 void qemu_mutex_unlock_ramlist(void)
1056 {
1057 qemu_mutex_unlock(&ram_list.mutex);
1058 }
1059
1060 #ifdef __linux__
1061
1062 #include <sys/vfs.h>
1063
1064 #define HUGETLBFS_MAGIC 0x958458f6
1065
1066 static long gethugepagesize(const char *path, Error **errp)
1067 {
1068 struct statfs fs;
1069 int ret;
1070
1071 do {
1072 ret = statfs(path, &fs);
1073 } while (ret != 0 && errno == EINTR);
1074
1075 if (ret != 0) {
1076 error_setg_errno(errp, errno, "failed to get page size of file %s",
1077 path);
1078 return 0;
1079 }
1080
1081 if (fs.f_type != HUGETLBFS_MAGIC)
1082 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1083
1084 return fs.f_bsize;
1085 }
1086
1087 static void *file_ram_alloc(RAMBlock *block,
1088 ram_addr_t memory,
1089 const char *path,
1090 Error **errp)
1091 {
1092 char *filename;
1093 char *sanitized_name;
1094 char *c;
1095 void *area = NULL;
1096 int fd;
1097 uint64_t hpagesize;
1098 Error *local_err = NULL;
1099
1100 hpagesize = gethugepagesize(path, &local_err);
1101 if (local_err) {
1102 error_propagate(errp, local_err);
1103 goto error;
1104 }
1105 block->mr->align = hpagesize;
1106
1107 if (memory < hpagesize) {
1108 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1109 "or larger than huge page size 0x%" PRIx64,
1110 memory, hpagesize);
1111 goto error;
1112 }
1113
1114 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1115 error_setg(errp,
1116 "host lacks kvm mmu notifiers, -mem-path unsupported");
1117 goto error;
1118 }
1119
1120 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1121 sanitized_name = g_strdup(memory_region_name(block->mr));
1122 for (c = sanitized_name; *c != '\0'; c++) {
1123 if (*c == '/')
1124 *c = '_';
1125 }
1126
1127 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1128 sanitized_name);
1129 g_free(sanitized_name);
1130
1131 fd = mkstemp(filename);
1132 if (fd < 0) {
1133 error_setg_errno(errp, errno,
1134 "unable to create backing store for hugepages");
1135 g_free(filename);
1136 goto error;
1137 }
1138 unlink(filename);
1139 g_free(filename);
1140
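    /* Round the request up to a whole number of huge pages; with 2 MiB huge
     * pages, for example, a 3 MiB request is backed by 4 MiB of hugetlbfs.
     */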
1141 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1142
1143 /*
1144 * ftruncate is not supported by hugetlbfs in older
1145 * hosts, so don't bother bailing out on errors.
1146 * If anything goes wrong with it under other filesystems,
1147 * mmap will fail.
1148 */
1149 if (ftruncate(fd, memory)) {
1150 perror("ftruncate");
1151 }
1152
1153 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1154 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1155 fd, 0);
1156 if (area == MAP_FAILED) {
1157 error_setg_errno(errp, errno,
1158 "unable to map backing store for hugepages");
1159 close(fd);
1160 goto error;
1161 }
1162
1163 if (mem_prealloc) {
1164 os_mem_prealloc(fd, area, memory);
1165 }
1166
1167 block->fd = fd;
1168 return area;
1169
1170 error:
1171 if (mem_prealloc) {
1172 error_report("%s", error_get_pretty(*errp));
1173 exit(1);
1174 }
1175 return NULL;
1176 }
1177 #endif
1178
1179 /* Called with the ramlist lock held. */
1180 static ram_addr_t find_ram_offset(ram_addr_t size)
1181 {
1182 RAMBlock *block, *next_block;
1183 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1184
1185     assert(size != 0); /* it would hand out the same offset multiple times */
1186
1187 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1188 return 0;
1189 }
1190
1191 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1192 ram_addr_t end, next = RAM_ADDR_MAX;
1193
1194 end = block->offset + block->max_length;
1195
1196 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1197 if (next_block->offset >= end) {
1198 next = MIN(next, next_block->offset);
1199 }
1200 }
1201 if (next - end >= size && next - end < mingap) {
1202 offset = end;
1203 mingap = next - end;
1204 }
1205 }
1206
1207 if (offset == RAM_ADDR_MAX) {
1208 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1209 (uint64_t)size);
1210 abort();
1211 }
1212
1213 return offset;
1214 }
1215
1216 ram_addr_t last_ram_offset(void)
1217 {
1218 RAMBlock *block;
1219 ram_addr_t last = 0;
1220
1221 rcu_read_lock();
1222 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1223 last = MAX(last, block->offset + block->max_length);
1224 }
1225 rcu_read_unlock();
1226 return last;
1227 }
1228
1229 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1230 {
1231 int ret;
1232
1233     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1234 if (!machine_dump_guest_core(current_machine)) {
1235 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1236 if (ret) {
1237 perror("qemu_madvise");
1238 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1239 "but dump_guest_core=off specified\n");
1240 }
1241 }
1242 }
1243
1244 /* Called within an RCU critical section, or while the ramlist lock
1245 * is held.
1246 */
1247 static RAMBlock *find_ram_block(ram_addr_t addr)
1248 {
1249 RAMBlock *block;
1250
1251 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1252 if (block->offset == addr) {
1253 return block;
1254 }
1255 }
1256
1257 return NULL;
1258 }
1259
1260 /* Called with iothread lock held. */
1261 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1262 {
1263 RAMBlock *new_block, *block;
1264
1265 rcu_read_lock();
1266 new_block = find_ram_block(addr);
1267 assert(new_block);
1268 assert(!new_block->idstr[0]);
1269
1270 if (dev) {
1271 char *id = qdev_get_dev_path(dev);
1272 if (id) {
1273 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1274 g_free(id);
1275 }
1276 }
1277 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1278
1279 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1280 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1281 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1282 new_block->idstr);
1283 abort();
1284 }
1285 }
1286 rcu_read_unlock();
1287 }
1288
1289 /* Called with iothread lock held. */
1290 void qemu_ram_unset_idstr(ram_addr_t addr)
1291 {
1292 RAMBlock *block;
1293
1294 /* FIXME: arch_init.c assumes that this is not called throughout
1295 * migration. Ignore the problem since hot-unplug during migration
1296 * does not work anyway.
1297 */
1298
1299 rcu_read_lock();
1300 block = find_ram_block(addr);
1301 if (block) {
1302 memset(block->idstr, 0, sizeof(block->idstr));
1303 }
1304 rcu_read_unlock();
1305 }
1306
1307 static int memory_try_enable_merging(void *addr, size_t len)
1308 {
1309 if (!machine_mem_merge(current_machine)) {
1310 /* disabled by the user */
1311 return 0;
1312 }
1313
1314 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1315 }
1316
1317 /* Only legal before the guest might have detected the memory size: e.g. on
1318  * incoming migration, or right after reset.
1319  *
1320  * As the memory core doesn't know how memory is accessed, it is up to the
1321  * resize callback to update device state and/or add assertions to detect
1322  * misuse, if necessary.
1323 */
1324 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1325 {
1326 RAMBlock *block = find_ram_block(base);
1327
1328 assert(block);
1329
1330 newsize = TARGET_PAGE_ALIGN(newsize);
1331
1332 if (block->used_length == newsize) {
1333 return 0;
1334 }
1335
1336 if (!(block->flags & RAM_RESIZEABLE)) {
1337 error_setg_errno(errp, EINVAL,
1338 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1339 " in != 0x" RAM_ADDR_FMT, block->idstr,
1340 newsize, block->used_length);
1341 return -EINVAL;
1342 }
1343
1344 if (block->max_length < newsize) {
1345 error_setg_errno(errp, EINVAL,
1346 "Length too large: %s: 0x" RAM_ADDR_FMT
1347 " > 0x" RAM_ADDR_FMT, block->idstr,
1348 newsize, block->max_length);
1349 return -EINVAL;
1350 }
1351
1352 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1353 block->used_length = newsize;
1354 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1355 memory_region_set_size(block->mr, newsize);
1356 if (block->resized) {
1357 block->resized(block->idstr, newsize, block->host);
1358 }
1359 return 0;
1360 }
1361
1362 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1363 {
1364 RAMBlock *block;
1365 RAMBlock *last_block = NULL;
1366 ram_addr_t old_ram_size, new_ram_size;
1367
1368 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1369
1370 qemu_mutex_lock_ramlist();
1371 new_block->offset = find_ram_offset(new_block->max_length);
1372
1373 if (!new_block->host) {
1374 if (xen_enabled()) {
1375 xen_ram_alloc(new_block->offset, new_block->max_length,
1376 new_block->mr);
1377 } else {
1378 new_block->host = phys_mem_alloc(new_block->max_length,
1379 &new_block->mr->align);
1380 if (!new_block->host) {
1381 error_setg_errno(errp, errno,
1382 "cannot set up guest memory '%s'",
1383 memory_region_name(new_block->mr));
1384 qemu_mutex_unlock_ramlist();
1385 return -1;
1386 }
1387 memory_try_enable_merging(new_block->host, new_block->max_length);
1388 }
1389 }
1390
1391 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1392 * QLIST (which has an RCU-friendly variant) does not have insertion at
1393 * tail, so save the last element in last_block.
1394 */
1395 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1396 last_block = block;
1397 if (block->max_length < new_block->max_length) {
1398 break;
1399 }
1400 }
1401 if (block) {
1402 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1403 } else if (last_block) {
1404 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1405 } else { /* list is empty */
1406 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1407 }
1408 ram_list.mru_block = NULL;
1409
1410 /* Write list before version */
1411 smp_wmb();
1412 ram_list.version++;
1413 qemu_mutex_unlock_ramlist();
1414
1415 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1416
1417 if (new_ram_size > old_ram_size) {
1418 int i;
1419
1420 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1421 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1422 ram_list.dirty_memory[i] =
1423 bitmap_zero_extend(ram_list.dirty_memory[i],
1424 old_ram_size, new_ram_size);
1425 }
1426 }
1427 cpu_physical_memory_set_dirty_range(new_block->offset,
1428 new_block->used_length);
1429
1430 if (new_block->host) {
1431 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1432 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1433 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1434 if (kvm_enabled()) {
1435 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1436 }
1437 }
1438
1439 return new_block->offset;
1440 }
1441
1442 #ifdef __linux__
1443 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1444 bool share, const char *mem_path,
1445 Error **errp)
1446 {
1447 RAMBlock *new_block;
1448 ram_addr_t addr;
1449 Error *local_err = NULL;
1450
1451 if (xen_enabled()) {
1452 error_setg(errp, "-mem-path not supported with Xen");
1453 return -1;
1454 }
1455
1456 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1457 /*
1458 * file_ram_alloc() needs to allocate just like
1459 * phys_mem_alloc, but we haven't bothered to provide
1460 * a hook there.
1461 */
1462 error_setg(errp,
1463 "-mem-path not supported with this accelerator");
1464 return -1;
1465 }
1466
1467 size = TARGET_PAGE_ALIGN(size);
1468 new_block = g_malloc0(sizeof(*new_block));
1469 new_block->mr = mr;
1470 new_block->used_length = size;
1471 new_block->max_length = size;
1472 new_block->flags = share ? RAM_SHARED : 0;
1473 new_block->host = file_ram_alloc(new_block, size,
1474 mem_path, errp);
1475 if (!new_block->host) {
1476 g_free(new_block);
1477 return -1;
1478 }
1479
1480 addr = ram_block_add(new_block, &local_err);
1481 if (local_err) {
1482 g_free(new_block);
1483 error_propagate(errp, local_err);
1484 return -1;
1485 }
1486 return addr;
1487 }
1488 #endif
1489
1490 static
1491 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1492 void (*resized)(const char*,
1493 uint64_t length,
1494 void *host),
1495 void *host, bool resizeable,
1496 MemoryRegion *mr, Error **errp)
1497 {
1498 RAMBlock *new_block;
1499 ram_addr_t addr;
1500 Error *local_err = NULL;
1501
1502 size = TARGET_PAGE_ALIGN(size);
1503 max_size = TARGET_PAGE_ALIGN(max_size);
1504 new_block = g_malloc0(sizeof(*new_block));
1505 new_block->mr = mr;
1506 new_block->resized = resized;
1507 new_block->used_length = size;
1508 new_block->max_length = max_size;
1509 assert(max_size >= size);
1510 new_block->fd = -1;
1511 new_block->host = host;
1512 if (host) {
1513 new_block->flags |= RAM_PREALLOC;
1514 }
1515 if (resizeable) {
1516 new_block->flags |= RAM_RESIZEABLE;
1517 }
1518 addr = ram_block_add(new_block, &local_err);
1519 if (local_err) {
1520 g_free(new_block);
1521 error_propagate(errp, local_err);
1522 return -1;
1523 }
1524 return addr;
1525 }
1526
1527 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1528 MemoryRegion *mr, Error **errp)
1529 {
1530 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1531 }
1532
1533 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1534 {
1535 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1536 }
1537
1538 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1539 void (*resized)(const char*,
1540 uint64_t length,
1541 void *host),
1542 MemoryRegion *mr, Error **errp)
1543 {
1544 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1545 }
1546
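/* A sketch of how a device model might use the resizeable variant; the
 * callback name and the sizes below are made up for illustration:
 *
 *     static void my_rom_resized(const char *id, uint64_t length, void *host)
 *     {
 *         ... update device state for the new used length ...
 *     }
 *
 *     addr = qemu_ram_alloc_resizeable(64 * 1024, 16 * 1024 * 1024,
 *                                      my_rom_resized, mr, &err);
 *     ...
 *     qemu_ram_resize(addr, 2 * 1024 * 1024, &err);
 *
 * qemu_ram_resize() fails with -EINVAL if the block was not created
 * resizeable or if the new size exceeds max_length.
 */
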
1547 void qemu_ram_free_from_ptr(ram_addr_t addr)
1548 {
1549 RAMBlock *block;
1550
1551 qemu_mutex_lock_ramlist();
1552 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1553 if (addr == block->offset) {
1554 QLIST_REMOVE_RCU(block, next);
1555 ram_list.mru_block = NULL;
1556 /* Write list before version */
1557 smp_wmb();
1558 ram_list.version++;
1559 g_free_rcu(block, rcu);
1560 break;
1561 }
1562 }
1563 qemu_mutex_unlock_ramlist();
1564 }
1565
1566 static void reclaim_ramblock(RAMBlock *block)
1567 {
1568 if (block->flags & RAM_PREALLOC) {
1569 ;
1570 } else if (xen_enabled()) {
1571 xen_invalidate_map_cache_entry(block->host);
1572 #ifndef _WIN32
1573 } else if (block->fd >= 0) {
1574 munmap(block->host, block->max_length);
1575 close(block->fd);
1576 #endif
1577 } else {
1578 qemu_anon_ram_free(block->host, block->max_length);
1579 }
1580 g_free(block);
1581 }
1582
1583 void qemu_ram_free(ram_addr_t addr)
1584 {
1585 RAMBlock *block;
1586
1587 qemu_mutex_lock_ramlist();
1588 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1589 if (addr == block->offset) {
1590 QLIST_REMOVE_RCU(block, next);
1591 ram_list.mru_block = NULL;
1592 /* Write list before version */
1593 smp_wmb();
1594 ram_list.version++;
1595 call_rcu(block, reclaim_ramblock, rcu);
1596 break;
1597 }
1598 }
1599 qemu_mutex_unlock_ramlist();
1600 }
1601
1602 #ifndef _WIN32
1603 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1604 {
1605 RAMBlock *block;
1606 ram_addr_t offset;
1607 int flags;
1608 void *area, *vaddr;
1609
1610 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1611 offset = addr - block->offset;
1612 if (offset < block->max_length) {
1613 vaddr = ramblock_ptr(block, offset);
1614 if (block->flags & RAM_PREALLOC) {
1615 ;
1616 } else if (xen_enabled()) {
1617 abort();
1618 } else {
1619 flags = MAP_FIXED;
1620 if (block->fd >= 0) {
1621 flags |= (block->flags & RAM_SHARED ?
1622 MAP_SHARED : MAP_PRIVATE);
1623 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1624 flags, block->fd, offset);
1625 } else {
1626 /*
1627 * Remap needs to match alloc. Accelerators that
1628 * set phys_mem_alloc never remap. If they did,
1629 * we'd need a remap hook here.
1630 */
1631 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1632
1633 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1634 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1635 flags, -1, 0);
1636 }
1637 if (area != vaddr) {
1638 fprintf(stderr, "Could not remap addr: "
1639 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1640 length, addr);
1641 exit(1);
1642 }
1643 memory_try_enable_merging(vaddr, length);
1644 qemu_ram_setup_dump(vaddr, length);
1645 }
1646 }
1647 }
1648 }
1649 #endif /* !_WIN32 */
1650
1651 int qemu_get_ram_fd(ram_addr_t addr)
1652 {
1653 RAMBlock *block;
1654 int fd;
1655
1656 rcu_read_lock();
1657 block = qemu_get_ram_block(addr);
1658 fd = block->fd;
1659 rcu_read_unlock();
1660 return fd;
1661 }
1662
1663 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1664 {
1665 RAMBlock *block;
1666 void *ptr;
1667
1668 rcu_read_lock();
1669 block = qemu_get_ram_block(addr);
1670 ptr = ramblock_ptr(block, 0);
1671 rcu_read_unlock();
1672 return ptr;
1673 }
1674
1675 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1676 * This should not be used for general purpose DMA. Use address_space_map
1677 * or address_space_rw instead. For local memory (e.g. video ram) that the
1678 * device owns, use memory_region_get_ram_ptr.
1679 *
1680 * By the time this function returns, the returned pointer is not protected
1681 * by RCU anymore. If the caller is not within an RCU critical section and
1682 * does not hold the iothread lock, it must have other means of protecting the
1683 * pointer, such as a reference to the region that includes the incoming
1684 * ram_addr_t.
1685 */
1686 void *qemu_get_ram_ptr(ram_addr_t addr)
1687 {
1688 RAMBlock *block;
1689 void *ptr;
1690
1691 rcu_read_lock();
1692 block = qemu_get_ram_block(addr);
1693
1694 if (xen_enabled() && block->host == NULL) {
1695     /* We need to check if the requested address is in RAM
1696 * because we don't want to map the entire memory in QEMU.
1697 * In that case just map until the end of the page.
1698 */
1699 if (block->offset == 0) {
1700 ptr = xen_map_cache(addr, 0, 0);
1701 goto unlock;
1702 }
1703
1704 block->host = xen_map_cache(block->offset, block->max_length, 1);
1705 }
1706 ptr = ramblock_ptr(block, addr - block->offset);
1707
1708 unlock:
1709 rcu_read_unlock();
1710 return ptr;
1711 }
1712
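/* A minimal sketch of the RCU-protected pattern (ram_addr, data and len are
 * assumed to come from the caller):
 *
 *     rcu_read_lock();
 *     ptr = qemu_get_ram_ptr(ram_addr);
 *     memcpy(ptr, data, len);
 *     rcu_read_unlock();
 */
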
1713 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1714 * but takes a size argument.
1715 *
1716 * By the time this function returns, the returned pointer is not protected
1717 * by RCU anymore. If the caller is not within an RCU critical section and
1718 * does not hold the iothread lock, it must have other means of protecting the
1719 * pointer, such as a reference to the region that includes the incoming
1720 * ram_addr_t.
1721 */
1722 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1723 {
1724 void *ptr;
1725 if (*size == 0) {
1726 return NULL;
1727 }
1728 if (xen_enabled()) {
1729 return xen_map_cache(addr, *size, 1);
1730 } else {
1731 RAMBlock *block;
1732 rcu_read_lock();
1733 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1734 if (addr - block->offset < block->max_length) {
1735 if (addr - block->offset + *size > block->max_length)
1736 *size = block->max_length - addr + block->offset;
1737 ptr = ramblock_ptr(block, addr - block->offset);
1738 rcu_read_unlock();
1739 return ptr;
1740 }
1741 }
1742
1743 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1744 abort();
1745 }
1746 }
1747
1748 /* Some of the softmmu routines need to translate from a host pointer
1749 * (typically a TLB entry) back to a ram offset.
1750 *
1751 * By the time this function returns, the returned pointer is not protected
1752 * by RCU anymore. If the caller is not within an RCU critical section and
1753 * does not hold the iothread lock, it must have other means of protecting the
1754 * pointer, such as a reference to the region that includes the incoming
1755 * ram_addr_t.
1756 */
1757 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1758 {
1759 RAMBlock *block;
1760 uint8_t *host = ptr;
1761 MemoryRegion *mr;
1762
1763 if (xen_enabled()) {
1764 rcu_read_lock();
1765 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1766 mr = qemu_get_ram_block(*ram_addr)->mr;
1767 rcu_read_unlock();
1768 return mr;
1769 }
1770
1771 rcu_read_lock();
1772 block = atomic_rcu_read(&ram_list.mru_block);
1773 if (block && block->host && host - block->host < block->max_length) {
1774 goto found;
1775 }
1776
1777 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1778         /* This case happens when the block is not mapped. */
1779 if (block->host == NULL) {
1780 continue;
1781 }
1782 if (host - block->host < block->max_length) {
1783 goto found;
1784 }
1785 }
1786
1787 rcu_read_unlock();
1788 return NULL;
1789
1790 found:
1791 *ram_addr = block->offset + (host - block->host);
1792 mr = block->mr;
1793 rcu_read_unlock();
1794 return mr;
1795 }
1796
1797 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1798 uint64_t val, unsigned size)
1799 {
1800 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1801 tb_invalidate_phys_page_fast(ram_addr, size);
1802 }
1803 switch (size) {
1804 case 1:
1805 stb_p(qemu_get_ram_ptr(ram_addr), val);
1806 break;
1807 case 2:
1808 stw_p(qemu_get_ram_ptr(ram_addr), val);
1809 break;
1810 case 4:
1811 stl_p(qemu_get_ram_ptr(ram_addr), val);
1812 break;
1813 default:
1814 abort();
1815 }
1816 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1817 /* we remove the notdirty callback only if the code has been
1818 flushed */
1819 if (!cpu_physical_memory_is_clean(ram_addr)) {
1820 CPUArchState *env = current_cpu->env_ptr;
1821 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1822 }
1823 }
1824
1825 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1826 unsigned size, bool is_write)
1827 {
1828 return is_write;
1829 }
1830
1831 static const MemoryRegionOps notdirty_mem_ops = {
1832 .write = notdirty_mem_write,
1833 .valid.accepts = notdirty_mem_accepts,
1834 .endianness = DEVICE_NATIVE_ENDIAN,
1835 };
1836
1837 /* Generate a debug exception if a watchpoint has been hit. */
1838 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1839 {
1840 CPUState *cpu = current_cpu;
1841 CPUArchState *env = cpu->env_ptr;
1842 target_ulong pc, cs_base;
1843 target_ulong vaddr;
1844 CPUWatchpoint *wp;
1845 int cpu_flags;
1846
1847 if (cpu->watchpoint_hit) {
1848 /* We re-entered the check after replacing the TB. Now raise
1849          * the debug interrupt so that it will trigger after the
1850 * current instruction. */
1851 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1852 return;
1853 }
1854 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1855 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1856 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1857 && (wp->flags & flags)) {
1858 if (flags == BP_MEM_READ) {
1859 wp->flags |= BP_WATCHPOINT_HIT_READ;
1860 } else {
1861 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1862 }
1863 wp->hitaddr = vaddr;
1864 wp->hitattrs = attrs;
1865 if (!cpu->watchpoint_hit) {
1866 cpu->watchpoint_hit = wp;
1867 tb_check_watchpoint(cpu);
1868 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1869 cpu->exception_index = EXCP_DEBUG;
1870 cpu_loop_exit(cpu);
1871 } else {
1872 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1873 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1874 cpu_resume_from_signal(cpu, NULL);
1875 }
1876 }
1877 } else {
1878 wp->flags &= ~BP_WATCHPOINT_HIT;
1879 }
1880 }
1881 }
1882
1883 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1884 so these check for a hit then pass through to the normal out-of-line
1885 phys routines. */
1886 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
1887 unsigned size, MemTxAttrs attrs)
1888 {
1889 MemTxResult res;
1890 uint64_t data;
1891
1892 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
1893 switch (size) {
1894 case 1:
1895 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
1896 break;
1897 case 2:
1898 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
1899 break;
1900 case 4:
1901 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
1902 break;
1903 default: abort();
1904 }
1905 *pdata = data;
1906 return res;
1907 }
1908
1909 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
1910 uint64_t val, unsigned size,
1911 MemTxAttrs attrs)
1912 {
1913 MemTxResult res;
1914
1915 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
1916 switch (size) {
1917 case 1:
1918 address_space_stb(&address_space_memory, addr, val, attrs, &res);
1919 break;
1920 case 2:
1921 address_space_stw(&address_space_memory, addr, val, attrs, &res);
1922 break;
1923 case 4:
1924 address_space_stl(&address_space_memory, addr, val, attrs, &res);
1925 break;
1926 default: abort();
1927 }
1928 return res;
1929 }
1930
1931 static const MemoryRegionOps watch_mem_ops = {
1932 .read_with_attrs = watch_mem_read,
1933 .write_with_attrs = watch_mem_write,
1934 .endianness = DEVICE_NATIVE_ENDIAN,
1935 };
1936
1937 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
1938 unsigned len, MemTxAttrs attrs)
1939 {
1940 subpage_t *subpage = opaque;
1941 uint8_t buf[8];
1942 MemTxResult res;
1943
1944 #if defined(DEBUG_SUBPAGE)
1945 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1946 subpage, len, addr);
1947 #endif
1948 res = address_space_read(subpage->as, addr + subpage->base,
1949 attrs, buf, len);
1950 if (res) {
1951 return res;
1952 }
1953 switch (len) {
1954 case 1:
1955 *data = ldub_p(buf);
1956 return MEMTX_OK;
1957 case 2:
1958 *data = lduw_p(buf);
1959 return MEMTX_OK;
1960 case 4:
1961 *data = ldl_p(buf);
1962 return MEMTX_OK;
1963 case 8:
1964 *data = ldq_p(buf);
1965 return MEMTX_OK;
1966 default:
1967 abort();
1968 }
1969 }
1970
1971 static MemTxResult subpage_write(void *opaque, hwaddr addr,
1972 uint64_t value, unsigned len, MemTxAttrs attrs)
1973 {
1974 subpage_t *subpage = opaque;
1975 uint8_t buf[8];
1976
1977 #if defined(DEBUG_SUBPAGE)
1978 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1979 " value %"PRIx64"\n",
1980 __func__, subpage, len, addr, value);
1981 #endif
1982 switch (len) {
1983 case 1:
1984 stb_p(buf, value);
1985 break;
1986 case 2:
1987 stw_p(buf, value);
1988 break;
1989 case 4:
1990 stl_p(buf, value);
1991 break;
1992 case 8:
1993 stq_p(buf, value);
1994 break;
1995 default:
1996 abort();
1997 }
1998 return address_space_write(subpage->as, addr + subpage->base,
1999 attrs, buf, len);
2000 }
2001
2002 static bool subpage_accepts(void *opaque, hwaddr addr,
2003 unsigned len, bool is_write)
2004 {
2005 subpage_t *subpage = opaque;
2006 #if defined(DEBUG_SUBPAGE)
2007 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2008 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2009 #endif
2010
2011 return address_space_access_valid(subpage->as, addr + subpage->base,
2012 len, is_write);
2013 }
2014
2015 static const MemoryRegionOps subpage_ops = {
2016 .read_with_attrs = subpage_read,
2017 .write_with_attrs = subpage_write,
2018 .impl.min_access_size = 1,
2019 .impl.max_access_size = 8,
2020 .valid.min_access_size = 1,
2021 .valid.max_access_size = 8,
2022 .valid.accepts = subpage_accepts,
2023 .endianness = DEVICE_NATIVE_ENDIAN,
2024 };
2025
2026 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2027 uint16_t section)
2028 {
2029 int idx, eidx;
2030
2031 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2032 return -1;
2033 idx = SUBPAGE_IDX(start);
2034 eidx = SUBPAGE_IDX(end);
2035 #if defined(DEBUG_SUBPAGE)
2036 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2037 __func__, mmio, start, end, idx, eidx, section);
2038 #endif
2039 for (; idx <= eidx; idx++) {
2040 mmio->sub_section[idx] = section;
2041 }
2042
2043 return 0;
2044 }
2045
2046 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2047 {
2048 subpage_t *mmio;
2049
2050 mmio = g_malloc0(sizeof(subpage_t));
2051
2052 mmio->as = as;
2053 mmio->base = base;
2054 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2055 NULL, TARGET_PAGE_SIZE);
2056 mmio->iomem.subpage = true;
2057 #if defined(DEBUG_SUBPAGE)
2058 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2059 mmio, base, TARGET_PAGE_SIZE);
2060 #endif
2061 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2062
2063 return mmio;
2064 }
2065
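/* Register a section spanning the whole 64-bit address space for one of
   the special io_mem_* regions, so that every new dispatch starts out
   with the fixed PHYS_SECTION_* indices asserted in mem_begin(). */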
2066 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2067 MemoryRegion *mr)
2068 {
2069 assert(as);
2070 MemoryRegionSection section = {
2071 .address_space = as,
2072 .mr = mr,
2073 .offset_within_address_space = 0,
2074 .offset_within_region = 0,
2075 .size = int128_2_64(),
2076 };
2077
2078 return phys_section_add(map, &section);
2079 }
2080
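/* The low bits of an iotlb entry hold an index into the sections[] table
   of the CPU's current dispatch; return the MemoryRegion stored there. */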
2081 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2082 {
2083 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2084 MemoryRegionSection *sections = d->map.sections;
2085
2086 return sections[index & ~TARGET_PAGE_MASK].mr;
2087 }
2088
2089 static void io_mem_init(void)
2090 {
2091 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2092 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2093 NULL, UINT64_MAX);
2094 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2095 NULL, UINT64_MAX);
2096 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2097 NULL, UINT64_MAX);
2098 }
2099
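/* Dispatch (re)building: mem_begin() allocates a fresh AddressSpaceDispatch,
   mem_add() (region_add/region_nop) populates it while the memory topology
   is replayed, and mem_commit() publishes it with RCU, freeing the previous
   table after a grace period. */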
2100 static void mem_begin(MemoryListener *listener)
2101 {
2102 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2103 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2104 uint16_t n;
2105
2106 n = dummy_section(&d->map, as, &io_mem_unassigned);
2107 assert(n == PHYS_SECTION_UNASSIGNED);
2108 n = dummy_section(&d->map, as, &io_mem_notdirty);
2109 assert(n == PHYS_SECTION_NOTDIRTY);
2110 n = dummy_section(&d->map, as, &io_mem_rom);
2111 assert(n == PHYS_SECTION_ROM);
2112 n = dummy_section(&d->map, as, &io_mem_watch);
2113 assert(n == PHYS_SECTION_WATCH);
2114
2115 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2116 d->as = as;
2117 as->next_dispatch = d;
2118 }
2119
2120 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2121 {
2122 phys_sections_free(&d->map);
2123 g_free(d);
2124 }
2125
2126 static void mem_commit(MemoryListener *listener)
2127 {
2128 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2129 AddressSpaceDispatch *cur = as->dispatch;
2130 AddressSpaceDispatch *next = as->next_dispatch;
2131
2132 phys_page_compact_all(next, next->map.nodes_nb);
2133
2134 atomic_rcu_set(&as->dispatch, next);
2135 if (cur) {
2136 call_rcu(cur, address_space_dispatch_free, rcu);
2137 }
2138 }
2139
2140 static void tcg_commit(MemoryListener *listener)
2141 {
2142 CPUState *cpu;
2143
2144 /* Since each CPU stores RAM addresses in its TLB cache, we must
2145 reset the modified entries. */
2146 /* XXX: slow! */
2147 CPU_FOREACH(cpu) {
2148 /* FIXME: Disentangle the circular cpu.h file dependencies so we can
2149 get the right CPU directly from the listener. */
2150 if (cpu->tcg_as_listener != listener) {
2151 continue;
2152 }
2153 cpu_reload_memory_map(cpu);
2154 }
2155 }
2156
2157 void address_space_init_dispatch(AddressSpace *as)
2158 {
2159 as->dispatch = NULL;
2160 as->dispatch_listener = (MemoryListener) {
2161 .begin = mem_begin,
2162 .commit = mem_commit,
2163 .region_add = mem_add,
2164 .region_nop = mem_add,
2165 .priority = 0,
2166 };
2167 memory_listener_register(&as->dispatch_listener, as);
2168 }
2169
2170 void address_space_unregister(AddressSpace *as)
2171 {
2172 memory_listener_unregister(&as->dispatch_listener);
2173 }
2174
2175 void address_space_destroy_dispatch(AddressSpace *as)
2176 {
2177 AddressSpaceDispatch *d = as->dispatch;
2178
2179 atomic_rcu_set(&as->dispatch, NULL);
2180 if (d) {
2181 call_rcu(d, address_space_dispatch_free, rcu);
2182 }
2183 }
2184
2185 static void memory_map_init(void)
2186 {
2187 system_memory = g_malloc(sizeof(*system_memory));
2188
2189 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2190 address_space_init(&address_space_memory, system_memory, "memory");
2191
2192 system_io = g_malloc(sizeof(*system_io));
2193 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2194 65536);
2195 address_space_init(&address_space_io, system_io, "I/O");
2196 }
2197
2198 MemoryRegion *get_system_memory(void)
2199 {
2200 return system_memory;
2201 }
2202
2203 MemoryRegion *get_system_io(void)
2204 {
2205 return system_io;
2206 }
2207
2208 #endif /* !defined(CONFIG_USER_ONLY) */
2209
2210 /* physical memory access (slow version, mainly for debug) */
2211 #if defined(CONFIG_USER_ONLY)
2212 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2213 uint8_t *buf, int len, int is_write)
2214 {
2215 int l, flags;
2216 target_ulong page;
2217 void * p;
2218
2219 while (len > 0) {
2220 page = addr & TARGET_PAGE_MASK;
2221 l = (page + TARGET_PAGE_SIZE) - addr;
2222 if (l > len)
2223 l = len;
2224 flags = page_get_flags(page);
2225 if (!(flags & PAGE_VALID))
2226 return -1;
2227 if (is_write) {
2228 if (!(flags & PAGE_WRITE))
2229 return -1;
2230 /* XXX: this code should not depend on lock_user */
2231 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2232 return -1;
2233 memcpy(p, buf, l);
2234 unlock_user(p, addr, l);
2235 } else {
2236 if (!(flags & PAGE_READ))
2237 return -1;
2238 /* XXX: this code should not depend on lock_user */
2239 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2240 return -1;
2241 memcpy(buf, p, l);
2242 unlock_user(p, addr, 0);
2243 }
2244 len -= l;
2245 buf += l;
2246 addr += l;
2247 }
2248 return 0;
2249 }
2250
2251 #else
2252
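/* Called after guest memory has been written directly: invalidate any
   translated code covering the range and update the remaining dirty
   bitmaps; if every page was already dirty, only Xen needs to be told
   about the modification. */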
2253 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2254 hwaddr length)
2255 {
2256 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2257 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2258 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2259 tb_invalidate_phys_range(addr, addr + length);
2260 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2261 }
2262 if (dirty_log_mask) {
2263 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2264 }
2265 } else {
2266 xen_modified_memory(addr, length);
2267 }
2268 }
2269
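/* Clamp an access length to what the region can handle: its declared
   maximum access size, the alignment of the address (unless the region
   accepts unaligned accesses), and a power of two. */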
2270 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2271 {
2272 unsigned access_size_max = mr->ops->valid.max_access_size;
2273
2274 /* Regions are assumed to support 1-4 byte accesses unless
2275 otherwise specified. */
2276 if (access_size_max == 0) {
2277 access_size_max = 4;
2278 }
2279
2280 /* Bound the maximum access by the alignment of the address. */
2281 if (!mr->ops->impl.unaligned) {
2282 unsigned align_size_max = addr & -addr;
2283 if (align_size_max != 0 && align_size_max < access_size_max) {
2284 access_size_max = align_size_max;
2285 }
2286 }
2287
2288 /* Don't attempt accesses larger than the maximum. */
2289 if (l > access_size_max) {
2290 l = access_size_max;
2291 }
2292 if (l & (l - 1)) {
2293 l = 1 << (qemu_fls(l) - 1);
2294 }
2295
2296 return l;
2297 }
2298
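/* Slow path for physical memory accesses: translate one chunk at a time,
   dispatching MMIO through the region's ops and copying RAM directly
   (with dirty/TB bookkeeping on writes). */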
2299 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2300 uint8_t *buf, int len, bool is_write)
2301 {
2302 hwaddr l;
2303 uint8_t *ptr;
2304 uint64_t val;
2305 hwaddr addr1;
2306 MemoryRegion *mr;
2307 MemTxResult result = MEMTX_OK;
2308
2309 rcu_read_lock();
2310 while (len > 0) {
2311 l = len;
2312 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2313
2314 if (is_write) {
2315 if (!memory_access_is_direct(mr, is_write)) {
2316 l = memory_access_size(mr, l, addr1);
2317 /* XXX: could force current_cpu to NULL to avoid
2318 potential bugs */
2319 switch (l) {
2320 case 8:
2321 /* 64 bit write access */
2322 val = ldq_p(buf);
2323 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2324 attrs);
2325 break;
2326 case 4:
2327 /* 32 bit write access */
2328 val = ldl_p(buf);
2329 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2330 attrs);
2331 break;
2332 case 2:
2333 /* 16 bit write access */
2334 val = lduw_p(buf);
2335 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2336 attrs);
2337 break;
2338 case 1:
2339 /* 8 bit write access */
2340 val = ldub_p(buf);
2341 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2342 attrs);
2343 break;
2344 default:
2345 abort();
2346 }
2347 } else {
2348 addr1 += memory_region_get_ram_addr(mr);
2349 /* RAM case */
2350 ptr = qemu_get_ram_ptr(addr1);
2351 memcpy(ptr, buf, l);
2352 invalidate_and_set_dirty(mr, addr1, l);
2353 }
2354 } else {
2355 if (!memory_access_is_direct(mr, is_write)) {
2356 /* I/O case */
2357 l = memory_access_size(mr, l, addr1);
2358 switch (l) {
2359 case 8:
2360 /* 64 bit read access */
2361 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2362 attrs);
2363 stq_p(buf, val);
2364 break;
2365 case 4:
2366 /* 32 bit read access */
2367 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2368 attrs);
2369 stl_p(buf, val);
2370 break;
2371 case 2:
2372 /* 16 bit read access */
2373 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2374 attrs);
2375 stw_p(buf, val);
2376 break;
2377 case 1:
2378 /* 8 bit read access */
2379 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2380 attrs);
2381 stb_p(buf, val);
2382 break;
2383 default:
2384 abort();
2385 }
2386 } else {
2387 /* RAM case */
2388 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2389 memcpy(buf, ptr, l);
2390 }
2391 }
2392 len -= l;
2393 buf += l;
2394 addr += l;
2395 }
2396 rcu_read_unlock();
2397
2398 return result;
2399 }
2400
2401 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2402 const uint8_t *buf, int len)
2403 {
2404 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2405 }
2406
2407 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2408 uint8_t *buf, int len)
2409 {
2410 return address_space_rw(as, addr, attrs, buf, len, false);
2411 }
2412
2413
2414 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2415 int len, int is_write)
2416 {
2417 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2418 buf, len, is_write);
2419 }
2420
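/*
 * Editor's note: a minimal usage sketch, not part of the original file.
 * It shows how a device model might read a small descriptor from guest
 * RAM through address_space_read() and check the transaction result;
 * the guest address 0x1000 is an arbitrary example.
 */
#if 0
static bool example_read_descriptor(uint8_t *desc, int size)
{
    MemTxResult r;

    r = address_space_read(&address_space_memory, 0x1000,
                           MEMTXATTRS_UNSPECIFIED, desc, size);
    /* MEMTX_OK means no unassigned memory or device error was hit. */
    return r == MEMTX_OK;
}
#endif
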
2421 enum write_rom_type {
2422 WRITE_DATA,
2423 FLUSH_CACHE,
2424 };
2425
2426 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2427 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2428 {
2429 hwaddr l;
2430 uint8_t *ptr;
2431 hwaddr addr1;
2432 MemoryRegion *mr;
2433
2434 rcu_read_lock();
2435 while (len > 0) {
2436 l = len;
2437 mr = address_space_translate(as, addr, &addr1, &l, true);
2438
2439 if (!(memory_region_is_ram(mr) ||
2440 memory_region_is_romd(mr))) {
2441 /* do nothing */
2442 } else {
2443 addr1 += memory_region_get_ram_addr(mr);
2444 /* ROM/RAM case */
2445 ptr = qemu_get_ram_ptr(addr1);
2446 switch (type) {
2447 case WRITE_DATA:
2448 memcpy(ptr, buf, l);
2449 invalidate_and_set_dirty(mr, addr1, l);
2450 break;
2451 case FLUSH_CACHE:
2452 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2453 break;
2454 }
2455 }
2456 len -= l;
2457 buf += l;
2458 addr += l;
2459 }
2460 rcu_read_unlock();
2461 }
2462
2463 /* Used for ROM loading: can write to both RAM and ROM. */
2464 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2465 const uint8_t *buf, int len)
2466 {
2467 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2468 }
2469
2470 void cpu_flush_icache_range(hwaddr start, int len)
2471 {
2472 /*
2473 * This function should do the same thing as an icache flush that was
2474 * triggered from within the guest. For TCG we are always cache coherent,
2475 * so there is no need to flush anything. For KVM / Xen we need to flush
2476 * the host's instruction cache at least.
2477 */
2478 if (tcg_enabled()) {
2479 return;
2480 }
2481
2482 cpu_physical_memory_write_rom_internal(&address_space_memory,
2483 start, NULL, len, FLUSH_CACHE);
2484 }
2485
2486 typedef struct {
2487 MemoryRegion *mr;
2488 void *buffer;
2489 hwaddr addr;
2490 hwaddr len;
2491 bool in_use;
2492 } BounceBuffer;
2493
2494 static BounceBuffer bounce;
2495
2496 typedef struct MapClient {
2497 QEMUBH *bh;
2498 QLIST_ENTRY(MapClient) link;
2499 } MapClient;
2500
2501 QemuMutex map_client_list_lock;
2502 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2503 = QLIST_HEAD_INITIALIZER(map_client_list);
2504
2505 static void cpu_unregister_map_client_do(MapClient *client)
2506 {
2507 QLIST_REMOVE(client, link);
2508 g_free(client);
2509 }
2510
2511 static void cpu_notify_map_clients_locked(void)
2512 {
2513 MapClient *client;
2514
2515 while (!QLIST_EMPTY(&map_client_list)) {
2516 client = QLIST_FIRST(&map_client_list);
2517 qemu_bh_schedule(client->bh);
2518 cpu_unregister_map_client_do(client);
2519 }
2520 }
2521
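/* Map clients are bottom halves waiting to retry address_space_map() once
   the single bounce buffer is free again; cpu_notify_map_clients_locked()
   schedules and unregisters them when that happens. */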
2522 void cpu_register_map_client(QEMUBH *bh)
2523 {
2524 MapClient *client = g_malloc(sizeof(*client));
2525
2526 qemu_mutex_lock(&map_client_list_lock);
2527 client->bh = bh;
2528 QLIST_INSERT_HEAD(&map_client_list, client, link);
2529 if (!atomic_read(&bounce.in_use)) {
2530 cpu_notify_map_clients_locked();
2531 }
2532 qemu_mutex_unlock(&map_client_list_lock);
2533 }
2534
2535 void cpu_exec_init_all(void)
2536 {
2537 qemu_mutex_init(&ram_list.mutex);
2538 memory_map_init();
2539 io_mem_init();
2540 qemu_mutex_init(&map_client_list_lock);
2541 }
2542
2543 void cpu_unregister_map_client(QEMUBH *bh)
2544 {
2545 MapClient *client;
2546
2547 qemu_mutex_lock(&map_client_list_lock);
2548 QLIST_FOREACH(client, &map_client_list, link) {
2549 if (client->bh == bh) {
2550 cpu_unregister_map_client_do(client);
2551 break;
2552 }
2553 }
2554 qemu_mutex_unlock(&map_client_list_lock);
2555 }
2556
2557 static void cpu_notify_map_clients(void)
2558 {
2559 qemu_mutex_lock(&map_client_list_lock);
2560 cpu_notify_map_clients_locked();
2561 qemu_mutex_unlock(&map_client_list_lock);
2562 }
2563
2564 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2565 {
2566 MemoryRegion *mr;
2567 hwaddr l, xlat;
2568
2569 rcu_read_lock();
2570 while (len > 0) {
2571 l = len;
2572 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2573 if (!memory_access_is_direct(mr, is_write)) {
2574 l = memory_access_size(mr, l, addr);
2575 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2576 return false;
2577 }
2578 }
2579
2580 len -= l;
2581 addr += l;
2582 }
2583 rcu_read_unlock();
2584 return true;
2585 }
2586
2587 /* Map a physical memory region into a host virtual address.
2588 * May map a subset of the requested range, given by and returned in *plen.
2589 * May return NULL if resources needed to perform the mapping are exhausted.
2590 * Use only for reads OR writes - not for read-modify-write operations.
2591 * Use cpu_register_map_client() to know when retrying the map operation is
2592 * likely to succeed.
2593 */
2594 void *address_space_map(AddressSpace *as,
2595 hwaddr addr,
2596 hwaddr *plen,
2597 bool is_write)
2598 {
2599 hwaddr len = *plen;
2600 hwaddr done = 0;
2601 hwaddr l, xlat, base;
2602 MemoryRegion *mr, *this_mr;
2603 ram_addr_t raddr;
2604
2605 if (len == 0) {
2606 return NULL;
2607 }
2608
2609 l = len;
2610 rcu_read_lock();
2611 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2612
2613 if (!memory_access_is_direct(mr, is_write)) {
2614 if (atomic_xchg(&bounce.in_use, true)) {
2615 rcu_read_unlock();
2616 return NULL;
2617 }
2618 /* Avoid unbounded allocations */
2619 l = MIN(l, TARGET_PAGE_SIZE);
2620 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2621 bounce.addr = addr;
2622 bounce.len = l;
2623
2624 memory_region_ref(mr);
2625 bounce.mr = mr;
2626 if (!is_write) {
2627 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2628 bounce.buffer, l);
2629 }
2630
2631 rcu_read_unlock();
2632 *plen = l;
2633 return bounce.buffer;
2634 }
2635
2636 base = xlat;
2637 raddr = memory_region_get_ram_addr(mr);
2638
2639 for (;;) {
2640 len -= l;
2641 addr += l;
2642 done += l;
2643 if (len == 0) {
2644 break;
2645 }
2646
2647 l = len;
2648 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2649 if (this_mr != mr || xlat != base + done) {
2650 break;
2651 }
2652 }
2653
2654 memory_region_ref(mr);
2655 rcu_read_unlock();
2656 *plen = done;
2657 return qemu_ram_ptr_length(raddr + base, plen);
2658 }
2659
2660 /* Unmaps a memory region previously mapped by address_space_map().
2661 * Will also mark the memory as dirty if is_write == 1. access_len gives
2662 * the amount of memory that was actually read or written by the caller.
2663 */
2664 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2665 int is_write, hwaddr access_len)
2666 {
2667 if (buffer != bounce.buffer) {
2668 MemoryRegion *mr;
2669 ram_addr_t addr1;
2670
2671 mr = qemu_ram_addr_from_host(buffer, &addr1);
2672 assert(mr != NULL);
2673 if (is_write) {
2674 invalidate_and_set_dirty(mr, addr1, access_len);
2675 }
2676 if (xen_enabled()) {
2677 xen_invalidate_map_cache_entry(buffer);
2678 }
2679 memory_region_unref(mr);
2680 return;
2681 }
2682 if (is_write) {
2683 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2684 bounce.buffer, access_len);
2685 }
2686 qemu_vfree(bounce.buffer);
2687 bounce.buffer = NULL;
2688 memory_region_unref(bounce.mr);
2689 atomic_mb_set(&bounce.in_use, false);
2690 cpu_notify_map_clients();
2691 }
2692
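/*
 * Editor's note: a minimal sketch, not part of the original file, of the
 * map/unmap pattern used by DMA helpers. Real callers fall back to
 * address_space_rw() or register a map client when the mapping fails.
 */
#if 0
static void example_zero_guest_buffer(hwaddr addr, hwaddr size)
{
    hwaddr len = size;
    void *host = address_space_map(&address_space_memory, addr, &len, true);

    if (host) {
        memset(host, 0, len);   /* len may come back smaller than size */
        address_space_unmap(&address_space_memory, host, len, true, len);
    }
}
#endif
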
2693 void *cpu_physical_memory_map(hwaddr addr,
2694 hwaddr *plen,
2695 int is_write)
2696 {
2697 return address_space_map(&address_space_memory, addr, plen, is_write);
2698 }
2699
2700 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2701 int is_write, hwaddr access_len)
2702 {
2703 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2704 }
2705
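/* The ld*_internal() and st*_internal() helpers below share one pattern:
   when the full access width maps onto RAM they use a direct host pointer,
   otherwise they dispatch through the region's ops and byte-swap the value
   to the requested device endianness. */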
2706 /* warning: addr must be aligned */
2707 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2708 MemTxAttrs attrs,
2709 MemTxResult *result,
2710 enum device_endian endian)
2711 {
2712 uint8_t *ptr;
2713 uint64_t val;
2714 MemoryRegion *mr;
2715 hwaddr l = 4;
2716 hwaddr addr1;
2717 MemTxResult r;
2718
2719 rcu_read_lock();
2720 mr = address_space_translate(as, addr, &addr1, &l, false);
2721 if (l < 4 || !memory_access_is_direct(mr, false)) {
2722 /* I/O case */
2723 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2724 #if defined(TARGET_WORDS_BIGENDIAN)
2725 if (endian == DEVICE_LITTLE_ENDIAN) {
2726 val = bswap32(val);
2727 }
2728 #else
2729 if (endian == DEVICE_BIG_ENDIAN) {
2730 val = bswap32(val);
2731 }
2732 #endif
2733 } else {
2734 /* RAM case */
2735 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2736 & TARGET_PAGE_MASK)
2737 + addr1);
2738 switch (endian) {
2739 case DEVICE_LITTLE_ENDIAN:
2740 val = ldl_le_p(ptr);
2741 break;
2742 case DEVICE_BIG_ENDIAN:
2743 val = ldl_be_p(ptr);
2744 break;
2745 default:
2746 val = ldl_p(ptr);
2747 break;
2748 }
2749 r = MEMTX_OK;
2750 }
2751 if (result) {
2752 *result = r;
2753 }
2754 rcu_read_unlock();
2755 return val;
2756 }
2757
2758 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2759 MemTxAttrs attrs, MemTxResult *result)
2760 {
2761 return address_space_ldl_internal(as, addr, attrs, result,
2762 DEVICE_NATIVE_ENDIAN);
2763 }
2764
2765 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2766 MemTxAttrs attrs, MemTxResult *result)
2767 {
2768 return address_space_ldl_internal(as, addr, attrs, result,
2769 DEVICE_LITTLE_ENDIAN);
2770 }
2771
2772 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2773 MemTxAttrs attrs, MemTxResult *result)
2774 {
2775 return address_space_ldl_internal(as, addr, attrs, result,
2776 DEVICE_BIG_ENDIAN);
2777 }
2778
2779 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2780 {
2781 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2782 }
2783
2784 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2785 {
2786 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2787 }
2788
2789 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2790 {
2791 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2792 }
2793
2794 /* warning: addr must be aligned */
2795 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2796 MemTxAttrs attrs,
2797 MemTxResult *result,
2798 enum device_endian endian)
2799 {
2800 uint8_t *ptr;
2801 uint64_t val;
2802 MemoryRegion *mr;
2803 hwaddr l = 8;
2804 hwaddr addr1;
2805 MemTxResult r;
2806
2807 rcu_read_lock();
2808 mr = address_space_translate(as, addr, &addr1, &l,
2809 false);
2810 if (l < 8 || !memory_access_is_direct(mr, false)) {
2811 /* I/O case */
2812 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
2813 #if defined(TARGET_WORDS_BIGENDIAN)
2814 if (endian == DEVICE_LITTLE_ENDIAN) {
2815 val = bswap64(val);
2816 }
2817 #else
2818 if (endian == DEVICE_BIG_ENDIAN) {
2819 val = bswap64(val);
2820 }
2821 #endif
2822 } else {
2823 /* RAM case */
2824 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2825 & TARGET_PAGE_MASK)
2826 + addr1);
2827 switch (endian) {
2828 case DEVICE_LITTLE_ENDIAN:
2829 val = ldq_le_p(ptr);
2830 break;
2831 case DEVICE_BIG_ENDIAN:
2832 val = ldq_be_p(ptr);
2833 break;
2834 default:
2835 val = ldq_p(ptr);
2836 break;
2837 }
2838 r = MEMTX_OK;
2839 }
2840 if (result) {
2841 *result = r;
2842 }
2843 rcu_read_unlock();
2844 return val;
2845 }
2846
2847 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
2848 MemTxAttrs attrs, MemTxResult *result)
2849 {
2850 return address_space_ldq_internal(as, addr, attrs, result,
2851 DEVICE_NATIVE_ENDIAN);
2852 }
2853
2854 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
2855 MemTxAttrs attrs, MemTxResult *result)
2856 {
2857 return address_space_ldq_internal(as, addr, attrs, result,
2858 DEVICE_LITTLE_ENDIAN);
2859 }
2860
2861 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
2862 MemTxAttrs attrs, MemTxResult *result)
2863 {
2864 return address_space_ldq_internal(as, addr, attrs, result,
2865 DEVICE_BIG_ENDIAN);
2866 }
2867
2868 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2869 {
2870 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2871 }
2872
2873 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2874 {
2875 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2876 }
2877
2878 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2879 {
2880 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2881 }
2882
2883 /* XXX: optimize */
2884 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
2885 MemTxAttrs attrs, MemTxResult *result)
2886 {
2887 uint8_t val;
2888 MemTxResult r;
2889
2890 r = address_space_rw(as, addr, attrs, &val, 1, 0);
2891 if (result) {
2892 *result = r;
2893 }
2894 return val;
2895 }
2896
2897 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2898 {
2899 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2900 }
2901
2902 /* warning: addr must be aligned */
2903 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
2904 hwaddr addr,
2905 MemTxAttrs attrs,
2906 MemTxResult *result,
2907 enum device_endian endian)
2908 {
2909 uint8_t *ptr;
2910 uint64_t val;
2911 MemoryRegion *mr;
2912 hwaddr l = 2;
2913 hwaddr addr1;
2914 MemTxResult r;
2915
2916 rcu_read_lock();
2917 mr = address_space_translate(as, addr, &addr1, &l,
2918 false);
2919 if (l < 2 || !memory_access_is_direct(mr, false)) {
2920 /* I/O case */
2921 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
2922 #if defined(TARGET_WORDS_BIGENDIAN)
2923 if (endian == DEVICE_LITTLE_ENDIAN) {
2924 val = bswap16(val);
2925 }
2926 #else
2927 if (endian == DEVICE_BIG_ENDIAN) {
2928 val = bswap16(val);
2929 }
2930 #endif
2931 } else {
2932 /* RAM case */
2933 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2934 & TARGET_PAGE_MASK)
2935 + addr1);
2936 switch (endian) {
2937 case DEVICE_LITTLE_ENDIAN:
2938 val = lduw_le_p(ptr);
2939 break;
2940 case DEVICE_BIG_ENDIAN:
2941 val = lduw_be_p(ptr);
2942 break;
2943 default:
2944 val = lduw_p(ptr);
2945 break;
2946 }
2947 r = MEMTX_OK;
2948 }
2949 if (result) {
2950 *result = r;
2951 }
2952 rcu_read_unlock();
2953 return val;
2954 }
2955
2956 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
2957 MemTxAttrs attrs, MemTxResult *result)
2958 {
2959 return address_space_lduw_internal(as, addr, attrs, result,
2960 DEVICE_NATIVE_ENDIAN);
2961 }
2962
2963 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
2964 MemTxAttrs attrs, MemTxResult *result)
2965 {
2966 return address_space_lduw_internal(as, addr, attrs, result,
2967 DEVICE_LITTLE_ENDIAN);
2968 }
2969
2970 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
2971 MemTxAttrs attrs, MemTxResult *result)
2972 {
2973 return address_space_lduw_internal(as, addr, attrs, result,
2974 DEVICE_BIG_ENDIAN);
2975 }
2976
2977 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2978 {
2979 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2980 }
2981
2982 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2983 {
2984 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2985 }
2986
2987 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2988 {
2989 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2990 }
2991
2992 /* warning: addr must be aligned. The RAM page is not marked as dirty
2993 and the code inside is not invalidated. This is useful when the dirty
2994 bits are used to track modified PTEs. */
2995 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
2996 MemTxAttrs attrs, MemTxResult *result)
2997 {
2998 uint8_t *ptr;
2999 MemoryRegion *mr;
3000 hwaddr l = 4;
3001 hwaddr addr1;
3002 MemTxResult r;
3003 uint8_t dirty_log_mask;
3004
3005 rcu_read_lock();
3006 mr = address_space_translate(as, addr, &addr1, &l,
3007 true);
3008 if (l < 4 || !memory_access_is_direct(mr, true)) {
3009 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3010 } else {
3011 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3012 ptr = qemu_get_ram_ptr(addr1);
3013 stl_p(ptr, val);
3014
3015 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3016 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3017 if (dirty_log_mask) {
3018 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
3019 }
3020 r = MEMTX_OK;
3021 }
3022 if (result) {
3023 *result = r;
3024 }
3025 rcu_read_unlock();
3026 }
3027
3028 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3029 {
3030 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3031 }
3032
3033 /* warning: addr must be aligned */
3034 static inline void address_space_stl_internal(AddressSpace *as,
3035 hwaddr addr, uint32_t val,
3036 MemTxAttrs attrs,
3037 MemTxResult *result,
3038 enum device_endian endian)
3039 {
3040 uint8_t *ptr;
3041 MemoryRegion *mr;
3042 hwaddr l = 4;
3043 hwaddr addr1;
3044 MemTxResult r;
3045
3046 rcu_read_lock();
3047 mr = address_space_translate(as, addr, &addr1, &l,
3048 true);
3049 if (l < 4 || !memory_access_is_direct(mr, true)) {
3050 #if defined(TARGET_WORDS_BIGENDIAN)
3051 if (endian == DEVICE_LITTLE_ENDIAN) {
3052 val = bswap32(val);
3053 }
3054 #else
3055 if (endian == DEVICE_BIG_ENDIAN) {
3056 val = bswap32(val);
3057 }
3058 #endif
3059 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3060 } else {
3061 /* RAM case */
3062 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3063 ptr = qemu_get_ram_ptr(addr1);
3064 switch (endian) {
3065 case DEVICE_LITTLE_ENDIAN:
3066 stl_le_p(ptr, val);
3067 break;
3068 case DEVICE_BIG_ENDIAN:
3069 stl_be_p(ptr, val);
3070 break;
3071 default:
3072 stl_p(ptr, val);
3073 break;
3074 }
3075 invalidate_and_set_dirty(mr, addr1, 4);
3076 r = MEMTX_OK;
3077 }
3078 if (result) {
3079 *result = r;
3080 }
3081 rcu_read_unlock();
3082 }
3083
3084 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3085 MemTxAttrs attrs, MemTxResult *result)
3086 {
3087 address_space_stl_internal(as, addr, val, attrs, result,
3088 DEVICE_NATIVE_ENDIAN);
3089 }
3090
3091 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3092 MemTxAttrs attrs, MemTxResult *result)
3093 {
3094 address_space_stl_internal(as, addr, val, attrs, result,
3095 DEVICE_LITTLE_ENDIAN);
3096 }
3097
3098 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3099 MemTxAttrs attrs, MemTxResult *result)
3100 {
3101 address_space_stl_internal(as, addr, val, attrs, result,
3102 DEVICE_BIG_ENDIAN);
3103 }
3104
3105 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3106 {
3107 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3108 }
3109
3110 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3111 {
3112 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3113 }
3114
3115 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3116 {
3117 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3118 }
3119
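/*
 * Editor's note: a brief sketch, not part of the original file, of the
 * 32-bit physical-memory convenience wrappers; the address space and
 * address are whatever the caller is working with.
 */
#if 0
static void example_set_bit0(AddressSpace *as, hwaddr addr)
{
    uint32_t v = ldl_phys(as, addr);    /* target-endian 32-bit load */

    stl_phys(as, addr, v | 1);          /* store it back with bit 0 set */
}
#endif
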
3120 /* XXX: optimize */
3121 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3122 MemTxAttrs attrs, MemTxResult *result)
3123 {
3124 uint8_t v = val;
3125 MemTxResult r;
3126
3127 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3128 if (result) {
3129 *result = r;
3130 }
3131 }
3132
3133 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3134 {
3135 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3136 }
3137
3138 /* warning: addr must be aligned */
3139 static inline void address_space_stw_internal(AddressSpace *as,
3140 hwaddr addr, uint32_t val,
3141 MemTxAttrs attrs,
3142 MemTxResult *result,
3143 enum device_endian endian)
3144 {
3145 uint8_t *ptr;
3146 MemoryRegion *mr;
3147 hwaddr l = 2;
3148 hwaddr addr1;
3149 MemTxResult r;
3150
3151 rcu_read_lock();
3152 mr = address_space_translate(as, addr, &addr1, &l, true);
3153 if (l < 2 || !memory_access_is_direct(mr, true)) {
3154 #if defined(TARGET_WORDS_BIGENDIAN)
3155 if (endian == DEVICE_LITTLE_ENDIAN) {
3156 val = bswap16(val);
3157 }
3158 #else
3159 if (endian == DEVICE_BIG_ENDIAN) {
3160 val = bswap16(val);
3161 }
3162 #endif
3163 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3164 } else {
3165 /* RAM case */
3166 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3167 ptr = qemu_get_ram_ptr(addr1);
3168 switch (endian) {
3169 case DEVICE_LITTLE_ENDIAN:
3170 stw_le_p(ptr, val);
3171 break;
3172 case DEVICE_BIG_ENDIAN:
3173 stw_be_p(ptr, val);
3174 break;
3175 default:
3176 stw_p(ptr, val);
3177 break;
3178 }
3179 invalidate_and_set_dirty(mr, addr1, 2);
3180 r = MEMTX_OK;
3181 }
3182 if (result) {
3183 *result = r;
3184 }
3185 rcu_read_unlock();
3186 }
3187
3188 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3189 MemTxAttrs attrs, MemTxResult *result)
3190 {
3191 address_space_stw_internal(as, addr, val, attrs, result,
3192 DEVICE_NATIVE_ENDIAN);
3193 }
3194
3195 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3196 MemTxAttrs attrs, MemTxResult *result)
3197 {
3198 address_space_stw_internal(as, addr, val, attrs, result,
3199 DEVICE_LITTLE_ENDIAN);
3200 }
3201
3202 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3203 MemTxAttrs attrs, MemTxResult *result)
3204 {
3205 address_space_stw_internal(as, addr, val, attrs, result,
3206 DEVICE_BIG_ENDIAN);
3207 }
3208
3209 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3210 {
3211 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3212 }
3213
3214 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3215 {
3216 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3217 }
3218
3219 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3220 {
3221 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3222 }
3223
3224 /* XXX: optimize */
3225 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3226 MemTxAttrs attrs, MemTxResult *result)
3227 {
3228 MemTxResult r;
3229 val = tswap64(val);
3230 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3231 if (result) {
3232 *result = r;
3233 }
3234 }
3235
3236 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3237 MemTxAttrs attrs, MemTxResult *result)
3238 {
3239 MemTxResult r;
3240 val = cpu_to_le64(val);
3241 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3242 if (result) {
3243 *result = r;
3244 }
3245 }
3246 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3247 MemTxAttrs attrs, MemTxResult *result)
3248 {
3249 MemTxResult r;
3250 val = cpu_to_be64(val);
3251 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3252 if (result) {
3253 *result = r;
3254 }
3255 }
3256
3257 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3258 {
3259 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3260 }
3261
3262 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3263 {
3264 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3265 }
3266
3267 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3268 {
3269 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3270 }
3271
3272 /* virtual memory access for debug (includes writing to ROM) */
3273 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3274 uint8_t *buf, int len, int is_write)
3275 {
3276 int l;
3277 hwaddr phys_addr;
3278 target_ulong page;
3279
3280 while (len > 0) {
3281 page = addr & TARGET_PAGE_MASK;
3282 phys_addr = cpu_get_phys_page_debug(cpu, page);
3283 /* if no physical page mapped, return an error */
3284 if (phys_addr == -1)
3285 return -1;
3286 l = (page + TARGET_PAGE_SIZE) - addr;
3287 if (l > len)
3288 l = len;
3289 phys_addr += (addr & ~TARGET_PAGE_MASK);
3290 if (is_write) {
3291 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3292 } else {
3293 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3294 buf, l, 0);
3295 }
3296 len -= l;
3297 buf += l;
3298 addr += l;
3299 }
3300 return 0;
3301 }
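
/*
 * Editor's note: a minimal sketch, not part of the original file, of how a
 * debugger front end (the gdbstub, for instance) reads guest virtual memory.
 */
#if 0
static int example_debug_read(CPUState *cpu, target_ulong vaddr,
                              uint8_t *buf, int len)
{
    /* Returns 0 on success, -1 if any page in the range is unmapped. */
    return cpu_memory_rw_debug(cpu, vaddr, buf, len, 0);
}
#endif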
3302 #endif
3303
3304 /*
3305 * A helper function for the _utterly broken_ virtio device model to find out if
3306 * it's running on a big endian machine. Don't do this at home kids!
3307 */
3308 bool target_words_bigendian(void);
3309 bool target_words_bigendian(void)
3310 {
3311 #if defined(TARGET_WORDS_BIGENDIAN)
3312 return true;
3313 #else
3314 return false;
3315 #endif
3316 }
3317
3318 #ifndef CONFIG_USER_ONLY
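/* Returns true if the physical address is backed by MMIO rather than RAM
   or ROM-device memory. */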
3319 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3320 {
3321 MemoryRegion *mr;
3322 hwaddr l = 1;
3323 bool res;
3324
3325 rcu_read_lock();
3326 mr = address_space_translate(&address_space_memory,
3327 phys_addr, &phys_addr, &l, false);
3328
3329 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3330 rcu_read_unlock();
3331 return res;
3332 }
3333
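/* Walk all RAM blocks under the RCU read lock, handing each block's host
   mapping, offset and used length to the callback. */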
3334 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3335 {
3336 RAMBlock *block;
3337
3338 rcu_read_lock();
3339 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3340 func(block->host, block->offset, block->used_length, opaque);
3341 }
3342 rcu_read_unlock();
3343 }
3344 #endif