git.proxmox.com Git - mirror_qemu.git/blob - exec.c
exec: optimize phys_page_set_level
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "exec/cputlb.h"
52 #include "translate-all.h"
53
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
56
57 #include "qemu/range.h"
58
59 //#define DEBUG_SUBPAGE
60
61 #if !defined(CONFIG_USER_ONLY)
62 static bool in_migration;
63
64 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
65 * are protected by the ramlist lock.
66 */
67 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
68
69 static MemoryRegion *system_memory;
70 static MemoryRegion *system_io;
71
72 AddressSpace address_space_io;
73 AddressSpace address_space_memory;
74
75 MemoryRegion io_mem_rom, io_mem_notdirty;
76 static MemoryRegion io_mem_unassigned;
77
78 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
79 #define RAM_PREALLOC (1 << 0)
80
81 /* RAM is mmap-ed with MAP_SHARED */
82 #define RAM_SHARED (1 << 1)
83
84 /* Only a portion of RAM (used_length) is actually used, and migrated.
85 * This used_length can change across reboots.
86 */
87 #define RAM_RESIZEABLE (1 << 2)
88
89 #endif
90
91 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
92 /* current CPU in the current thread. It is only valid inside
93 cpu_exec() */
94 DEFINE_TLS(CPUState *, current_cpu);
95 /* 0 = Do not count executed instructions.
96 1 = Precise instruction counting.
97 2 = Adaptive rate instruction counting. */
98 int use_icount;
99
100 #if !defined(CONFIG_USER_ONLY)
101
102 typedef struct PhysPageEntry PhysPageEntry;
103
104 struct PhysPageEntry {
105 /* How many levels to skip to reach the next node (each level is P_L2_BITS wide); 0 for a leaf. */
106 uint32_t skip : 6;
107 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
108 uint32_t ptr : 26;
109 };
110
111 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
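/* Layout note: the 6-bit skip and 26-bit ptr fields share a single uint32_t,
 * so a node index can be at most 2^26 - 1.  PHYS_MAP_NODE_NIL is exactly that
 * maximum value (0x3ffffff) and is reserved to mean "no node allocated yet".
 */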
112
113 /* Size of the L2 (and L3, etc) page tables. */
114 #define ADDR_SPACE_BITS 64
115
116 #define P_L2_BITS 9
117 #define P_L2_SIZE (1 << P_L2_BITS)
118
119 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
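/* Worked example, assuming a 4 KiB target page (TARGET_PAGE_BITS == 12):
 *   page-index bits = 64 - 12 = 52
 *   P_L2_LEVELS     = ((64 - 12 - 1) / 9) + 1 = (51 / 9) + 1 = 6
 * Six 9-bit levels cover 54 bits, enough for the 52-bit page index.
 */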
120
121 typedef PhysPageEntry Node[P_L2_SIZE];
122
123 typedef struct PhysPageMap {
124 struct rcu_head rcu;
125
126 unsigned sections_nb;
127 unsigned sections_nb_alloc;
128 unsigned nodes_nb;
129 unsigned nodes_nb_alloc;
130 Node *nodes;
131 MemoryRegionSection *sections;
132 } PhysPageMap;
133
134 struct AddressSpaceDispatch {
135 struct rcu_head rcu;
136
137 /* This is a multi-level map on the physical address space.
138 * The bottom level has pointers to MemoryRegionSections.
139 */
140 PhysPageEntry phys_map;
141 PhysPageMap map;
142 AddressSpace *as;
143 };
144
145 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
146 typedef struct subpage_t {
147 MemoryRegion iomem;
148 AddressSpace *as;
149 hwaddr base;
150 uint16_t sub_section[TARGET_PAGE_SIZE];
151 } subpage_t;
152
153 #define PHYS_SECTION_UNASSIGNED 0
154 #define PHYS_SECTION_NOTDIRTY 1
155 #define PHYS_SECTION_ROM 2
156 #define PHYS_SECTION_WATCH 3
157
158 static void io_mem_init(void);
159 static void memory_map_init(void);
160 static void tcg_commit(MemoryListener *listener);
161
162 static MemoryRegion io_mem_watch;
163 #endif
164
165 #if !defined(CONFIG_USER_ONLY)
166
167 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
168 {
169 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
170 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
171 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
172 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
173 }
174 }
175
176 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
177 {
178 unsigned i;
179 uint32_t ret;
180 PhysPageEntry e;
181 PhysPageEntry *p;
182
183 ret = map->nodes_nb++;
184 p = map->nodes[ret];
185 assert(ret != PHYS_MAP_NODE_NIL);
186 assert(ret != map->nodes_nb_alloc);
187
188 e.skip = leaf ? 0 : 1;
189 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
190 for (i = 0; i < P_L2_SIZE; ++i) {
191 memcpy(&p[i], &e, sizeof(e));
192 }
193 return ret;
194 }
195
196 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
197 hwaddr *index, hwaddr *nb, uint16_t leaf,
198 int level)
199 {
200 PhysPageEntry *p;
201 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
202
203 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
204 lp->ptr = phys_map_node_alloc(map, level == 0);
205 }
206 p = map->nodes[lp->ptr];
207 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
208
209 while (*nb && lp < &p[P_L2_SIZE]) {
210 if ((*index & (step - 1)) == 0 && *nb >= step) {
211 lp->skip = 0;
212 lp->ptr = leaf;
213 *index += step;
214 *nb -= step;
215 } else {
216 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
217 }
218 ++lp;
219 }
220 }
221
222 static void phys_page_set(AddressSpaceDispatch *d,
223 hwaddr index, hwaddr nb,
224 uint16_t leaf)
225 {
226 /* Wildly overreserve - it doesn't matter much. */
227 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
228
229 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
230 }
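/* Example of how ranges land in the tree: with P_L2_BITS == 9, a leaf entry
 * written at level 1 covers 1 << 9 == 512 pages, so a 2 MiB region of 4 KiB
 * pages that is 2 MiB aligned is stored as a single level-1 entry with
 * skip == 0 instead of 512 level-0 entries; only the unaligned head and tail
 * of a range recurse into lower levels.
 */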
231
232 /* Compact a non-leaf page entry: if the entry has a single child, update our
233 * entry so we can skip it and go directly to the destination.
234 */
235 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
236 {
237 unsigned valid_ptr = P_L2_SIZE;
238 int valid = 0;
239 PhysPageEntry *p;
240 int i;
241
242 if (lp->ptr == PHYS_MAP_NODE_NIL) {
243 return;
244 }
245
246 p = nodes[lp->ptr];
247 for (i = 0; i < P_L2_SIZE; i++) {
248 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
249 continue;
250 }
251
252 valid_ptr = i;
253 valid++;
254 if (p[i].skip) {
255 phys_page_compact(&p[i], nodes, compacted);
256 }
257 }
258
259 /* We can only compress if there's only one child. */
260 if (valid != 1) {
261 return;
262 }
263
264 assert(valid_ptr < P_L2_SIZE);
265
266 /* Don't compress if it won't fit in the # of bits we have. */
267 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
268 return;
269 }
270
271 lp->ptr = p[valid_ptr].ptr;
272 if (!p[valid_ptr].skip) {
273 /* If our only child is a leaf, make this a leaf. */
274 /* By design, we should have made this node a leaf to begin with so we
275 * should never reach here.
276 * But since it's so simple to handle this, let's do it just in case we
277 * change this rule.
278 */
279 lp->skip = 0;
280 } else {
281 lp->skip += p[valid_ptr].skip;
282 }
283 }
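/* Compaction example: if the node an entry points to has a single valid
 * child, the entry inherits that child's ptr and the two skip counts add up,
 * so a chain of single-child nodes collapses into one entry whose accumulated
 * skip tells phys_page_find() how many levels to cross in a single step.
 */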
284
285 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
286 {
287 DECLARE_BITMAP(compacted, nodes_nb);
288
289 if (d->phys_map.skip) {
290 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
291 }
292 }
293
294 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
295 Node *nodes, MemoryRegionSection *sections)
296 {
297 PhysPageEntry *p;
298 hwaddr index = addr >> TARGET_PAGE_BITS;
299 int i;
300
301 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
302 if (lp.ptr == PHYS_MAP_NODE_NIL) {
303 return &sections[PHYS_SECTION_UNASSIGNED];
304 }
305 p = nodes[lp.ptr];
306 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
307 }
308
309 if (sections[lp.ptr].size.hi ||
310 range_covers_byte(sections[lp.ptr].offset_within_address_space,
311 sections[lp.ptr].size.lo, addr)) {
312 return &sections[lp.ptr];
313 } else {
314 return &sections[PHYS_SECTION_UNASSIGNED];
315 }
316 }
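/* Lookup sketch: the loop above peels lp.skip levels per iteration, indexing
 * each visited node with the next P_L2_BITS-wide slice of the page index.  A
 * section whose Int128 size has a non-zero high word is treated as covering
 * every address; otherwise range_covers_byte() must confirm that the section
 * really covers addr, or the unassigned section is returned instead.
 */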
317
318 bool memory_region_is_unassigned(MemoryRegion *mr)
319 {
320 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
321 && mr != &io_mem_watch;
322 }
323
324 /* Called from RCU critical section */
325 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
326 hwaddr addr,
327 bool resolve_subpage)
328 {
329 MemoryRegionSection *section;
330 subpage_t *subpage;
331
332 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
333 if (resolve_subpage && section->mr->subpage) {
334 subpage = container_of(section->mr, subpage_t, iomem);
335 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
336 }
337 return section;
338 }
339
340 /* Called from RCU critical section */
341 static MemoryRegionSection *
342 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
343 hwaddr *plen, bool resolve_subpage)
344 {
345 MemoryRegionSection *section;
346 Int128 diff;
347
348 section = address_space_lookup_region(d, addr, resolve_subpage);
349 /* Compute offset within MemoryRegionSection */
350 addr -= section->offset_within_address_space;
351
352 /* Compute offset within MemoryRegion */
353 *xlat = addr + section->offset_within_region;
354
355 diff = int128_sub(section->mr->size, int128_make64(addr));
356 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
357 return section;
358 }
359
360 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
361 {
362 if (memory_region_is_ram(mr)) {
363 return !(is_write && mr->readonly);
364 }
365 if (memory_region_is_romd(mr)) {
366 return !is_write;
367 }
368
369 return false;
370 }
371
372 /* Called from RCU critical section */
373 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
374 hwaddr *xlat, hwaddr *plen,
375 bool is_write)
376 {
377 IOMMUTLBEntry iotlb;
378 MemoryRegionSection *section;
379 MemoryRegion *mr;
380
381 for (;;) {
382 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
383 section = address_space_translate_internal(d, addr, &addr, plen, true);
384 mr = section->mr;
385
386 if (!mr->iommu_ops) {
387 break;
388 }
389
390 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
391 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
392 | (addr & iotlb.addr_mask));
393 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
394 if (!(iotlb.perm & (1 << is_write))) {
395 mr = &io_mem_unassigned;
396 break;
397 }
398
399 as = iotlb.target_as;
400 }
401
402 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
403 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
404 *plen = MIN(page, *plen);
405 }
406
407 *xlat = addr;
408 return mr;
409 }
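/* Translation sketch: the loop above re-resolves addr in each IOMMU's target
 * address space until a non-IOMMU region is reached, clamping *plen to the
 * span of the returned IOTLB entry; under Xen, direct RAM accesses are
 * additionally clamped to the remainder of the current page.
 */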
410
411 /* Called from RCU critical section */
412 MemoryRegionSection *
413 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
414 hwaddr *xlat, hwaddr *plen)
415 {
416 MemoryRegionSection *section;
417 section = address_space_translate_internal(cpu->memory_dispatch,
418 addr, xlat, plen, false);
419
420 assert(!section->mr->iommu_ops);
421 return section;
422 }
423 #endif
424
425 #if !defined(CONFIG_USER_ONLY)
426
427 static int cpu_common_post_load(void *opaque, int version_id)
428 {
429 CPUState *cpu = opaque;
430
431 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
432 version_id is increased. */
433 cpu->interrupt_request &= ~0x01;
434 tlb_flush(cpu, 1);
435
436 return 0;
437 }
438
439 static int cpu_common_pre_load(void *opaque)
440 {
441 CPUState *cpu = opaque;
442
443 cpu->exception_index = -1;
444
445 return 0;
446 }
447
448 static bool cpu_common_exception_index_needed(void *opaque)
449 {
450 CPUState *cpu = opaque;
451
452 return tcg_enabled() && cpu->exception_index != -1;
453 }
454
455 static const VMStateDescription vmstate_cpu_common_exception_index = {
456 .name = "cpu_common/exception_index",
457 .version_id = 1,
458 .minimum_version_id = 1,
459 .fields = (VMStateField[]) {
460 VMSTATE_INT32(exception_index, CPUState),
461 VMSTATE_END_OF_LIST()
462 }
463 };
464
465 const VMStateDescription vmstate_cpu_common = {
466 .name = "cpu_common",
467 .version_id = 1,
468 .minimum_version_id = 1,
469 .pre_load = cpu_common_pre_load,
470 .post_load = cpu_common_post_load,
471 .fields = (VMStateField[]) {
472 VMSTATE_UINT32(halted, CPUState),
473 VMSTATE_UINT32(interrupt_request, CPUState),
474 VMSTATE_END_OF_LIST()
475 },
476 .subsections = (VMStateSubsection[]) {
477 {
478 .vmsd = &vmstate_cpu_common_exception_index,
479 .needed = cpu_common_exception_index_needed,
480 } , {
481 /* empty */
482 }
483 }
484 };
485
486 #endif
487
488 CPUState *qemu_get_cpu(int index)
489 {
490 CPUState *cpu;
491
492 CPU_FOREACH(cpu) {
493 if (cpu->cpu_index == index) {
494 return cpu;
495 }
496 }
497
498 return NULL;
499 }
500
501 #if !defined(CONFIG_USER_ONLY)
502 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
503 {
504 /* We only support one address space per cpu at the moment. */
505 assert(cpu->as == as);
506
507 if (cpu->tcg_as_listener) {
508 memory_listener_unregister(cpu->tcg_as_listener);
509 } else {
510 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
511 }
512 cpu->tcg_as_listener->commit = tcg_commit;
513 memory_listener_register(cpu->tcg_as_listener, as);
514 }
515 #endif
516
517 void cpu_exec_init(CPUArchState *env)
518 {
519 CPUState *cpu = ENV_GET_CPU(env);
520 CPUClass *cc = CPU_GET_CLASS(cpu);
521 CPUState *some_cpu;
522 int cpu_index;
523
524 #if defined(CONFIG_USER_ONLY)
525 cpu_list_lock();
526 #endif
527 cpu_index = 0;
528 CPU_FOREACH(some_cpu) {
529 cpu_index++;
530 }
531 cpu->cpu_index = cpu_index;
532 cpu->numa_node = 0;
533 QTAILQ_INIT(&cpu->breakpoints);
534 QTAILQ_INIT(&cpu->watchpoints);
535 #ifndef CONFIG_USER_ONLY
536 cpu->as = &address_space_memory;
537 cpu->thread_id = qemu_get_thread_id();
538 cpu_reload_memory_map(cpu);
539 #endif
540 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
541 #if defined(CONFIG_USER_ONLY)
542 cpu_list_unlock();
543 #endif
544 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
545 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
546 }
547 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
548 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
549 cpu_save, cpu_load, env);
550 assert(cc->vmsd == NULL);
551 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
552 #endif
553 if (cc->vmsd != NULL) {
554 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
555 }
556 }
557
558 #if defined(CONFIG_USER_ONLY)
559 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
560 {
561 tb_invalidate_phys_page_range(pc, pc + 1, 0);
562 }
563 #else
564 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
565 {
566 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
567 if (phys != -1) {
568 tb_invalidate_phys_addr(cpu->as,
569 phys | (pc & ~TARGET_PAGE_MASK));
570 }
571 }
572 #endif
573
574 #if defined(CONFIG_USER_ONLY)
575 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
576
577 {
578 }
579
580 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
581 int flags)
582 {
583 return -ENOSYS;
584 }
585
586 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
587 {
588 }
589
590 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
591 int flags, CPUWatchpoint **watchpoint)
592 {
593 return -ENOSYS;
594 }
595 #else
596 /* Add a watchpoint. */
597 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
598 int flags, CPUWatchpoint **watchpoint)
599 {
600 CPUWatchpoint *wp;
601
602 /* forbid ranges which are empty or run off the end of the address space */
603 if (len == 0 || (addr + len - 1) < addr) {
604 error_report("tried to set invalid watchpoint at %"
605 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
606 return -EINVAL;
607 }
608 wp = g_malloc(sizeof(*wp));
609
610 wp->vaddr = addr;
611 wp->len = len;
612 wp->flags = flags;
613
614 /* keep all GDB-injected watchpoints in front */
615 if (flags & BP_GDB) {
616 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
617 } else {
618 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
619 }
620
621 tlb_flush_page(cpu, addr);
622
623 if (watchpoint)
624 *watchpoint = wp;
625 return 0;
626 }
627
628 /* Remove a specific watchpoint. */
629 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
630 int flags)
631 {
632 CPUWatchpoint *wp;
633
634 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
635 if (addr == wp->vaddr && len == wp->len
636 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
637 cpu_watchpoint_remove_by_ref(cpu, wp);
638 return 0;
639 }
640 }
641 return -ENOENT;
642 }
643
644 /* Remove a specific watchpoint by reference. */
645 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
646 {
647 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
648
649 tlb_flush_page(cpu, watchpoint->vaddr);
650
651 g_free(watchpoint);
652 }
653
654 /* Remove all matching watchpoints. */
655 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
656 {
657 CPUWatchpoint *wp, *next;
658
659 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
660 if (wp->flags & mask) {
661 cpu_watchpoint_remove_by_ref(cpu, wp);
662 }
663 }
664 }
665
666 /* Return true if this watchpoint address matches the specified
667 * access (ie the address range covered by the watchpoint overlaps
668 * partially or completely with the address range covered by the
669 * access).
670 */
671 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
672 vaddr addr,
673 vaddr len)
674 {
675 /* We know the lengths are non-zero, but a little caution is
676 * required to avoid errors in the case where the range ends
677 * exactly at the top of the address space and so addr + len
678 * wraps round to zero.
679 */
680 vaddr wpend = wp->vaddr + wp->len - 1;
681 vaddr addrend = addr + len - 1;
682
683 return !(addr > wpend || wp->vaddr > addrend);
684 }
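/* Example of the wrap-around case handled above: a watchpoint whose range
 * ends at the very last byte of the address space has vaddr + len == 0, so
 * comparing inclusive end addresses (wpend, addrend) instead of exclusive
 * ones keeps an access to that last byte matching correctly.
 */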
685
686 #endif
687
688 /* Add a breakpoint. */
689 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
690 CPUBreakpoint **breakpoint)
691 {
692 CPUBreakpoint *bp;
693
694 bp = g_malloc(sizeof(*bp));
695
696 bp->pc = pc;
697 bp->flags = flags;
698
699 /* keep all GDB-injected breakpoints in front */
700 if (flags & BP_GDB) {
701 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
702 } else {
703 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
704 }
705
706 breakpoint_invalidate(cpu, pc);
707
708 if (breakpoint) {
709 *breakpoint = bp;
710 }
711 return 0;
712 }
713
714 /* Remove a specific breakpoint. */
715 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
716 {
717 CPUBreakpoint *bp;
718
719 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
720 if (bp->pc == pc && bp->flags == flags) {
721 cpu_breakpoint_remove_by_ref(cpu, bp);
722 return 0;
723 }
724 }
725 return -ENOENT;
726 }
727
728 /* Remove a specific breakpoint by reference. */
729 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
730 {
731 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
732
733 breakpoint_invalidate(cpu, breakpoint->pc);
734
735 g_free(breakpoint);
736 }
737
738 /* Remove all matching breakpoints. */
739 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
740 {
741 CPUBreakpoint *bp, *next;
742
743 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
744 if (bp->flags & mask) {
745 cpu_breakpoint_remove_by_ref(cpu, bp);
746 }
747 }
748 }
749
750 /* enable or disable single step mode. EXCP_DEBUG is returned by the
751 CPU loop after each instruction */
752 void cpu_single_step(CPUState *cpu, int enabled)
753 {
754 if (cpu->singlestep_enabled != enabled) {
755 cpu->singlestep_enabled = enabled;
756 if (kvm_enabled()) {
757 kvm_update_guest_debug(cpu, 0);
758 } else {
759 /* must flush all the translated code to avoid inconsistencies */
760 /* XXX: only flush what is necessary */
761 CPUArchState *env = cpu->env_ptr;
762 tb_flush(env);
763 }
764 }
765 }
766
767 void cpu_abort(CPUState *cpu, const char *fmt, ...)
768 {
769 va_list ap;
770 va_list ap2;
771
772 va_start(ap, fmt);
773 va_copy(ap2, ap);
774 fprintf(stderr, "qemu: fatal: ");
775 vfprintf(stderr, fmt, ap);
776 fprintf(stderr, "\n");
777 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
778 if (qemu_log_enabled()) {
779 qemu_log("qemu: fatal: ");
780 qemu_log_vprintf(fmt, ap2);
781 qemu_log("\n");
782 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
783 qemu_log_flush();
784 qemu_log_close();
785 }
786 va_end(ap2);
787 va_end(ap);
788 #if defined(CONFIG_USER_ONLY)
789 {
790 struct sigaction act;
791 sigfillset(&act.sa_mask);
792 act.sa_handler = SIG_DFL;
793 sigaction(SIGABRT, &act, NULL);
794 }
795 #endif
796 abort();
797 }
798
799 #if !defined(CONFIG_USER_ONLY)
800 /* Called from RCU critical section */
801 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
802 {
803 RAMBlock *block;
804
805 block = atomic_rcu_read(&ram_list.mru_block);
806 if (block && addr - block->offset < block->max_length) {
807 goto found;
808 }
809 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
810 if (addr - block->offset < block->max_length) {
811 goto found;
812 }
813 }
814
815 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
816 abort();
817
818 found:
819 /* It is safe to write mru_block outside the iothread lock. This
820 * is what happens:
821 *
822 * mru_block = xxx
823 * rcu_read_unlock()
824 * xxx removed from list
825 * rcu_read_lock()
826 * read mru_block
827 * mru_block = NULL;
828 * call_rcu(reclaim_ramblock, xxx);
829 * rcu_read_unlock()
830 *
831 * atomic_rcu_set is not needed here. The block was already published
832 * when it was placed into the list. Here we're just making an extra
833 * copy of the pointer.
834 */
835 ram_list.mru_block = block;
836 return block;
837 }
838
839 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
840 {
841 ram_addr_t start1;
842 RAMBlock *block;
843 ram_addr_t end;
844
845 end = TARGET_PAGE_ALIGN(start + length);
846 start &= TARGET_PAGE_MASK;
847
848 rcu_read_lock();
849 block = qemu_get_ram_block(start);
850 assert(block == qemu_get_ram_block(end - 1));
851 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
852 cpu_tlb_reset_dirty_all(start1, length);
853 rcu_read_unlock();
854 }
855
856 /* Note: start and end must be within the same ram block. */
857 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
858 unsigned client)
859 {
860 if (length == 0)
861 return;
862 cpu_physical_memory_clear_dirty_range_type(start, length, client);
863
864 if (tcg_enabled()) {
865 tlb_reset_dirty_range_all(start, length);
866 }
867 }
868
869 static void cpu_physical_memory_set_dirty_tracking(bool enable)
870 {
871 in_migration = enable;
872 }
873
874 /* Called from RCU critical section */
875 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
876 MemoryRegionSection *section,
877 target_ulong vaddr,
878 hwaddr paddr, hwaddr xlat,
879 int prot,
880 target_ulong *address)
881 {
882 hwaddr iotlb;
883 CPUWatchpoint *wp;
884
885 if (memory_region_is_ram(section->mr)) {
886 /* Normal RAM. */
887 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
888 + xlat;
889 if (!section->readonly) {
890 iotlb |= PHYS_SECTION_NOTDIRTY;
891 } else {
892 iotlb |= PHYS_SECTION_ROM;
893 }
894 } else {
895 iotlb = section - section->address_space->dispatch->map.sections;
896 iotlb += xlat;
897 }
898
899 /* Make accesses to pages with watchpoints go via the
900 watchpoint trap routines. */
901 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
902 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
903 /* Avoid trapping reads of pages with a write breakpoint. */
904 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
905 iotlb = PHYS_SECTION_WATCH + paddr;
906 *address |= TLB_MMIO;
907 break;
908 }
909 }
910 }
911
912 return iotlb;
913 }
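/* iotlb encoding sketch: for RAM the returned value is the page-aligned
 * ram_addr of the page with a small section index (PHYS_SECTION_NOTDIRTY or
 * PHYS_SECTION_ROM) OR-ed into the low bits; for MMIO it is the section
 * number plus the offset within the section.  phys_section_add() asserts that
 * there are fewer than TARGET_PAGE_SIZE sections, so iotlb_to_region() can
 * recover the section with (index & ~TARGET_PAGE_MASK).
 */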
914 #endif /* defined(CONFIG_USER_ONLY) */
915
916 #if !defined(CONFIG_USER_ONLY)
917
918 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
919 uint16_t section);
920 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
921
922 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
923 qemu_anon_ram_alloc;
924
925 /*
926 * Set a custom physical guest memory allocator.
927 * Accelerators with unusual needs may need this. Hopefully, we can
928 * get rid of it eventually.
929 */
930 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
931 {
932 phys_mem_alloc = alloc;
933 }
934
935 static uint16_t phys_section_add(PhysPageMap *map,
936 MemoryRegionSection *section)
937 {
938 /* The physical section number is ORed with a page-aligned
939 * pointer to produce the iotlb entries. Thus it should
940 * never overflow into the page-aligned value.
941 */
942 assert(map->sections_nb < TARGET_PAGE_SIZE);
943
944 if (map->sections_nb == map->sections_nb_alloc) {
945 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
946 map->sections = g_renew(MemoryRegionSection, map->sections,
947 map->sections_nb_alloc);
948 }
949 map->sections[map->sections_nb] = *section;
950 memory_region_ref(section->mr);
951 return map->sections_nb++;
952 }
953
954 static void phys_section_destroy(MemoryRegion *mr)
955 {
956 memory_region_unref(mr);
957
958 if (mr->subpage) {
959 subpage_t *subpage = container_of(mr, subpage_t, iomem);
960 object_unref(OBJECT(&subpage->iomem));
961 g_free(subpage);
962 }
963 }
964
965 static void phys_sections_free(PhysPageMap *map)
966 {
967 while (map->sections_nb > 0) {
968 MemoryRegionSection *section = &map->sections[--map->sections_nb];
969 phys_section_destroy(section->mr);
970 }
971 g_free(map->sections);
972 g_free(map->nodes);
973 }
974
975 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
976 {
977 subpage_t *subpage;
978 hwaddr base = section->offset_within_address_space
979 & TARGET_PAGE_MASK;
980 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
981 d->map.nodes, d->map.sections);
982 MemoryRegionSection subsection = {
983 .offset_within_address_space = base,
984 .size = int128_make64(TARGET_PAGE_SIZE),
985 };
986 hwaddr start, end;
987
988 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
989
990 if (!(existing->mr->subpage)) {
991 subpage = subpage_init(d->as, base);
992 subsection.address_space = d->as;
993 subsection.mr = &subpage->iomem;
994 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
995 phys_section_add(&d->map, &subsection));
996 } else {
997 subpage = container_of(existing->mr, subpage_t, iomem);
998 }
999 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1000 end = start + int128_get64(section->size) - 1;
1001 subpage_register(subpage, start, end,
1002 phys_section_add(&d->map, section));
1003 }
1004
1005
1006 static void register_multipage(AddressSpaceDispatch *d,
1007 MemoryRegionSection *section)
1008 {
1009 hwaddr start_addr = section->offset_within_address_space;
1010 uint16_t section_index = phys_section_add(&d->map, section);
1011 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1012 TARGET_PAGE_BITS));
1013
1014 assert(num_pages);
1015 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1016 }
1017
1018 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1019 {
1020 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1021 AddressSpaceDispatch *d = as->next_dispatch;
1022 MemoryRegionSection now = *section, remain = *section;
1023 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1024
1025 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1026 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1027 - now.offset_within_address_space;
1028
1029 now.size = int128_min(int128_make64(left), now.size);
1030 register_subpage(d, &now);
1031 } else {
1032 now.size = int128_zero();
1033 }
1034 while (int128_ne(remain.size, now.size)) {
1035 remain.size = int128_sub(remain.size, now.size);
1036 remain.offset_within_address_space += int128_get64(now.size);
1037 remain.offset_within_region += int128_get64(now.size);
1038 now = remain;
1039 if (int128_lt(remain.size, page_size)) {
1040 register_subpage(d, &now);
1041 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1042 now.size = page_size;
1043 register_subpage(d, &now);
1044 } else {
1045 now.size = int128_and(now.size, int128_neg(page_size));
1046 register_multipage(d, &now);
1047 }
1048 }
1049 }
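/* Splitting example (assuming 4 KiB target pages): a section that starts
 * 0x800 bytes into a page and is 0x2900 bytes long is registered as a
 * 0x800-byte subpage head, a 0x2000-byte (two page) multipage middle and a
 * 0x100-byte subpage tail.
 */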
1050
1051 void qemu_flush_coalesced_mmio_buffer(void)
1052 {
1053 if (kvm_enabled())
1054 kvm_flush_coalesced_mmio_buffer();
1055 }
1056
1057 void qemu_mutex_lock_ramlist(void)
1058 {
1059 qemu_mutex_lock(&ram_list.mutex);
1060 }
1061
1062 void qemu_mutex_unlock_ramlist(void)
1063 {
1064 qemu_mutex_unlock(&ram_list.mutex);
1065 }
1066
1067 #ifdef __linux__
1068
1069 #include <sys/vfs.h>
1070
1071 #define HUGETLBFS_MAGIC 0x958458f6
1072
1073 static long gethugepagesize(const char *path, Error **errp)
1074 {
1075 struct statfs fs;
1076 int ret;
1077
1078 do {
1079 ret = statfs(path, &fs);
1080 } while (ret != 0 && errno == EINTR);
1081
1082 if (ret != 0) {
1083 error_setg_errno(errp, errno, "failed to get page size of file %s",
1084 path);
1085 return 0;
1086 }
1087
1088 if (fs.f_type != HUGETLBFS_MAGIC)
1089 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1090
1091 return fs.f_bsize;
1092 }
1093
1094 static void *file_ram_alloc(RAMBlock *block,
1095 ram_addr_t memory,
1096 const char *path,
1097 Error **errp)
1098 {
1099 char *filename;
1100 char *sanitized_name;
1101 char *c;
1102 void *area = NULL;
1103 int fd;
1104 uint64_t hpagesize;
1105 Error *local_err = NULL;
1106
1107 hpagesize = gethugepagesize(path, &local_err);
1108 if (local_err) {
1109 error_propagate(errp, local_err);
1110 goto error;
1111 }
1112 block->mr->align = hpagesize;
1113
1114 if (memory < hpagesize) {
1115 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1116 "or larger than huge page size 0x%" PRIx64,
1117 memory, hpagesize);
1118 goto error;
1119 }
1120
1121 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1122 error_setg(errp,
1123 "host lacks kvm mmu notifiers, -mem-path unsupported");
1124 goto error;
1125 }
1126
1127 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1128 sanitized_name = g_strdup(memory_region_name(block->mr));
1129 for (c = sanitized_name; *c != '\0'; c++) {
1130 if (*c == '/')
1131 *c = '_';
1132 }
1133
1134 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1135 sanitized_name);
1136 g_free(sanitized_name);
1137
1138 fd = mkstemp(filename);
1139 if (fd < 0) {
1140 error_setg_errno(errp, errno,
1141 "unable to create backing store for hugepages");
1142 g_free(filename);
1143 goto error;
1144 }
1145 unlink(filename);
1146 g_free(filename);
1147
1148 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1149
1150 /*
1151 * ftruncate is not supported by hugetlbfs in older
1152 * hosts, so don't bother bailing out on errors.
1153 * If anything goes wrong with it under other filesystems,
1154 * mmap will fail.
1155 */
1156 if (ftruncate(fd, memory)) {
1157 perror("ftruncate");
1158 }
1159
1160 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1161 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1162 fd, 0);
1163 if (area == MAP_FAILED) {
1164 error_setg_errno(errp, errno,
1165 "unable to map backing store for hugepages");
1166 close(fd);
1167 goto error;
1168 }
1169
1170 if (mem_prealloc) {
1171 os_mem_prealloc(fd, area, memory);
1172 }
1173
1174 block->fd = fd;
1175 return area;
1176
1177 error:
1178 if (mem_prealloc) {
1179 error_report("%s", error_get_pretty(*errp));
1180 exit(1);
1181 }
1182 return NULL;
1183 }
1184 #endif
1185
1186 /* Called with the ramlist lock held. */
1187 static ram_addr_t find_ram_offset(ram_addr_t size)
1188 {
1189 RAMBlock *block, *next_block;
1190 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1191
1192 assert(size != 0); /* it would hand out same offset multiple times */
1193
1194 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1195 return 0;
1196 }
1197
1198 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1199 ram_addr_t end, next = RAM_ADDR_MAX;
1200
1201 end = block->offset + block->max_length;
1202
1203 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1204 if (next_block->offset >= end) {
1205 next = MIN(next, next_block->offset);
1206 }
1207 }
1208 if (next - end >= size && next - end < mingap) {
1209 offset = end;
1210 mingap = next - end;
1211 }
1212 }
1213
1214 if (offset == RAM_ADDR_MAX) {
1215 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1216 (uint64_t)size);
1217 abort();
1218 }
1219
1220 return offset;
1221 }
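/* Example: with existing blocks at [0, 0x40000000) and
 * [0x80000000, 0xc0000000), a request for 0x20000000 bytes returns
 * 0x40000000: the start of the smallest gap that still fits the request.
 */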
1222
1223 ram_addr_t last_ram_offset(void)
1224 {
1225 RAMBlock *block;
1226 ram_addr_t last = 0;
1227
1228 rcu_read_lock();
1229 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1230 last = MAX(last, block->offset + block->max_length);
1231 }
1232 rcu_read_unlock();
1233 return last;
1234 }
1235
1236 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1237 {
1238 int ret;
1239
1240 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1241 if (!machine_dump_guest_core(current_machine)) {
1242 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1243 if (ret) {
1244 perror("qemu_madvise");
1245 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1246 "but dump_guest_core=off specified\n");
1247 }
1248 }
1249 }
1250
1251 /* Called within an RCU critical section, or while the ramlist lock
1252 * is held.
1253 */
1254 static RAMBlock *find_ram_block(ram_addr_t addr)
1255 {
1256 RAMBlock *block;
1257
1258 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1259 if (block->offset == addr) {
1260 return block;
1261 }
1262 }
1263
1264 return NULL;
1265 }
1266
1267 /* Called with iothread lock held. */
1268 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1269 {
1270 RAMBlock *new_block, *block;
1271
1272 rcu_read_lock();
1273 new_block = find_ram_block(addr);
1274 assert(new_block);
1275 assert(!new_block->idstr[0]);
1276
1277 if (dev) {
1278 char *id = qdev_get_dev_path(dev);
1279 if (id) {
1280 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1281 g_free(id);
1282 }
1283 }
1284 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1285
1286 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1287 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1288 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1289 new_block->idstr);
1290 abort();
1291 }
1292 }
1293 rcu_read_unlock();
1294 }
1295
1296 /* Called with iothread lock held. */
1297 void qemu_ram_unset_idstr(ram_addr_t addr)
1298 {
1299 RAMBlock *block;
1300
1301 /* FIXME: arch_init.c assumes that this is not called throughout
1302 * migration. Ignore the problem since hot-unplug during migration
1303 * does not work anyway.
1304 */
1305
1306 rcu_read_lock();
1307 block = find_ram_block(addr);
1308 if (block) {
1309 memset(block->idstr, 0, sizeof(block->idstr));
1310 }
1311 rcu_read_unlock();
1312 }
1313
1314 static int memory_try_enable_merging(void *addr, size_t len)
1315 {
1316 if (!machine_mem_merge(current_machine)) {
1317 /* disabled by the user */
1318 return 0;
1319 }
1320
1321 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1322 }
1323
1324 /* Only legal before the guest might have detected the memory size: e.g. on
1325 * incoming migration, or right after reset.
1326 *
1327 * As the memory core doesn't know how the memory is accessed, it is up to the
1328 * resize callback to update device state and/or add assertions to detect
1329 * misuse, if necessary.
1330 */
1331 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1332 {
1333 RAMBlock *block = find_ram_block(base);
1334
1335 assert(block);
1336
1337 newsize = TARGET_PAGE_ALIGN(newsize);
1338
1339 if (block->used_length == newsize) {
1340 return 0;
1341 }
1342
1343 if (!(block->flags & RAM_RESIZEABLE)) {
1344 error_setg_errno(errp, EINVAL,
1345 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1346 " in != 0x" RAM_ADDR_FMT, block->idstr,
1347 newsize, block->used_length);
1348 return -EINVAL;
1349 }
1350
1351 if (block->max_length < newsize) {
1352 error_setg_errno(errp, EINVAL,
1353 "Length too large: %s: 0x" RAM_ADDR_FMT
1354 " > 0x" RAM_ADDR_FMT, block->idstr,
1355 newsize, block->max_length);
1356 return -EINVAL;
1357 }
1358
1359 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1360 block->used_length = newsize;
1361 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1362 memory_region_set_size(block->mr, newsize);
1363 if (block->resized) {
1364 block->resized(block->idstr, newsize, block->host);
1365 }
1366 return 0;
1367 }
1368
1369 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1370 {
1371 RAMBlock *block;
1372 RAMBlock *last_block = NULL;
1373 ram_addr_t old_ram_size, new_ram_size;
1374
1375 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1376
1377 qemu_mutex_lock_ramlist();
1378 new_block->offset = find_ram_offset(new_block->max_length);
1379
1380 if (!new_block->host) {
1381 if (xen_enabled()) {
1382 xen_ram_alloc(new_block->offset, new_block->max_length,
1383 new_block->mr);
1384 } else {
1385 new_block->host = phys_mem_alloc(new_block->max_length,
1386 &new_block->mr->align);
1387 if (!new_block->host) {
1388 error_setg_errno(errp, errno,
1389 "cannot set up guest memory '%s'",
1390 memory_region_name(new_block->mr));
1391 qemu_mutex_unlock_ramlist();
1392 return -1;
1393 }
1394 memory_try_enable_merging(new_block->host, new_block->max_length);
1395 }
1396 }
1397
1398 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1399 * QLIST (which has an RCU-friendly variant) does not have insertion at
1400 * tail, so save the last element in last_block.
1401 */
1402 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1403 last_block = block;
1404 if (block->max_length < new_block->max_length) {
1405 break;
1406 }
1407 }
1408 if (block) {
1409 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1410 } else if (last_block) {
1411 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1412 } else { /* list is empty */
1413 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1414 }
1415 ram_list.mru_block = NULL;
1416
1417 /* Write list before version */
1418 smp_wmb();
1419 ram_list.version++;
1420 qemu_mutex_unlock_ramlist();
1421
1422 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1423
1424 if (new_ram_size > old_ram_size) {
1425 int i;
1426
1427 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1428 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1429 ram_list.dirty_memory[i] =
1430 bitmap_zero_extend(ram_list.dirty_memory[i],
1431 old_ram_size, new_ram_size);
1432 }
1433 }
1434 cpu_physical_memory_set_dirty_range(new_block->offset,
1435 new_block->used_length);
1436
1437 if (new_block->host) {
1438 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1439 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1440 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1441 if (kvm_enabled()) {
1442 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1443 }
1444 }
1445
1446 return new_block->offset;
1447 }
1448
1449 #ifdef __linux__
1450 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1451 bool share, const char *mem_path,
1452 Error **errp)
1453 {
1454 RAMBlock *new_block;
1455 ram_addr_t addr;
1456 Error *local_err = NULL;
1457
1458 if (xen_enabled()) {
1459 error_setg(errp, "-mem-path not supported with Xen");
1460 return -1;
1461 }
1462
1463 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1464 /*
1465 * file_ram_alloc() needs to allocate just like
1466 * phys_mem_alloc, but we haven't bothered to provide
1467 * a hook there.
1468 */
1469 error_setg(errp,
1470 "-mem-path not supported with this accelerator");
1471 return -1;
1472 }
1473
1474 size = TARGET_PAGE_ALIGN(size);
1475 new_block = g_malloc0(sizeof(*new_block));
1476 new_block->mr = mr;
1477 new_block->used_length = size;
1478 new_block->max_length = size;
1479 new_block->flags = share ? RAM_SHARED : 0;
1480 new_block->host = file_ram_alloc(new_block, size,
1481 mem_path, errp);
1482 if (!new_block->host) {
1483 g_free(new_block);
1484 return -1;
1485 }
1486
1487 addr = ram_block_add(new_block, &local_err);
1488 if (local_err) {
1489 g_free(new_block);
1490 error_propagate(errp, local_err);
1491 return -1;
1492 }
1493 return addr;
1494 }
1495 #endif
1496
1497 static
1498 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1499 void (*resized)(const char*,
1500 uint64_t length,
1501 void *host),
1502 void *host, bool resizeable,
1503 MemoryRegion *mr, Error **errp)
1504 {
1505 RAMBlock *new_block;
1506 ram_addr_t addr;
1507 Error *local_err = NULL;
1508
1509 size = TARGET_PAGE_ALIGN(size);
1510 max_size = TARGET_PAGE_ALIGN(max_size);
1511 new_block = g_malloc0(sizeof(*new_block));
1512 new_block->mr = mr;
1513 new_block->resized = resized;
1514 new_block->used_length = size;
1515 new_block->max_length = max_size;
1516 assert(max_size >= size);
1517 new_block->fd = -1;
1518 new_block->host = host;
1519 if (host) {
1520 new_block->flags |= RAM_PREALLOC;
1521 }
1522 if (resizeable) {
1523 new_block->flags |= RAM_RESIZEABLE;
1524 }
1525 addr = ram_block_add(new_block, &local_err);
1526 if (local_err) {
1527 g_free(new_block);
1528 error_propagate(errp, local_err);
1529 return -1;
1530 }
1531 return addr;
1532 }
1533
1534 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1535 MemoryRegion *mr, Error **errp)
1536 {
1537 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1538 }
1539
1540 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1541 {
1542 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1543 }
1544
1545 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1546 void (*resized)(const char*,
1547 uint64_t length,
1548 void *host),
1549 MemoryRegion *mr, Error **errp)
1550 {
1551 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1552 }
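/* The three wrappers above differ only in what they hand to
 * qemu_ram_alloc_internal(): qemu_ram_alloc_from_ptr() supplies an existing
 * host buffer (RAM_PREALLOC), qemu_ram_alloc() lets phys_mem_alloc provide
 * the memory, and qemu_ram_alloc_resizeable() additionally sets
 * RAM_RESIZEABLE with max_length == maxsz so qemu_ram_resize() can adjust
 * used_length later.
 */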
1553
1554 void qemu_ram_free_from_ptr(ram_addr_t addr)
1555 {
1556 RAMBlock *block;
1557
1558 qemu_mutex_lock_ramlist();
1559 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1560 if (addr == block->offset) {
1561 QLIST_REMOVE_RCU(block, next);
1562 ram_list.mru_block = NULL;
1563 /* Write list before version */
1564 smp_wmb();
1565 ram_list.version++;
1566 g_free_rcu(block, rcu);
1567 break;
1568 }
1569 }
1570 qemu_mutex_unlock_ramlist();
1571 }
1572
1573 static void reclaim_ramblock(RAMBlock *block)
1574 {
1575 if (block->flags & RAM_PREALLOC) {
1576 ;
1577 } else if (xen_enabled()) {
1578 xen_invalidate_map_cache_entry(block->host);
1579 #ifndef _WIN32
1580 } else if (block->fd >= 0) {
1581 munmap(block->host, block->max_length);
1582 close(block->fd);
1583 #endif
1584 } else {
1585 qemu_anon_ram_free(block->host, block->max_length);
1586 }
1587 g_free(block);
1588 }
1589
1590 void qemu_ram_free(ram_addr_t addr)
1591 {
1592 RAMBlock *block;
1593
1594 qemu_mutex_lock_ramlist();
1595 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1596 if (addr == block->offset) {
1597 QLIST_REMOVE_RCU(block, next);
1598 ram_list.mru_block = NULL;
1599 /* Write list before version */
1600 smp_wmb();
1601 ram_list.version++;
1602 call_rcu(block, reclaim_ramblock, rcu);
1603 break;
1604 }
1605 }
1606 qemu_mutex_unlock_ramlist();
1607 }
1608
1609 #ifndef _WIN32
1610 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1611 {
1612 RAMBlock *block;
1613 ram_addr_t offset;
1614 int flags;
1615 void *area, *vaddr;
1616
1617 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1618 offset = addr - block->offset;
1619 if (offset < block->max_length) {
1620 vaddr = ramblock_ptr(block, offset);
1621 if (block->flags & RAM_PREALLOC) {
1622 ;
1623 } else if (xen_enabled()) {
1624 abort();
1625 } else {
1626 flags = MAP_FIXED;
1627 if (block->fd >= 0) {
1628 flags |= (block->flags & RAM_SHARED ?
1629 MAP_SHARED : MAP_PRIVATE);
1630 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1631 flags, block->fd, offset);
1632 } else {
1633 /*
1634 * Remap needs to match alloc. Accelerators that
1635 * set phys_mem_alloc never remap. If they did,
1636 * we'd need a remap hook here.
1637 */
1638 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1639
1640 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1641 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1642 flags, -1, 0);
1643 }
1644 if (area != vaddr) {
1645 fprintf(stderr, "Could not remap addr: "
1646 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1647 length, addr);
1648 exit(1);
1649 }
1650 memory_try_enable_merging(vaddr, length);
1651 qemu_ram_setup_dump(vaddr, length);
1652 }
1653 }
1654 }
1655 }
1656 #endif /* !_WIN32 */
1657
1658 int qemu_get_ram_fd(ram_addr_t addr)
1659 {
1660 RAMBlock *block;
1661 int fd;
1662
1663 rcu_read_lock();
1664 block = qemu_get_ram_block(addr);
1665 fd = block->fd;
1666 rcu_read_unlock();
1667 return fd;
1668 }
1669
1670 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1671 {
1672 RAMBlock *block;
1673 void *ptr;
1674
1675 rcu_read_lock();
1676 block = qemu_get_ram_block(addr);
1677 ptr = ramblock_ptr(block, 0);
1678 rcu_read_unlock();
1679 return ptr;
1680 }
1681
1682 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1683 * This should not be used for general purpose DMA. Use address_space_map
1684 * or address_space_rw instead. For local memory (e.g. video ram) that the
1685 * device owns, use memory_region_get_ram_ptr.
1686 *
1687 * By the time this function returns, the returned pointer is not protected
1688 * by RCU anymore. If the caller is not within an RCU critical section and
1689 * does not hold the iothread lock, it must have other means of protecting the
1690 * pointer, such as a reference to the region that includes the incoming
1691 * ram_addr_t.
1692 */
1693 void *qemu_get_ram_ptr(ram_addr_t addr)
1694 {
1695 RAMBlock *block;
1696 void *ptr;
1697
1698 rcu_read_lock();
1699 block = qemu_get_ram_block(addr);
1700
1701 if (xen_enabled() && block->host == NULL) {
1702 /* We need to check if the requested address is in the RAM
1703 * because we don't want to map the entire memory in QEMU.
1704 * In that case just map until the end of the page.
1705 */
1706 if (block->offset == 0) {
1707 ptr = xen_map_cache(addr, 0, 0);
1708 goto unlock;
1709 }
1710
1711 block->host = xen_map_cache(block->offset, block->max_length, 1);
1712 }
1713 ptr = ramblock_ptr(block, addr - block->offset);
1714
1715 unlock:
1716 rcu_read_unlock();
1717 return ptr;
1718 }
1719
1720 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1721 * but takes a size argument.
1722 *
1723 * By the time this function returns, the returned pointer is not protected
1724 * by RCU anymore. If the caller is not within an RCU critical section and
1725 * does not hold the iothread lock, it must have other means of protecting the
1726 * pointer, such as a reference to the region that includes the incoming
1727 * ram_addr_t.
1728 */
1729 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1730 {
1731 void *ptr;
1732 if (*size == 0) {
1733 return NULL;
1734 }
1735 if (xen_enabled()) {
1736 return xen_map_cache(addr, *size, 1);
1737 } else {
1738 RAMBlock *block;
1739 rcu_read_lock();
1740 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1741 if (addr - block->offset < block->max_length) {
1742 if (addr - block->offset + *size > block->max_length)
1743 *size = block->max_length - addr + block->offset;
1744 ptr = ramblock_ptr(block, addr - block->offset);
1745 rcu_read_unlock();
1746 return ptr;
1747 }
1748 }
1749
1750 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1751 abort();
1752 }
1753 }
1754
1755 /* Some of the softmmu routines need to translate from a host pointer
1756 * (typically a TLB entry) back to a ram offset.
1757 *
1758 * By the time this function returns, the returned pointer is not protected
1759 * by RCU anymore. If the caller is not within an RCU critical section and
1760 * does not hold the iothread lock, it must have other means of protecting the
1761 * pointer, such as a reference to the region that includes the incoming
1762 * ram_addr_t.
1763 */
1764 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1765 {
1766 RAMBlock *block;
1767 uint8_t *host = ptr;
1768 MemoryRegion *mr;
1769
1770 if (xen_enabled()) {
1771 rcu_read_lock();
1772 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1773 mr = qemu_get_ram_block(*ram_addr)->mr;
1774 rcu_read_unlock();
1775 return mr;
1776 }
1777
1778 rcu_read_lock();
1779 block = atomic_rcu_read(&ram_list.mru_block);
1780 if (block && block->host && host - block->host < block->max_length) {
1781 goto found;
1782 }
1783
1784 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1785 /* This case happens when the block is not mapped. */
1786 if (block->host == NULL) {
1787 continue;
1788 }
1789 if (host - block->host < block->max_length) {
1790 goto found;
1791 }
1792 }
1793
1794 rcu_read_unlock();
1795 return NULL;
1796
1797 found:
1798 *ram_addr = block->offset + (host - block->host);
1799 mr = block->mr;
1800 rcu_read_unlock();
1801 return mr;
1802 }
1803
1804 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1805 uint64_t val, unsigned size)
1806 {
1807 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1808 tb_invalidate_phys_page_fast(ram_addr, size);
1809 }
1810 switch (size) {
1811 case 1:
1812 stb_p(qemu_get_ram_ptr(ram_addr), val);
1813 break;
1814 case 2:
1815 stw_p(qemu_get_ram_ptr(ram_addr), val);
1816 break;
1817 case 4:
1818 stl_p(qemu_get_ram_ptr(ram_addr), val);
1819 break;
1820 default:
1821 abort();
1822 }
1823 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1824 /* we remove the notdirty callback only if the code has been
1825 flushed */
1826 if (!cpu_physical_memory_is_clean(ram_addr)) {
1827 CPUArchState *env = current_cpu->env_ptr;
1828 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1829 }
1830 }
1831
1832 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1833 unsigned size, bool is_write)
1834 {
1835 return is_write;
1836 }
1837
1838 static const MemoryRegionOps notdirty_mem_ops = {
1839 .write = notdirty_mem_write,
1840 .valid.accepts = notdirty_mem_accepts,
1841 .endianness = DEVICE_NATIVE_ENDIAN,
1842 };
1843
1844 /* Generate a debug exception if a watchpoint has been hit. */
1845 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1846 {
1847 CPUState *cpu = current_cpu;
1848 CPUArchState *env = cpu->env_ptr;
1849 target_ulong pc, cs_base;
1850 target_ulong vaddr;
1851 CPUWatchpoint *wp;
1852 int cpu_flags;
1853
1854 if (cpu->watchpoint_hit) {
1855 /* We re-entered the check after replacing the TB. Now raise
856 * the debug interrupt so that it will trigger after the
1857 * current instruction. */
1858 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1859 return;
1860 }
1861 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1862 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1863 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1864 && (wp->flags & flags)) {
1865 if (flags == BP_MEM_READ) {
1866 wp->flags |= BP_WATCHPOINT_HIT_READ;
1867 } else {
1868 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1869 }
1870 wp->hitaddr = vaddr;
1871 wp->hitattrs = attrs;
1872 if (!cpu->watchpoint_hit) {
1873 cpu->watchpoint_hit = wp;
1874 tb_check_watchpoint(cpu);
1875 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1876 cpu->exception_index = EXCP_DEBUG;
1877 cpu_loop_exit(cpu);
1878 } else {
1879 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1880 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1881 cpu_resume_from_signal(cpu, NULL);
1882 }
1883 }
1884 } else {
1885 wp->flags &= ~BP_WATCHPOINT_HIT;
1886 }
1887 }
1888 }
1889
1890 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1891 so these check for a hit then pass through to the normal out-of-line
1892 phys routines. */
1893 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
1894 unsigned size, MemTxAttrs attrs)
1895 {
1896 MemTxResult res;
1897 uint64_t data;
1898
1899 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
1900 switch (size) {
1901 case 1:
1902 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
1903 break;
1904 case 2:
1905 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
1906 break;
1907 case 4:
1908 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
1909 break;
1910 default: abort();
1911 }
1912 *pdata = data;
1913 return res;
1914 }
1915
1916 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
1917 uint64_t val, unsigned size,
1918 MemTxAttrs attrs)
1919 {
1920 MemTxResult res;
1921
1922 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
1923 switch (size) {
1924 case 1:
1925 address_space_stb(&address_space_memory, addr, val, attrs, &res);
1926 break;
1927 case 2:
1928 address_space_stw(&address_space_memory, addr, val, attrs, &res);
1929 break;
1930 case 4:
1931 address_space_stl(&address_space_memory, addr, val, attrs, &res);
1932 break;
1933 default: abort();
1934 }
1935 return res;
1936 }
1937
1938 static const MemoryRegionOps watch_mem_ops = {
1939 .read_with_attrs = watch_mem_read,
1940 .write_with_attrs = watch_mem_write,
1941 .endianness = DEVICE_NATIVE_ENDIAN,
1942 };
1943
1944 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
1945 unsigned len, MemTxAttrs attrs)
1946 {
1947 subpage_t *subpage = opaque;
1948 uint8_t buf[8];
1949 MemTxResult res;
1950
1951 #if defined(DEBUG_SUBPAGE)
1952 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1953 subpage, len, addr);
1954 #endif
1955 res = address_space_read(subpage->as, addr + subpage->base,
1956 attrs, buf, len);
1957 if (res) {
1958 return res;
1959 }
1960 switch (len) {
1961 case 1:
1962 *data = ldub_p(buf);
1963 return MEMTX_OK;
1964 case 2:
1965 *data = lduw_p(buf);
1966 return MEMTX_OK;
1967 case 4:
1968 *data = ldl_p(buf);
1969 return MEMTX_OK;
1970 case 8:
1971 *data = ldq_p(buf);
1972 return MEMTX_OK;
1973 default:
1974 abort();
1975 }
1976 }
1977
1978 static MemTxResult subpage_write(void *opaque, hwaddr addr,
1979 uint64_t value, unsigned len, MemTxAttrs attrs)
1980 {
1981 subpage_t *subpage = opaque;
1982 uint8_t buf[8];
1983
1984 #if defined(DEBUG_SUBPAGE)
1985 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1986 " value %"PRIx64"\n",
1987 __func__, subpage, len, addr, value);
1988 #endif
1989 switch (len) {
1990 case 1:
1991 stb_p(buf, value);
1992 break;
1993 case 2:
1994 stw_p(buf, value);
1995 break;
1996 case 4:
1997 stl_p(buf, value);
1998 break;
1999 case 8:
2000 stq_p(buf, value);
2001 break;
2002 default:
2003 abort();
2004 }
2005 return address_space_write(subpage->as, addr + subpage->base,
2006 attrs, buf, len);
2007 }
2008
2009 static bool subpage_accepts(void *opaque, hwaddr addr,
2010 unsigned len, bool is_write)
2011 {
2012 subpage_t *subpage = opaque;
2013 #if defined(DEBUG_SUBPAGE)
2014 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2015 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2016 #endif
2017
2018 return address_space_access_valid(subpage->as, addr + subpage->base,
2019 len, is_write);
2020 }
2021
2022 static const MemoryRegionOps subpage_ops = {
2023 .read_with_attrs = subpage_read,
2024 .write_with_attrs = subpage_write,
2025 .impl.min_access_size = 1,
2026 .impl.max_access_size = 8,
2027 .valid.min_access_size = 1,
2028 .valid.max_access_size = 8,
2029 .valid.accepts = subpage_accepts,
2030 .endianness = DEVICE_NATIVE_ENDIAN,
2031 };
2032
2033 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2034 uint16_t section)
2035 {
2036 int idx, eidx;
2037
2038 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2039 return -1;
2040 idx = SUBPAGE_IDX(start);
2041 eidx = SUBPAGE_IDX(end);
2042 #if defined(DEBUG_SUBPAGE)
2043 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2044 __func__, mmio, start, end, idx, eidx, section);
2045 #endif
2046 for (; idx <= eidx; idx++) {
2047 mmio->sub_section[idx] = section;
2048 }
2049
2050 return 0;
2051 }
2052
2053 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2054 {
2055 subpage_t *mmio;
2056
2057 mmio = g_malloc0(sizeof(subpage_t));
2058
2059 mmio->as = as;
2060 mmio->base = base;
2061 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2062 NULL, TARGET_PAGE_SIZE);
2063 mmio->iomem.subpage = true;
2064 #if defined(DEBUG_SUBPAGE)
2065 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2066 mmio, base, TARGET_PAGE_SIZE);
2067 #endif
2068 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2069
2070 return mmio;
2071 }
2072
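/* Add a section covering the entire 64-bit address space for 'mr'.
 * mem_begin() uses this to pin the fixed PHYS_SECTION_* indices at the
 * start of every freshly built dispatch map. */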
2073 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2074 MemoryRegion *mr)
2075 {
2076 assert(as);
2077 MemoryRegionSection section = {
2078 .address_space = as,
2079 .mr = mr,
2080 .offset_within_address_space = 0,
2081 .offset_within_region = 0,
2082 .size = int128_2_64(),
2083 };
2084
2085 return phys_section_add(map, &section);
2086 }
2087
2088 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2089 {
2090 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2091 MemoryRegionSection *sections = d->map.sections;
2092
2093 return sections[index & ~TARGET_PAGE_MASK].mr;
2094 }
2095
2096 static void io_mem_init(void)
2097 {
2098 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2099 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2100 NULL, UINT64_MAX);
2101 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2102 NULL, UINT64_MAX);
2103 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2104 NULL, UINT64_MAX);
2105 }
2106
2107 static void mem_begin(MemoryListener *listener)
2108 {
2109 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2110 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2111 uint16_t n;
2112
2113 n = dummy_section(&d->map, as, &io_mem_unassigned);
2114 assert(n == PHYS_SECTION_UNASSIGNED);
2115 n = dummy_section(&d->map, as, &io_mem_notdirty);
2116 assert(n == PHYS_SECTION_NOTDIRTY);
2117 n = dummy_section(&d->map, as, &io_mem_rom);
2118 assert(n == PHYS_SECTION_ROM);
2119 n = dummy_section(&d->map, as, &io_mem_watch);
2120 assert(n == PHYS_SECTION_WATCH);
2121
2122 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2123 d->as = as;
2124 as->next_dispatch = d;
2125 }
2126
2127 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2128 {
2129 phys_sections_free(&d->map);
2130 g_free(d);
2131 }
2132
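/* Publish the dispatch map built up since mem_begin().  The new map is
 * compacted and then installed with atomic_rcu_set(); readers access
 * as->dispatch under rcu_read_lock(), so the old map is only freed after
 * an RCU grace period via call_rcu(). */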
2133 static void mem_commit(MemoryListener *listener)
2134 {
2135 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2136 AddressSpaceDispatch *cur = as->dispatch;
2137 AddressSpaceDispatch *next = as->next_dispatch;
2138
2139 phys_page_compact_all(next, next->map.nodes_nb);
2140
2141 atomic_rcu_set(&as->dispatch, next);
2142 if (cur) {
2143 call_rcu(cur, address_space_dispatch_free, rcu);
2144 }
2145 }
2146
2147 static void tcg_commit(MemoryListener *listener)
2148 {
2149 CPUState *cpu;
2150
2151 /* Since each CPU stores RAM addresses in its TLB cache, we must
2152 reset the modified entries. */
2153 /* XXX: slow! */
2154 CPU_FOREACH(cpu) {
2155 /* FIXME: Disentangle the cpu.h circular files deps so we can
2156 directly get the right CPU from listener. */
2157 if (cpu->tcg_as_listener != listener) {
2158 continue;
2159 }
2160 cpu_reload_memory_map(cpu);
2161 }
2162 }
2163
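/* log_global_start/stop fire when some listener requests global dirty
 * logging (e.g. live migration); here they just toggle the in_migration
 * flag consulted by address_space_stl_notdirty() below. */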
2164 static void core_log_global_start(MemoryListener *listener)
2165 {
2166 cpu_physical_memory_set_dirty_tracking(true);
2167 }
2168
2169 static void core_log_global_stop(MemoryListener *listener)
2170 {
2171 cpu_physical_memory_set_dirty_tracking(false);
2172 }
2173
2174 static MemoryListener core_memory_listener = {
2175 .log_global_start = core_log_global_start,
2176 .log_global_stop = core_log_global_stop,
2177 .priority = 1,
2178 };
2179
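/* Attach the dispatch-building listener to 'as': mem_begin(), mem_add()
 * and mem_commit() then rebuild the radix tree above on every memory
 * topology change. */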
2180 void address_space_init_dispatch(AddressSpace *as)
2181 {
2182 as->dispatch = NULL;
2183 as->dispatch_listener = (MemoryListener) {
2184 .begin = mem_begin,
2185 .commit = mem_commit,
2186 .region_add = mem_add,
2187 .region_nop = mem_add,
2188 .priority = 0,
2189 };
2190 memory_listener_register(&as->dispatch_listener, as);
2191 }
2192
2193 void address_space_unregister(AddressSpace *as)
2194 {
2195 memory_listener_unregister(&as->dispatch_listener);
2196 }
2197
2198 void address_space_destroy_dispatch(AddressSpace *as)
2199 {
2200 AddressSpaceDispatch *d = as->dispatch;
2201
2202 atomic_rcu_set(&as->dispatch, NULL);
2203 if (d) {
2204 call_rcu(d, address_space_dispatch_free, rcu);
2205 }
2206 }
2207
2208 static void memory_map_init(void)
2209 {
2210 system_memory = g_malloc(sizeof(*system_memory));
2211
2212 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2213 address_space_init(&address_space_memory, system_memory, "memory");
2214
2215 system_io = g_malloc(sizeof(*system_io));
2216 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2217 65536);
2218 address_space_init(&address_space_io, system_io, "I/O");
2219
2220 memory_listener_register(&core_memory_listener, &address_space_memory);
2221 }
2222
2223 MemoryRegion *get_system_memory(void)
2224 {
2225 return system_memory;
2226 }
2227
2228 MemoryRegion *get_system_io(void)
2229 {
2230 return system_io;
2231 }
2232
2233 #endif /* !defined(CONFIG_USER_ONLY) */
2234
2235 /* physical memory access (slow version, mainly for debug) */
2236 #if defined(CONFIG_USER_ONLY)
2237 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2238 uint8_t *buf, int len, int is_write)
2239 {
2240 int l, flags;
2241 target_ulong page;
2242 void * p;
2243
2244 while (len > 0) {
2245 page = addr & TARGET_PAGE_MASK;
2246 l = (page + TARGET_PAGE_SIZE) - addr;
2247 if (l > len)
2248 l = len;
2249 flags = page_get_flags(page);
2250 if (!(flags & PAGE_VALID))
2251 return -1;
2252 if (is_write) {
2253 if (!(flags & PAGE_WRITE))
2254 return -1;
2255 /* XXX: this code should not depend on lock_user */
2256 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2257 return -1;
2258 memcpy(p, buf, l);
2259 unlock_user(p, addr, l);
2260 } else {
2261 if (!(flags & PAGE_READ))
2262 return -1;
2263 /* XXX: this code should not depend on lock_user */
2264 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2265 return -1;
2266 memcpy(buf, p, l);
2267 unlock_user(p, addr, 0);
2268 }
2269 len -= l;
2270 buf += l;
2271 addr += l;
2272 }
2273 return 0;
2274 }
2275
2276 #else
2277
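/* After a direct write to guest RAM that bypassed the notdirty path:
 * invalidate any translated blocks in the range and set the dirty bits
 * (except DIRTY_MEMORY_CODE), but only if part of the range was still
 * clean.  Xen is always told about the modification. */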
2278 static void invalidate_and_set_dirty(hwaddr addr,
2279 hwaddr length)
2280 {
2281 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2282 tb_invalidate_phys_range(addr, addr + length, 0);
2283 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2284 }
2285 xen_modified_memory(addr, length);
2286 }
2287
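/* Clamp an access of 'l' bytes at 'addr' to something the region can
 * accept: no larger than valid.max_access_size (default 4), no larger
 * than the alignment of 'addr' unless the region allows unaligned
 * accesses, and rounded down to a power of two.  For example, a 6-byte
 * access at an address aligned only to 2 against a default region is
 * split starting with a 2-byte access. */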
2288 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2289 {
2290 unsigned access_size_max = mr->ops->valid.max_access_size;
2291
2292 /* Regions are assumed to support 1-4 byte accesses unless
2293 otherwise specified. */
2294 if (access_size_max == 0) {
2295 access_size_max = 4;
2296 }
2297
2298 /* Bound the maximum access by the alignment of the address. */
2299 if (!mr->ops->impl.unaligned) {
2300 unsigned align_size_max = addr & -addr;
2301 if (align_size_max != 0 && align_size_max < access_size_max) {
2302 access_size_max = align_size_max;
2303 }
2304 }
2305
2306 /* Don't attempt accesses larger than the maximum. */
2307 if (l > access_size_max) {
2308 l = access_size_max;
2309 }
2310 if (l & (l - 1)) {
2311 l = 1 << (qemu_fls(l) - 1);
2312 }
2313
2314 return l;
2315 }
2316
2317 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2318 uint8_t *buf, int len, bool is_write)
2319 {
2320 hwaddr l;
2321 uint8_t *ptr;
2322 uint64_t val;
2323 hwaddr addr1;
2324 MemoryRegion *mr;
2325 MemTxResult result = MEMTX_OK;
2326
2327 rcu_read_lock();
2328 while (len > 0) {
2329 l = len;
2330 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2331
2332 if (is_write) {
2333 if (!memory_access_is_direct(mr, is_write)) {
2334 l = memory_access_size(mr, l, addr1);
2335 /* XXX: could force current_cpu to NULL to avoid
2336 potential bugs */
2337 switch (l) {
2338 case 8:
2339 /* 64 bit write access */
2340 val = ldq_p(buf);
2341 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2342 attrs);
2343 break;
2344 case 4:
2345 /* 32 bit write access */
2346 val = ldl_p(buf);
2347 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2348 attrs);
2349 break;
2350 case 2:
2351 /* 16 bit write access */
2352 val = lduw_p(buf);
2353 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2354 attrs);
2355 break;
2356 case 1:
2357 /* 8 bit write access */
2358 val = ldub_p(buf);
2359 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2360 attrs);
2361 break;
2362 default:
2363 abort();
2364 }
2365 } else {
2366 addr1 += memory_region_get_ram_addr(mr);
2367 /* RAM case */
2368 ptr = qemu_get_ram_ptr(addr1);
2369 memcpy(ptr, buf, l);
2370 invalidate_and_set_dirty(addr1, l);
2371 }
2372 } else {
2373 if (!memory_access_is_direct(mr, is_write)) {
2374 /* I/O case */
2375 l = memory_access_size(mr, l, addr1);
2376 switch (l) {
2377 case 8:
2378 /* 64 bit read access */
2379 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2380 attrs);
2381 stq_p(buf, val);
2382 break;
2383 case 4:
2384 /* 32 bit read access */
2385 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2386 attrs);
2387 stl_p(buf, val);
2388 break;
2389 case 2:
2390 /* 16 bit read access */
2391 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2392 attrs);
2393 stw_p(buf, val);
2394 break;
2395 case 1:
2396 /* 8 bit read access */
2397 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2398 attrs);
2399 stb_p(buf, val);
2400 break;
2401 default:
2402 abort();
2403 }
2404 } else {
2405 /* RAM case */
2406 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2407 memcpy(buf, ptr, l);
2408 }
2409 }
2410 len -= l;
2411 buf += l;
2412 addr += l;
2413 }
2414 rcu_read_unlock();
2415
2416 return result;
2417 }
2418
2419 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2420 const uint8_t *buf, int len)
2421 {
2422 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2423 }
2424
2425 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2426 uint8_t *buf, int len)
2427 {
2428 return address_space_rw(as, addr, attrs, buf, len, false);
2429 }
2430
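/* Illustrative sketch (not part of this file): how a device model might
 * pull a small guest buffer through the slow path above and check for a
 * bus error.  "guest_pa" is a hypothetical guest physical address.
 *
 *     uint8_t tmp[16];
 *     MemTxResult r = address_space_read(&address_space_memory, guest_pa,
 *                                        MEMTXATTRS_UNSPECIFIED, tmp,
 *                                        sizeof(tmp));
 *     if (r != MEMTX_OK) {
 *         // report a DMA or bus error to the guest
 *     }
 */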
2431
2432 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2433 int len, int is_write)
2434 {
2435 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2436 buf, len, is_write);
2437 }
2438
2439 enum write_rom_type {
2440 WRITE_DATA,
2441 FLUSH_CACHE,
2442 };
2443
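/* Write (or, for FLUSH_CACHE, just flush the host instruction cache for)
 * a range that may live in RAM or in a ROM-device region, bypassing the
 * read-only status of ROM; destinations that are neither RAM nor ROMD are
 * silently skipped. */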
2444 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2445 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2446 {
2447 hwaddr l;
2448 uint8_t *ptr;
2449 hwaddr addr1;
2450 MemoryRegion *mr;
2451
2452 rcu_read_lock();
2453 while (len > 0) {
2454 l = len;
2455 mr = address_space_translate(as, addr, &addr1, &l, true);
2456
2457 if (!(memory_region_is_ram(mr) ||
2458 memory_region_is_romd(mr))) {
2459 /* do nothing */
2460 } else {
2461 addr1 += memory_region_get_ram_addr(mr);
2462 /* ROM/RAM case */
2463 ptr = qemu_get_ram_ptr(addr1);
2464 switch (type) {
2465 case WRITE_DATA:
2466 memcpy(ptr, buf, l);
2467 invalidate_and_set_dirty(addr1, l);
2468 break;
2469 case FLUSH_CACHE:
2470 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2471 break;
2472 }
2473 }
2474 len -= l;
2475 buf += l;
2476 addr += l;
2477 }
2478 rcu_read_unlock();
2479 }
2480
2481 /* Used for ROM loading: can write to both RAM and ROM. */
2482 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2483 const uint8_t *buf, int len)
2484 {
2485 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2486 }
2487
2488 void cpu_flush_icache_range(hwaddr start, int len)
2489 {
2490 /*
2491 * This function should do the same thing as an icache flush that was
2492 * triggered from within the guest. For TCG we are always cache coherent,
2493 * so there is no need to flush anything. For KVM / Xen we need to flush
2494 * the host's instruction cache at least.
2495 */
2496 if (tcg_enabled()) {
2497 return;
2498 }
2499
2500 cpu_physical_memory_write_rom_internal(&address_space_memory,
2501 start, NULL, len, FLUSH_CACHE);
2502 }
2503
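/* Mappings of MMIO regions requested through address_space_map() are
 * served from this single, statically allocated bounce buffer.  Ownership
 * is claimed with atomic_xchg() on bounce.in_use; callers that lose the
 * race can register a callback with cpu_register_map_client() to be told
 * when the buffer is free again. */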
2504 typedef struct {
2505 MemoryRegion *mr;
2506 void *buffer;
2507 hwaddr addr;
2508 hwaddr len;
2509 bool in_use;
2510 } BounceBuffer;
2511
2512 static BounceBuffer bounce;
2513
2514 typedef struct MapClient {
2515 QEMUBH *bh;
2516 QLIST_ENTRY(MapClient) link;
2517 } MapClient;
2518
2519 QemuMutex map_client_list_lock;
2520 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2521 = QLIST_HEAD_INITIALIZER(map_client_list);
2522
2523 static void cpu_unregister_map_client_do(MapClient *client)
2524 {
2525 QLIST_REMOVE(client, link);
2526 g_free(client);
2527 }
2528
2529 static void cpu_notify_map_clients_locked(void)
2530 {
2531 MapClient *client;
2532
2533 while (!QLIST_EMPTY(&map_client_list)) {
2534 client = QLIST_FIRST(&map_client_list);
2535 qemu_bh_schedule(client->bh);
2536 cpu_unregister_map_client_do(client);
2537 }
2538 }
2539
2540 void cpu_register_map_client(QEMUBH *bh)
2541 {
2542 MapClient *client = g_malloc(sizeof(*client));
2543
2544 qemu_mutex_lock(&map_client_list_lock);
2545 client->bh = bh;
2546 QLIST_INSERT_HEAD(&map_client_list, client, link);
2547 if (!atomic_read(&bounce.in_use)) {
2548 cpu_notify_map_clients_locked();
2549 }
2550 qemu_mutex_unlock(&map_client_list_lock);
2551 }
2552
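/* One-time initialization of the memory subsystem: the RAM list mutex,
 * the flat "memory" and "I/O" address spaces, the special I/O memory
 * regions and the map-client lock. */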
2553 void cpu_exec_init_all(void)
2554 {
2555 qemu_mutex_init(&ram_list.mutex);
2556 memory_map_init();
2557 io_mem_init();
2558 qemu_mutex_init(&map_client_list_lock);
2559 }
2560
2561 void cpu_unregister_map_client(QEMUBH *bh)
2562 {
2563 MapClient *client;
2564
2565 qemu_mutex_lock(&map_client_list_lock);
2566 QLIST_FOREACH(client, &map_client_list, link) {
2567 if (client->bh == bh) {
2568 cpu_unregister_map_client_do(client);
2569 break;
2570 }
2571 }
2572 qemu_mutex_unlock(&map_client_list_lock);
2573 }
2574
2575 static void cpu_notify_map_clients(void)
2576 {
2577 qemu_mutex_lock(&map_client_list_lock);
2578 cpu_notify_map_clients_locked();
2579 qemu_mutex_unlock(&map_client_list_lock);
2580 }
2581
2582 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2583 {
2584 MemoryRegion *mr;
2585 hwaddr l, xlat;
2586
2587 rcu_read_lock();
2588 while (len > 0) {
2589 l = len;
2590 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2591 if (!memory_access_is_direct(mr, is_write)) {
2592 l = memory_access_size(mr, l, addr);
2593 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
rcu_read_unlock(); /* drop the read lock taken above before bailing out */
2594 return false;
2595 }
2596 }
2597
2598 len -= l;
2599 addr += l;
2600 }
2601 rcu_read_unlock();
2602 return true;
2603 }
2604
2605 /* Map a physical memory region into a host virtual address.
2606 * May map a subset of the requested range, given by and returned in *plen.
2607 * May return NULL if resources needed to perform the mapping are exhausted.
2608 * Use only for reads OR writes - not for read-modify-write operations.
2609 * Use cpu_register_map_client() to know when retrying the map operation is
2610 * likely to succeed.
2611 */
2612 void *address_space_map(AddressSpace *as,
2613 hwaddr addr,
2614 hwaddr *plen,
2615 bool is_write)
2616 {
2617 hwaddr len = *plen;
2618 hwaddr done = 0;
2619 hwaddr l, xlat, base;
2620 MemoryRegion *mr, *this_mr;
2621 ram_addr_t raddr;
2622
2623 if (len == 0) {
2624 return NULL;
2625 }
2626
2627 l = len;
2628 rcu_read_lock();
2629 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2630
2631 if (!memory_access_is_direct(mr, is_write)) {
2632 if (atomic_xchg(&bounce.in_use, true)) {
2633 rcu_read_unlock();
2634 return NULL;
2635 }
2636 /* Avoid unbounded allocations */
2637 l = MIN(l, TARGET_PAGE_SIZE);
2638 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2639 bounce.addr = addr;
2640 bounce.len = l;
2641
2642 memory_region_ref(mr);
2643 bounce.mr = mr;
2644 if (!is_write) {
2645 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2646 bounce.buffer, l);
2647 }
2648
2649 rcu_read_unlock();
2650 *plen = l;
2651 return bounce.buffer;
2652 }
2653
2654 base = xlat;
2655 raddr = memory_region_get_ram_addr(mr);
2656
2657 for (;;) {
2658 len -= l;
2659 addr += l;
2660 done += l;
2661 if (len == 0) {
2662 break;
2663 }
2664
2665 l = len;
2666 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2667 if (this_mr != mr || xlat != base + done) {
2668 break;
2669 }
2670 }
2671
2672 memory_region_ref(mr);
2673 rcu_read_unlock();
2674 *plen = done;
2675 return qemu_ram_ptr_length(raddr + base, plen);
2676 }
2677
2678 /* Unmaps a memory region previously mapped by address_space_map().
2679 * Will also mark the memory as dirty if is_write == 1. access_len gives
2680 * the amount of memory that was actually read or written by the caller.
2681 */
2682 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2683 int is_write, hwaddr access_len)
2684 {
2685 if (buffer != bounce.buffer) {
2686 MemoryRegion *mr;
2687 ram_addr_t addr1;
2688
2689 mr = qemu_ram_addr_from_host(buffer, &addr1);
2690 assert(mr != NULL);
2691 if (is_write) {
2692 invalidate_and_set_dirty(addr1, access_len);
2693 }
2694 if (xen_enabled()) {
2695 xen_invalidate_map_cache_entry(buffer);
2696 }
2697 memory_region_unref(mr);
2698 return;
2699 }
2700 if (is_write) {
2701 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2702 bounce.buffer, access_len);
2703 }
2704 qemu_vfree(bounce.buffer);
2705 bounce.buffer = NULL;
2706 memory_region_unref(bounce.mr);
2707 atomic_mb_set(&bounce.in_use, false);
2708 cpu_notify_map_clients();
2709 }
2710
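/* Illustrative sketch (not part of this file): the usual pattern for a
 * zero-copy mapping.  "as", "pa" and "want" are hypothetical; real callers
 * such as the DMA helpers also handle the NULL return (bounce buffer busy)
 * by registering a callback with cpu_register_map_client().
 *
 *     hwaddr mlen = want;
 *     void *p = address_space_map(as, pa, &mlen, true);
 *     if (p) {
 *         memset(p, 0, mlen);          // touch at most mlen bytes
 *         address_space_unmap(as, p, mlen, true, mlen);
 *     }
 */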
2711 void *cpu_physical_memory_map(hwaddr addr,
2712 hwaddr *plen,
2713 int is_write)
2714 {
2715 return address_space_map(&address_space_memory, addr, plen, is_write);
2716 }
2717
2718 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2719 int is_write, hwaddr access_len)
2720 {
2721 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2722 }
2723
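/* The fixed-size load and store helpers below access RAM directly when
 * possible and fall back to memory_region_dispatch_read() or _write() for
 * MMIO, byte-swapping when the requested device endianness differs from
 * the target's. */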
2724 /* warning: addr must be aligned */
2725 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2726 MemTxAttrs attrs,
2727 MemTxResult *result,
2728 enum device_endian endian)
2729 {
2730 uint8_t *ptr;
2731 uint64_t val;
2732 MemoryRegion *mr;
2733 hwaddr l = 4;
2734 hwaddr addr1;
2735 MemTxResult r;
2736
2737 rcu_read_lock();
2738 mr = address_space_translate(as, addr, &addr1, &l, false);
2739 if (l < 4 || !memory_access_is_direct(mr, false)) {
2740 /* I/O case */
2741 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2742 #if defined(TARGET_WORDS_BIGENDIAN)
2743 if (endian == DEVICE_LITTLE_ENDIAN) {
2744 val = bswap32(val);
2745 }
2746 #else
2747 if (endian == DEVICE_BIG_ENDIAN) {
2748 val = bswap32(val);
2749 }
2750 #endif
2751 } else {
2752 /* RAM case */
2753 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2754 & TARGET_PAGE_MASK)
2755 + addr1);
2756 switch (endian) {
2757 case DEVICE_LITTLE_ENDIAN:
2758 val = ldl_le_p(ptr);
2759 break;
2760 case DEVICE_BIG_ENDIAN:
2761 val = ldl_be_p(ptr);
2762 break;
2763 default:
2764 val = ldl_p(ptr);
2765 break;
2766 }
2767 r = MEMTX_OK;
2768 }
2769 if (result) {
2770 *result = r;
2771 }
2772 rcu_read_unlock();
2773 return val;
2774 }
2775
2776 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2777 MemTxAttrs attrs, MemTxResult *result)
2778 {
2779 return address_space_ldl_internal(as, addr, attrs, result,
2780 DEVICE_NATIVE_ENDIAN);
2781 }
2782
2783 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2784 MemTxAttrs attrs, MemTxResult *result)
2785 {
2786 return address_space_ldl_internal(as, addr, attrs, result,
2787 DEVICE_LITTLE_ENDIAN);
2788 }
2789
2790 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2791 MemTxAttrs attrs, MemTxResult *result)
2792 {
2793 return address_space_ldl_internal(as, addr, attrs, result,
2794 DEVICE_BIG_ENDIAN);
2795 }
2796
2797 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2798 {
2799 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2800 }
2801
2802 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2803 {
2804 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2805 }
2806
2807 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2808 {
2809 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2810 }
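/* ldl_phys() and the other *_phys wrappers drop the MemTxAttrs and
 * MemTxResult plumbing.  Callers that need to observe bus errors should
 * use the address_space_* variants directly, e.g. (illustrative only):
 *
 *     MemTxResult r;
 *     uint32_t v = address_space_ldl_le(as, pa, MEMTXATTRS_UNSPECIFIED, &r);
 *     if (r != MEMTX_OK) {
 *         // handle the failed read
 *     }
 */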
2811
2812 /* warning: addr must be aligned */
2813 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2814 MemTxAttrs attrs,
2815 MemTxResult *result,
2816 enum device_endian endian)
2817 {
2818 uint8_t *ptr;
2819 uint64_t val;
2820 MemoryRegion *mr;
2821 hwaddr l = 8;
2822 hwaddr addr1;
2823 MemTxResult r;
2824
2825 rcu_read_lock();
2826 mr = address_space_translate(as, addr, &addr1, &l,
2827 false);
2828 if (l < 8 || !memory_access_is_direct(mr, false)) {
2829 /* I/O case */
2830 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
2831 #if defined(TARGET_WORDS_BIGENDIAN)
2832 if (endian == DEVICE_LITTLE_ENDIAN) {
2833 val = bswap64(val);
2834 }
2835 #else
2836 if (endian == DEVICE_BIG_ENDIAN) {
2837 val = bswap64(val);
2838 }
2839 #endif
2840 } else {
2841 /* RAM case */
2842 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2843 & TARGET_PAGE_MASK)
2844 + addr1);
2845 switch (endian) {
2846 case DEVICE_LITTLE_ENDIAN:
2847 val = ldq_le_p(ptr);
2848 break;
2849 case DEVICE_BIG_ENDIAN:
2850 val = ldq_be_p(ptr);
2851 break;
2852 default:
2853 val = ldq_p(ptr);
2854 break;
2855 }
2856 r = MEMTX_OK;
2857 }
2858 if (result) {
2859 *result = r;
2860 }
2861 rcu_read_unlock();
2862 return val;
2863 }
2864
2865 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
2866 MemTxAttrs attrs, MemTxResult *result)
2867 {
2868 return address_space_ldq_internal(as, addr, attrs, result,
2869 DEVICE_NATIVE_ENDIAN);
2870 }
2871
2872 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
2873 MemTxAttrs attrs, MemTxResult *result)
2874 {
2875 return address_space_ldq_internal(as, addr, attrs, result,
2876 DEVICE_LITTLE_ENDIAN);
2877 }
2878
2879 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
2880 MemTxAttrs attrs, MemTxResult *result)
2881 {
2882 return address_space_ldq_internal(as, addr, attrs, result,
2883 DEVICE_BIG_ENDIAN);
2884 }
2885
2886 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2887 {
2888 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2889 }
2890
2891 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2892 {
2893 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2894 }
2895
2896 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2897 {
2898 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2899 }
2900
2901 /* XXX: optimize */
2902 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
2903 MemTxAttrs attrs, MemTxResult *result)
2904 {
2905 uint8_t val;
2906 MemTxResult r;
2907
2908 r = address_space_rw(as, addr, attrs, &val, 1, 0);
2909 if (result) {
2910 *result = r;
2911 }
2912 return val;
2913 }
2914
2915 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2916 {
2917 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2918 }
2919
2920 /* warning: addr must be aligned */
2921 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
2922 hwaddr addr,
2923 MemTxAttrs attrs,
2924 MemTxResult *result,
2925 enum device_endian endian)
2926 {
2927 uint8_t *ptr;
2928 uint64_t val;
2929 MemoryRegion *mr;
2930 hwaddr l = 2;
2931 hwaddr addr1;
2932 MemTxResult r;
2933
2934 rcu_read_lock();
2935 mr = address_space_translate(as, addr, &addr1, &l,
2936 false);
2937 if (l < 2 || !memory_access_is_direct(mr, false)) {
2938 /* I/O case */
2939 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
2940 #if defined(TARGET_WORDS_BIGENDIAN)
2941 if (endian == DEVICE_LITTLE_ENDIAN) {
2942 val = bswap16(val);
2943 }
2944 #else
2945 if (endian == DEVICE_BIG_ENDIAN) {
2946 val = bswap16(val);
2947 }
2948 #endif
2949 } else {
2950 /* RAM case */
2951 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2952 & TARGET_PAGE_MASK)
2953 + addr1);
2954 switch (endian) {
2955 case DEVICE_LITTLE_ENDIAN:
2956 val = lduw_le_p(ptr);
2957 break;
2958 case DEVICE_BIG_ENDIAN:
2959 val = lduw_be_p(ptr);
2960 break;
2961 default:
2962 val = lduw_p(ptr);
2963 break;
2964 }
2965 r = MEMTX_OK;
2966 }
2967 if (result) {
2968 *result = r;
2969 }
2970 rcu_read_unlock();
2971 return val;
2972 }
2973
2974 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
2975 MemTxAttrs attrs, MemTxResult *result)
2976 {
2977 return address_space_lduw_internal(as, addr, attrs, result,
2978 DEVICE_NATIVE_ENDIAN);
2979 }
2980
2981 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
2982 MemTxAttrs attrs, MemTxResult *result)
2983 {
2984 return address_space_lduw_internal(as, addr, attrs, result,
2985 DEVICE_LITTLE_ENDIAN);
2986 }
2987
2988 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
2989 MemTxAttrs attrs, MemTxResult *result)
2990 {
2991 return address_space_lduw_internal(as, addr, attrs, result,
2992 DEVICE_BIG_ENDIAN);
2993 }
2994
2995 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2996 {
2997 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2998 }
2999
3000 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3001 {
3002 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3003 }
3004
3005 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3006 {
3007 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3008 }
3009
3010 /* warning: addr must be aligned. The RAM page is not marked as dirty
3011 and the code inside is not invalidated. This is useful if the dirty
3012 bits are used to track modified PTEs. */
3013 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3014 MemTxAttrs attrs, MemTxResult *result)
3015 {
3016 uint8_t *ptr;
3017 MemoryRegion *mr;
3018 hwaddr l = 4;
3019 hwaddr addr1;
3020 MemTxResult r;
3021
3022 rcu_read_lock();
3023 mr = address_space_translate(as, addr, &addr1, &l,
3024 true);
3025 if (l < 4 || !memory_access_is_direct(mr, true)) {
3026 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3027 } else {
3028 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3029 ptr = qemu_get_ram_ptr(addr1);
3030 stl_p(ptr, val);
3031
3032 if (unlikely(in_migration)) {
3033 if (cpu_physical_memory_is_clean(addr1)) {
3034 /* invalidate code */
3035 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3036 /* set dirty bit */
3037 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
3038 }
3039 }
3040 r = MEMTX_OK;
3041 }
3042 if (result) {
3043 *result = r;
3044 }
3045 rcu_read_unlock();
3046 }
3047
3048 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3049 {
3050 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3051 }
3052
3053 /* warning: addr must be aligned */
3054 static inline void address_space_stl_internal(AddressSpace *as,
3055 hwaddr addr, uint32_t val,
3056 MemTxAttrs attrs,
3057 MemTxResult *result,
3058 enum device_endian endian)
3059 {
3060 uint8_t *ptr;
3061 MemoryRegion *mr;
3062 hwaddr l = 4;
3063 hwaddr addr1;
3064 MemTxResult r;
3065
3066 rcu_read_lock();
3067 mr = address_space_translate(as, addr, &addr1, &l,
3068 true);
3069 if (l < 4 || !memory_access_is_direct(mr, true)) {
3070 #if defined(TARGET_WORDS_BIGENDIAN)
3071 if (endian == DEVICE_LITTLE_ENDIAN) {
3072 val = bswap32(val);
3073 }
3074 #else
3075 if (endian == DEVICE_BIG_ENDIAN) {
3076 val = bswap32(val);
3077 }
3078 #endif
3079 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3080 } else {
3081 /* RAM case */
3082 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3083 ptr = qemu_get_ram_ptr(addr1);
3084 switch (endian) {
3085 case DEVICE_LITTLE_ENDIAN:
3086 stl_le_p(ptr, val);
3087 break;
3088 case DEVICE_BIG_ENDIAN:
3089 stl_be_p(ptr, val);
3090 break;
3091 default:
3092 stl_p(ptr, val);
3093 break;
3094 }
3095 invalidate_and_set_dirty(addr1, 4);
3096 r = MEMTX_OK;
3097 }
3098 if (result) {
3099 *result = r;
3100 }
3101 rcu_read_unlock();
3102 }
3103
3104 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3105 MemTxAttrs attrs, MemTxResult *result)
3106 {
3107 address_space_stl_internal(as, addr, val, attrs, result,
3108 DEVICE_NATIVE_ENDIAN);
3109 }
3110
3111 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3112 MemTxAttrs attrs, MemTxResult *result)
3113 {
3114 address_space_stl_internal(as, addr, val, attrs, result,
3115 DEVICE_LITTLE_ENDIAN);
3116 }
3117
3118 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3119 MemTxAttrs attrs, MemTxResult *result)
3120 {
3121 address_space_stl_internal(as, addr, val, attrs, result,
3122 DEVICE_BIG_ENDIAN);
3123 }
3124
3125 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3126 {
3127 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3128 }
3129
3130 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3131 {
3132 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3133 }
3134
3135 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3136 {
3137 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3138 }
3139
3140 /* XXX: optimize */
3141 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3142 MemTxAttrs attrs, MemTxResult *result)
3143 {
3144 uint8_t v = val;
3145 MemTxResult r;
3146
3147 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3148 if (result) {
3149 *result = r;
3150 }
3151 }
3152
3153 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3154 {
3155 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3156 }
3157
3158 /* warning: addr must be aligned */
3159 static inline void address_space_stw_internal(AddressSpace *as,
3160 hwaddr addr, uint32_t val,
3161 MemTxAttrs attrs,
3162 MemTxResult *result,
3163 enum device_endian endian)
3164 {
3165 uint8_t *ptr;
3166 MemoryRegion *mr;
3167 hwaddr l = 2;
3168 hwaddr addr1;
3169 MemTxResult r;
3170
3171 rcu_read_lock();
3172 mr = address_space_translate(as, addr, &addr1, &l, true);
3173 if (l < 2 || !memory_access_is_direct(mr, true)) {
3174 #if defined(TARGET_WORDS_BIGENDIAN)
3175 if (endian == DEVICE_LITTLE_ENDIAN) {
3176 val = bswap16(val);
3177 }
3178 #else
3179 if (endian == DEVICE_BIG_ENDIAN) {
3180 val = bswap16(val);
3181 }
3182 #endif
3183 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3184 } else {
3185 /* RAM case */
3186 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3187 ptr = qemu_get_ram_ptr(addr1);
3188 switch (endian) {
3189 case DEVICE_LITTLE_ENDIAN:
3190 stw_le_p(ptr, val);
3191 break;
3192 case DEVICE_BIG_ENDIAN:
3193 stw_be_p(ptr, val);
3194 break;
3195 default:
3196 stw_p(ptr, val);
3197 break;
3198 }
3199 invalidate_and_set_dirty(addr1, 2);
3200 r = MEMTX_OK;
3201 }
3202 if (result) {
3203 *result = r;
3204 }
3205 rcu_read_unlock();
3206 }
3207
3208 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3209 MemTxAttrs attrs, MemTxResult *result)
3210 {
3211 address_space_stw_internal(as, addr, val, attrs, result,
3212 DEVICE_NATIVE_ENDIAN);
3213 }
3214
3215 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3216 MemTxAttrs attrs, MemTxResult *result)
3217 {
3218 address_space_stw_internal(as, addr, val, attrs, result,
3219 DEVICE_LITTLE_ENDIAN);
3220 }
3221
3222 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3223 MemTxAttrs attrs, MemTxResult *result)
3224 {
3225 address_space_stw_internal(as, addr, val, attrs, result,
3226 DEVICE_BIG_ENDIAN);
3227 }
3228
3229 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3230 {
3231 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3232 }
3233
3234 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3235 {
3236 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3237 }
3238
3239 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3240 {
3241 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3242 }
3243
3244 /* XXX: optimize */
3245 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3246 MemTxAttrs attrs, MemTxResult *result)
3247 {
3248 MemTxResult r;
3249 val = tswap64(val);
3250 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3251 if (result) {
3252 *result = r;
3253 }
3254 }
3255
3256 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3257 MemTxAttrs attrs, MemTxResult *result)
3258 {
3259 MemTxResult r;
3260 val = cpu_to_le64(val);
3261 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3262 if (result) {
3263 *result = r;
3264 }
3265 }

3266 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3267 MemTxAttrs attrs, MemTxResult *result)
3268 {
3269 MemTxResult r;
3270 val = cpu_to_be64(val);
3271 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3272 if (result) {
3273 *result = r;
3274 }
3275 }
3276
3277 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3278 {
3279 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3280 }
3281
3282 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3283 {
3284 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3285 }
3286
3287 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3288 {
3289 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3290 }
3291
3292 /* virtual memory access for debug (includes writing to ROM) */
3293 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3294 uint8_t *buf, int len, int is_write)
3295 {
3296 int l;
3297 hwaddr phys_addr;
3298 target_ulong page;
3299
3300 while (len > 0) {
3301 page = addr & TARGET_PAGE_MASK;
3302 phys_addr = cpu_get_phys_page_debug(cpu, page);
3303 /* if no physical page mapped, return an error */
3304 if (phys_addr == -1)
3305 return -1;
3306 l = (page + TARGET_PAGE_SIZE) - addr;
3307 if (l > len)
3308 l = len;
3309 phys_addr += (addr & ~TARGET_PAGE_MASK);
3310 if (is_write) {
3311 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3312 } else {
3313 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3314 buf, l, 0);
3315 }
3316 len -= l;
3317 buf += l;
3318 addr += l;
3319 }
3320 return 0;
3321 }
3322 #endif
3323
3324 /*
3325 * A helper function for the _utterly broken_ virtio device model to find out if
3326 * it's running on a big-endian machine. Don't do this at home, kids!
3327 */
3328 bool target_words_bigendian(void);
3329 bool target_words_bigendian(void)
3330 {
3331 #if defined(TARGET_WORDS_BIGENDIAN)
3332 return true;
3333 #else
3334 return false;
3335 #endif
3336 }
3337
3338 #ifndef CONFIG_USER_ONLY
3339 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3340 {
3341 MemoryRegion *mr;
3342 hwaddr l = 1;
3343 bool res;
3344
3345 rcu_read_lock();
3346 mr = address_space_translate(&address_space_memory,
3347 phys_addr, &phys_addr, &l, false);
3348
3349 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3350 rcu_read_unlock();
3351 return res;
3352 }
3353
3354 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3355 {
3356 RAMBlock *block;
3357
3358 rcu_read_lock();
3359 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3360 func(block->host, block->offset, block->used_length, opaque);
3361 }
3362 rcu_read_unlock();
3363 }
3364 #endif