mirror_qemu.git / exec.c
exec: skip MMIO regions correctly in cpu_physical_memory_write_rom_internal
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include "qemu.h"
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "exec/cputlb.h"
53 #include "translate-all.h"
54
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57
58 #include "qemu/range.h"
59
60 //#define DEBUG_SUBPAGE
61
62 #if !defined(CONFIG_USER_ONLY)
63 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
64 * are protected by the ramlist lock.
65 */
66 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
67
68 static MemoryRegion *system_memory;
69 static MemoryRegion *system_io;
70
71 AddressSpace address_space_io;
72 AddressSpace address_space_memory;
73
74 MemoryRegion io_mem_rom, io_mem_notdirty;
75 static MemoryRegion io_mem_unassigned;
76
77 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
78 #define RAM_PREALLOC (1 << 0)
79
80 /* RAM is mmap-ed with MAP_SHARED */
81 #define RAM_SHARED (1 << 1)
82
83 /* Only a portion of RAM (used_length) is actually used, and migrated.
84 * This used_length size can change across reboots.
85 */
86 #define RAM_RESIZEABLE (1 << 2)
87
88 #endif
89
90 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
91 /* current CPU in the current thread. It is only valid inside
92 cpu_exec() */
93 DEFINE_TLS(CPUState *, current_cpu);
94 /* 0 = Do not count executed instructions.
95 1 = Precise instruction counting.
96 2 = Adaptive rate instruction counting. */
97 int use_icount;
98
99 #if !defined(CONFIG_USER_ONLY)
100
101 typedef struct PhysPageEntry PhysPageEntry;
102
103 struct PhysPageEntry {
104 /* How many levels (of P_L2_BITS bits each) to skip to the next level. 0 for a leaf. */
105 uint32_t skip : 6;
106 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
107 uint32_t ptr : 26;
108 };
109
110 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
111
112 /* Size of the L2 (and L3, etc) page tables. */
113 #define ADDR_SPACE_BITS 64
114
115 #define P_L2_BITS 9
116 #define P_L2_SIZE (1 << P_L2_BITS)
117
118 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
119
120 typedef PhysPageEntry Node[P_L2_SIZE];
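/* Worked example (illustrative, assuming a 4 KiB target page, i.e.
 * TARGET_PAGE_BITS == 12): P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6, so a
 * physical address is resolved through at most six tables of P_L2_SIZE == 512
 * entries, each level consuming P_L2_BITS == 9 bits of the page frame number,
 * most significant bits first.
 */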
121
122 typedef struct PhysPageMap {
123 struct rcu_head rcu;
124
125 unsigned sections_nb;
126 unsigned sections_nb_alloc;
127 unsigned nodes_nb;
128 unsigned nodes_nb_alloc;
129 Node *nodes;
130 MemoryRegionSection *sections;
131 } PhysPageMap;
132
133 struct AddressSpaceDispatch {
134 struct rcu_head rcu;
135
136 /* This is a multi-level map on the physical address space.
137 * The bottom level has pointers to MemoryRegionSections.
138 */
139 PhysPageEntry phys_map;
140 PhysPageMap map;
141 AddressSpace *as;
142 };
143
144 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
145 typedef struct subpage_t {
146 MemoryRegion iomem;
147 AddressSpace *as;
148 hwaddr base;
149 uint16_t sub_section[TARGET_PAGE_SIZE];
150 } subpage_t;
151
152 #define PHYS_SECTION_UNASSIGNED 0
153 #define PHYS_SECTION_NOTDIRTY 1
154 #define PHYS_SECTION_ROM 2
155 #define PHYS_SECTION_WATCH 3
156
157 static void io_mem_init(void);
158 static void memory_map_init(void);
159 static void tcg_commit(MemoryListener *listener);
160
161 static MemoryRegion io_mem_watch;
162 #endif
163
164 #if !defined(CONFIG_USER_ONLY)
165
166 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
167 {
168 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
169 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
170 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
171 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
172 }
173 }
174
175 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
176 {
177 unsigned i;
178 uint32_t ret;
179 PhysPageEntry e;
180 PhysPageEntry *p;
181
182 ret = map->nodes_nb++;
183 p = map->nodes[ret];
184 assert(ret != PHYS_MAP_NODE_NIL);
185 assert(ret != map->nodes_nb_alloc);
186
187 e.skip = leaf ? 0 : 1;
188 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
189 for (i = 0; i < P_L2_SIZE; ++i) {
190 memcpy(&p[i], &e, sizeof(e));
191 }
192 return ret;
193 }
194
195 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
196 hwaddr *index, hwaddr *nb, uint16_t leaf,
197 int level)
198 {
199 PhysPageEntry *p;
200 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
201
202 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
203 lp->ptr = phys_map_node_alloc(map, level == 0);
204 }
205 p = map->nodes[lp->ptr];
206 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
207
208 while (*nb && lp < &p[P_L2_SIZE]) {
209 if ((*index & (step - 1)) == 0 && *nb >= step) {
210 lp->skip = 0;
211 lp->ptr = leaf;
212 *index += step;
213 *nb -= step;
214 } else {
215 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
216 }
217 ++lp;
218 }
219 }
220
221 static void phys_page_set(AddressSpaceDispatch *d,
222 hwaddr index, hwaddr nb,
223 uint16_t leaf)
224 {
225 /* Wildly overreserve - it doesn't matter much. */
226 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
227
228 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
229 }
230
231 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
232 * and update our entry so we can skip it and go directly to the destination.
233 */
234 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
235 {
236 unsigned valid_ptr = P_L2_SIZE;
237 int valid = 0;
238 PhysPageEntry *p;
239 int i;
240
241 if (lp->ptr == PHYS_MAP_NODE_NIL) {
242 return;
243 }
244
245 p = nodes[lp->ptr];
246 for (i = 0; i < P_L2_SIZE; i++) {
247 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
248 continue;
249 }
250
251 valid_ptr = i;
252 valid++;
253 if (p[i].skip) {
254 phys_page_compact(&p[i], nodes, compacted);
255 }
256 }
257
258 /* We can only compress if there's only one child. */
259 if (valid != 1) {
260 return;
261 }
262
263 assert(valid_ptr < P_L2_SIZE);
264
265 /* Don't compress if it won't fit in the # of bits we have. */
266 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
267 return;
268 }
269
270 lp->ptr = p[valid_ptr].ptr;
271 if (!p[valid_ptr].skip) {
272 /* If our only child is a leaf, make this a leaf. */
273 /* By design, we should have made this node a leaf to begin with so we
274 * should never reach here.
275 * But since it's so simple to handle this, let's do it just in case we
276 * change this rule.
277 */
278 lp->skip = 0;
279 } else {
280 lp->skip += p[valid_ptr].skip;
281 }
282 }
283
284 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
285 {
286 DECLARE_BITMAP(compacted, nodes_nb);
287
288 if (d->phys_map.skip) {
289 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
290 }
291 }
292
293 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
294 Node *nodes, MemoryRegionSection *sections)
295 {
296 PhysPageEntry *p;
297 hwaddr index = addr >> TARGET_PAGE_BITS;
298 int i;
299
300 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
301 if (lp.ptr == PHYS_MAP_NODE_NIL) {
302 return &sections[PHYS_SECTION_UNASSIGNED];
303 }
304 p = nodes[lp.ptr];
305 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
306 }
307
308 if (sections[lp.ptr].size.hi ||
309 range_covers_byte(sections[lp.ptr].offset_within_address_space,
310 sections[lp.ptr].size.lo, addr)) {
311 return &sections[lp.ptr];
312 } else {
313 return &sections[PHYS_SECTION_UNASSIGNED];
314 }
315 }
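/* Illustrative note on the walk above: each loop iteration consumes lp.skip
 * levels' worth of index bits.  In the uncompacted case skip == 1 at every
 * non-leaf entry.  Once phys_page_compact() has folded a chain of
 * single-child nodes, the parent entry points directly at the grandchild
 * table with skip == 2 (or more), so a single "i -= lp.skip" step replaces
 * what would otherwise be several intermediate dereferences.
 */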
316
317 bool memory_region_is_unassigned(MemoryRegion *mr)
318 {
319 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
320 && mr != &io_mem_watch;
321 }
322
323 /* Called from RCU critical section */
324 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
325 hwaddr addr,
326 bool resolve_subpage)
327 {
328 MemoryRegionSection *section;
329 subpage_t *subpage;
330
331 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
332 if (resolve_subpage && section->mr->subpage) {
333 subpage = container_of(section->mr, subpage_t, iomem);
334 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
335 }
336 return section;
337 }
338
339 /* Called from RCU critical section */
340 static MemoryRegionSection *
341 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
342 hwaddr *plen, bool resolve_subpage)
343 {
344 MemoryRegionSection *section;
345 MemoryRegion *mr;
346 Int128 diff;
347
348 section = address_space_lookup_region(d, addr, resolve_subpage);
349 /* Compute offset within MemoryRegionSection */
350 addr -= section->offset_within_address_space;
351
352 /* Compute offset within MemoryRegion */
353 *xlat = addr + section->offset_within_region;
354
355 mr = section->mr;
356
357 /* MMIO registers can be expected to perform full-width accesses based only
358 * on their address, without considering adjacent registers that could
359 * decode to completely different MemoryRegions. When such registers
360 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
361 * regions overlap wildly. For this reason we cannot clamp the accesses
362 * here.
363 *
364 * If the length is small (as is the case for address_space_ldl/stl),
365 * everything works fine. If the incoming length is large, however,
366 * the caller really has to do the clamping through memory_access_size.
367 */
368 if (memory_region_is_ram(mr)) {
369 diff = int128_sub(section->size, int128_make64(addr));
370 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
371 }
372 return section;
373 }
374
375 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
376 {
377 if (memory_region_is_ram(mr)) {
378 return !(is_write && mr->readonly);
379 }
380 if (memory_region_is_romd(mr)) {
381 return !is_write;
382 }
383
384 return false;
385 }
386
387 /* Called from RCU critical section */
388 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
389 hwaddr *xlat, hwaddr *plen,
390 bool is_write)
391 {
392 IOMMUTLBEntry iotlb;
393 MemoryRegionSection *section;
394 MemoryRegion *mr;
395
396 for (;;) {
397 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
398 section = address_space_translate_internal(d, addr, &addr, plen, true);
399 mr = section->mr;
400
401 if (!mr->iommu_ops) {
402 break;
403 }
404
405 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
406 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
407 | (addr & iotlb.addr_mask));
408 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
409 if (!(iotlb.perm & (1 << is_write))) {
410 mr = &io_mem_unassigned;
411 break;
412 }
413
414 as = iotlb.target_as;
415 }
416
417 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
418 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
419 *plen = MIN(page, *plen);
420 }
421
422 *xlat = addr;
423 return mr;
424 }
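/* Minimal usage sketch of the translation above ('my_as', 'gpa' and 'len' are
 * hypothetical names, not defined in this file).  The caller must remain in
 * an RCU critical section for as long as it dereferences the returned region.
 */
#if 0
{
    hwaddr xlat, plen = len;
    MemoryRegion *mr;

    rcu_read_lock();
    mr = address_space_translate(my_as, gpa, &xlat, &plen, false);
    if (memory_access_is_direct(mr, false)) {
        /* RAM or ROMD region: up to plen bytes are accessible at offset
         * xlat within mr */
    }
    rcu_read_unlock();
}
#endif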
425
426 /* Called from RCU critical section */
427 MemoryRegionSection *
428 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
429 hwaddr *xlat, hwaddr *plen)
430 {
431 MemoryRegionSection *section;
432 section = address_space_translate_internal(cpu->memory_dispatch,
433 addr, xlat, plen, false);
434
435 assert(!section->mr->iommu_ops);
436 return section;
437 }
438 #endif
439
440 #if !defined(CONFIG_USER_ONLY)
441
442 static int cpu_common_post_load(void *opaque, int version_id)
443 {
444 CPUState *cpu = opaque;
445
446 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
447 version_id is increased. */
448 cpu->interrupt_request &= ~0x01;
449 tlb_flush(cpu, 1);
450
451 return 0;
452 }
453
454 static int cpu_common_pre_load(void *opaque)
455 {
456 CPUState *cpu = opaque;
457
458 cpu->exception_index = -1;
459
460 return 0;
461 }
462
463 static bool cpu_common_exception_index_needed(void *opaque)
464 {
465 CPUState *cpu = opaque;
466
467 return tcg_enabled() && cpu->exception_index != -1;
468 }
469
470 static const VMStateDescription vmstate_cpu_common_exception_index = {
471 .name = "cpu_common/exception_index",
472 .version_id = 1,
473 .minimum_version_id = 1,
474 .needed = cpu_common_exception_index_needed,
475 .fields = (VMStateField[]) {
476 VMSTATE_INT32(exception_index, CPUState),
477 VMSTATE_END_OF_LIST()
478 }
479 };
480
481 const VMStateDescription vmstate_cpu_common = {
482 .name = "cpu_common",
483 .version_id = 1,
484 .minimum_version_id = 1,
485 .pre_load = cpu_common_pre_load,
486 .post_load = cpu_common_post_load,
487 .fields = (VMStateField[]) {
488 VMSTATE_UINT32(halted, CPUState),
489 VMSTATE_UINT32(interrupt_request, CPUState),
490 VMSTATE_END_OF_LIST()
491 },
492 .subsections = (const VMStateDescription*[]) {
493 &vmstate_cpu_common_exception_index,
494 NULL
495 }
496 };
497
498 #endif
499
500 CPUState *qemu_get_cpu(int index)
501 {
502 CPUState *cpu;
503
504 CPU_FOREACH(cpu) {
505 if (cpu->cpu_index == index) {
506 return cpu;
507 }
508 }
509
510 return NULL;
511 }
512
513 #if !defined(CONFIG_USER_ONLY)
514 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
515 {
516 /* We only support one address space per cpu at the moment. */
517 assert(cpu->as == as);
518
519 if (cpu->tcg_as_listener) {
520 memory_listener_unregister(cpu->tcg_as_listener);
521 } else {
522 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
523 }
524 cpu->tcg_as_listener->commit = tcg_commit;
525 memory_listener_register(cpu->tcg_as_listener, as);
526 }
527 #endif
528
529 void cpu_exec_init(CPUArchState *env)
530 {
531 CPUState *cpu = ENV_GET_CPU(env);
532 CPUClass *cc = CPU_GET_CLASS(cpu);
533 CPUState *some_cpu;
534 int cpu_index;
535
536 #if defined(CONFIG_USER_ONLY)
537 cpu_list_lock();
538 #endif
539 cpu_index = 0;
540 CPU_FOREACH(some_cpu) {
541 cpu_index++;
542 }
543 cpu->cpu_index = cpu_index;
544 cpu->numa_node = 0;
545 QTAILQ_INIT(&cpu->breakpoints);
546 QTAILQ_INIT(&cpu->watchpoints);
547 #ifndef CONFIG_USER_ONLY
548 cpu->as = &address_space_memory;
549 cpu->thread_id = qemu_get_thread_id();
550 cpu_reload_memory_map(cpu);
551 #endif
552 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
553 #if defined(CONFIG_USER_ONLY)
554 cpu_list_unlock();
555 #endif
556 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
557 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
558 }
559 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
560 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
561 cpu_save, cpu_load, env);
562 assert(cc->vmsd == NULL);
563 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
564 #endif
565 if (cc->vmsd != NULL) {
566 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
567 }
568 }
569
570 #if defined(CONFIG_USER_ONLY)
571 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
572 {
573 tb_invalidate_phys_page_range(pc, pc + 1, 0);
574 }
575 #else
576 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
577 {
578 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
579 if (phys != -1) {
580 tb_invalidate_phys_addr(cpu->as,
581 phys | (pc & ~TARGET_PAGE_MASK));
582 }
583 }
584 #endif
585
586 #if defined(CONFIG_USER_ONLY)
587 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
588
589 {
590 }
591
592 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
593 int flags)
594 {
595 return -ENOSYS;
596 }
597
598 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
599 {
600 }
601
602 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
603 int flags, CPUWatchpoint **watchpoint)
604 {
605 return -ENOSYS;
606 }
607 #else
608 /* Add a watchpoint. */
609 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
610 int flags, CPUWatchpoint **watchpoint)
611 {
612 CPUWatchpoint *wp;
613
614 /* forbid ranges which are empty or run off the end of the address space */
615 if (len == 0 || (addr + len - 1) < addr) {
616 error_report("tried to set invalid watchpoint at %"
617 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
618 return -EINVAL;
619 }
620 wp = g_malloc(sizeof(*wp));
621
622 wp->vaddr = addr;
623 wp->len = len;
624 wp->flags = flags;
625
626 /* keep all GDB-injected watchpoints in front */
627 if (flags & BP_GDB) {
628 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
629 } else {
630 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
631 }
632
633 tlb_flush_page(cpu, addr);
634
635 if (watchpoint)
636 *watchpoint = wp;
637 return 0;
638 }
639
640 /* Remove a specific watchpoint. */
641 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
642 int flags)
643 {
644 CPUWatchpoint *wp;
645
646 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
647 if (addr == wp->vaddr && len == wp->len
648 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
649 cpu_watchpoint_remove_by_ref(cpu, wp);
650 return 0;
651 }
652 }
653 return -ENOENT;
654 }
655
656 /* Remove a specific watchpoint by reference. */
657 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
658 {
659 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
660
661 tlb_flush_page(cpu, watchpoint->vaddr);
662
663 g_free(watchpoint);
664 }
665
666 /* Remove all matching watchpoints. */
667 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
668 {
669 CPUWatchpoint *wp, *next;
670
671 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
672 if (wp->flags & mask) {
673 cpu_watchpoint_remove_by_ref(cpu, wp);
674 }
675 }
676 }
677
678 /* Return true if this watchpoint address matches the specified
679 * access (ie the address range covered by the watchpoint overlaps
680 * partially or completely with the address range covered by the
681 * access).
682 */
683 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
684 vaddr addr,
685 vaddr len)
686 {
687 /* We know the lengths are non-zero, but a little caution is
688 * required to avoid errors in the case where the range ends
689 * exactly at the top of the address space and so addr + len
690 * wraps round to zero.
691 */
692 vaddr wpend = wp->vaddr + wp->len - 1;
693 vaddr addrend = addr + len - 1;
694
695 return !(addr > wpend || wp->vaddr > addrend);
696 }
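/* Concrete instance of the wrap-around handled above (hypothetical values,
 * 32-bit vaddr): a watchpoint at 0xfffffffc with len 4 has wpend 0xffffffff,
 * whereas the naive end "addr + len" of an access to the same word would wrap
 * to 0; comparing inclusive end addresses keeps the overlap test correct.
 */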
697
698 #endif
699
700 /* Add a breakpoint. */
701 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
702 CPUBreakpoint **breakpoint)
703 {
704 CPUBreakpoint *bp;
705
706 bp = g_malloc(sizeof(*bp));
707
708 bp->pc = pc;
709 bp->flags = flags;
710
711 /* keep all GDB-injected breakpoints in front */
712 if (flags & BP_GDB) {
713 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
714 } else {
715 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
716 }
717
718 breakpoint_invalidate(cpu, pc);
719
720 if (breakpoint) {
721 *breakpoint = bp;
722 }
723 return 0;
724 }
725
726 /* Remove a specific breakpoint. */
727 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
728 {
729 CPUBreakpoint *bp;
730
731 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
732 if (bp->pc == pc && bp->flags == flags) {
733 cpu_breakpoint_remove_by_ref(cpu, bp);
734 return 0;
735 }
736 }
737 return -ENOENT;
738 }
739
740 /* Remove a specific breakpoint by reference. */
741 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
742 {
743 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
744
745 breakpoint_invalidate(cpu, breakpoint->pc);
746
747 g_free(breakpoint);
748 }
749
750 /* Remove all matching breakpoints. */
751 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
752 {
753 CPUBreakpoint *bp, *next;
754
755 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
756 if (bp->flags & mask) {
757 cpu_breakpoint_remove_by_ref(cpu, bp);
758 }
759 }
760 }
761
762 /* enable or disable single step mode. EXCP_DEBUG is returned by the
763 CPU loop after each instruction */
764 void cpu_single_step(CPUState *cpu, int enabled)
765 {
766 if (cpu->singlestep_enabled != enabled) {
767 cpu->singlestep_enabled = enabled;
768 if (kvm_enabled()) {
769 kvm_update_guest_debug(cpu, 0);
770 } else {
771 /* must flush all the translated code to avoid inconsistencies */
772 /* XXX: only flush what is necessary */
773 CPUArchState *env = cpu->env_ptr;
774 tb_flush(env);
775 }
776 }
777 }
778
779 void cpu_abort(CPUState *cpu, const char *fmt, ...)
780 {
781 va_list ap;
782 va_list ap2;
783
784 va_start(ap, fmt);
785 va_copy(ap2, ap);
786 fprintf(stderr, "qemu: fatal: ");
787 vfprintf(stderr, fmt, ap);
788 fprintf(stderr, "\n");
789 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
790 if (qemu_log_enabled()) {
791 qemu_log("qemu: fatal: ");
792 qemu_log_vprintf(fmt, ap2);
793 qemu_log("\n");
794 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
795 qemu_log_flush();
796 qemu_log_close();
797 }
798 va_end(ap2);
799 va_end(ap);
800 #if defined(CONFIG_USER_ONLY)
801 {
802 struct sigaction act;
803 sigfillset(&act.sa_mask);
804 act.sa_handler = SIG_DFL;
805 sigaction(SIGABRT, &act, NULL);
806 }
807 #endif
808 abort();
809 }
810
811 #if !defined(CONFIG_USER_ONLY)
812 /* Called from RCU critical section */
813 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
814 {
815 RAMBlock *block;
816
817 block = atomic_rcu_read(&ram_list.mru_block);
818 if (block && addr - block->offset < block->max_length) {
819 goto found;
820 }
821 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
822 if (addr - block->offset < block->max_length) {
823 goto found;
824 }
825 }
826
827 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
828 abort();
829
830 found:
831 /* It is safe to write mru_block outside the iothread lock. This
832 * is what happens:
833 *
834 * mru_block = xxx
835 * rcu_read_unlock()
836 * xxx removed from list
837 * rcu_read_lock()
838 * read mru_block
839 * mru_block = NULL;
840 * call_rcu(reclaim_ramblock, xxx);
841 * rcu_read_unlock()
842 *
843 * atomic_rcu_set is not needed here. The block was already published
844 * when it was placed into the list. Here we're just making an extra
845 * copy of the pointer.
846 */
847 ram_list.mru_block = block;
848 return block;
849 }
850
851 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
852 {
853 ram_addr_t start1;
854 RAMBlock *block;
855 ram_addr_t end;
856
857 end = TARGET_PAGE_ALIGN(start + length);
858 start &= TARGET_PAGE_MASK;
859
860 rcu_read_lock();
861 block = qemu_get_ram_block(start);
862 assert(block == qemu_get_ram_block(end - 1));
863 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
864 cpu_tlb_reset_dirty_all(start1, length);
865 rcu_read_unlock();
866 }
867
868 /* Note: start and end must be within the same ram block. */
869 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
870 ram_addr_t length,
871 unsigned client)
872 {
873 unsigned long end, page;
874 bool dirty;
875
876 if (length == 0) {
877 return false;
878 }
879
880 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
881 page = start >> TARGET_PAGE_BITS;
882 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
883 page, end - page);
884
885 if (dirty && tcg_enabled()) {
886 tlb_reset_dirty_range_all(start, length);
887 }
888
889 return dirty;
890 }
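/* Illustrative usage sketch of the helper above (hypothetical caller;
 * 'fb_ram_addr' and 'fb_size' are not defined in this file).
 */
#if 0
    if (cpu_physical_memory_test_and_clear_dirty(fb_ram_addr, fb_size,
                                                 DIRTY_MEMORY_VGA)) {
        /* at least one page in the range was written since the last check;
         * redraw it - the dirty bits are already cleared for the next poll */
    }
#endif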
891
892 /* Called from RCU critical section */
893 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
894 MemoryRegionSection *section,
895 target_ulong vaddr,
896 hwaddr paddr, hwaddr xlat,
897 int prot,
898 target_ulong *address)
899 {
900 hwaddr iotlb;
901 CPUWatchpoint *wp;
902
903 if (memory_region_is_ram(section->mr)) {
904 /* Normal RAM. */
905 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
906 + xlat;
907 if (!section->readonly) {
908 iotlb |= PHYS_SECTION_NOTDIRTY;
909 } else {
910 iotlb |= PHYS_SECTION_ROM;
911 }
912 } else {
913 iotlb = section - section->address_space->dispatch->map.sections;
914 iotlb += xlat;
915 }
916
917 /* Make accesses to pages with watchpoints go via the
918 watchpoint trap routines. */
919 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
920 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
921 /* Avoid trapping reads of pages with a write breakpoint. */
922 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
923 iotlb = PHYS_SECTION_WATCH + paddr;
924 *address |= TLB_MMIO;
925 break;
926 }
927 }
928 }
929
930 return iotlb;
931 }
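/* Note on the encoding above: the section number is kept in the low
 * TARGET_PAGE_BITS bits of the iotlb value and the page-aligned address in
 * the upper bits, which is why phys_section_add() asserts that sections_nb
 * stays below TARGET_PAGE_SIZE and why iotlb_to_region() can recover the
 * section with "index & ~TARGET_PAGE_MASK".
 */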
932 #endif /* defined(CONFIG_USER_ONLY) */
933
934 #if !defined(CONFIG_USER_ONLY)
935
936 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
937 uint16_t section);
938 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
939
940 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
941 qemu_anon_ram_alloc;
942
943 /*
944 * Set a custom physical guest memory allocator.
945 * Accelerators with unusual needs may need this. Hopefully, we can
946 * get rid of it eventually.
947 */
948 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
949 {
950 phys_mem_alloc = alloc;
951 }
952
953 static uint16_t phys_section_add(PhysPageMap *map,
954 MemoryRegionSection *section)
955 {
956 /* The physical section number is ORed with a page-aligned
957 * pointer to produce the iotlb entries. Thus it should
958 * never overflow into the page-aligned value.
959 */
960 assert(map->sections_nb < TARGET_PAGE_SIZE);
961
962 if (map->sections_nb == map->sections_nb_alloc) {
963 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
964 map->sections = g_renew(MemoryRegionSection, map->sections,
965 map->sections_nb_alloc);
966 }
967 map->sections[map->sections_nb] = *section;
968 memory_region_ref(section->mr);
969 return map->sections_nb++;
970 }
971
972 static void phys_section_destroy(MemoryRegion *mr)
973 {
974 memory_region_unref(mr);
975
976 if (mr->subpage) {
977 subpage_t *subpage = container_of(mr, subpage_t, iomem);
978 object_unref(OBJECT(&subpage->iomem));
979 g_free(subpage);
980 }
981 }
982
983 static void phys_sections_free(PhysPageMap *map)
984 {
985 while (map->sections_nb > 0) {
986 MemoryRegionSection *section = &map->sections[--map->sections_nb];
987 phys_section_destroy(section->mr);
988 }
989 g_free(map->sections);
990 g_free(map->nodes);
991 }
992
993 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
994 {
995 subpage_t *subpage;
996 hwaddr base = section->offset_within_address_space
997 & TARGET_PAGE_MASK;
998 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
999 d->map.nodes, d->map.sections);
1000 MemoryRegionSection subsection = {
1001 .offset_within_address_space = base,
1002 .size = int128_make64(TARGET_PAGE_SIZE),
1003 };
1004 hwaddr start, end;
1005
1006 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1007
1008 if (!(existing->mr->subpage)) {
1009 subpage = subpage_init(d->as, base);
1010 subsection.address_space = d->as;
1011 subsection.mr = &subpage->iomem;
1012 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1013 phys_section_add(&d->map, &subsection));
1014 } else {
1015 subpage = container_of(existing->mr, subpage_t, iomem);
1016 }
1017 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1018 end = start + int128_get64(section->size) - 1;
1019 subpage_register(subpage, start, end,
1020 phys_section_add(&d->map, section));
1021 }
1022
1023
1024 static void register_multipage(AddressSpaceDispatch *d,
1025 MemoryRegionSection *section)
1026 {
1027 hwaddr start_addr = section->offset_within_address_space;
1028 uint16_t section_index = phys_section_add(&d->map, section);
1029 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1030 TARGET_PAGE_BITS));
1031
1032 assert(num_pages);
1033 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1034 }
1035
1036 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1037 {
1038 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1039 AddressSpaceDispatch *d = as->next_dispatch;
1040 MemoryRegionSection now = *section, remain = *section;
1041 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1042
1043 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1044 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1045 - now.offset_within_address_space;
1046
1047 now.size = int128_min(int128_make64(left), now.size);
1048 register_subpage(d, &now);
1049 } else {
1050 now.size = int128_zero();
1051 }
1052 while (int128_ne(remain.size, now.size)) {
1053 remain.size = int128_sub(remain.size, now.size);
1054 remain.offset_within_address_space += int128_get64(now.size);
1055 remain.offset_within_region += int128_get64(now.size);
1056 now = remain;
1057 if (int128_lt(remain.size, page_size)) {
1058 register_subpage(d, &now);
1059 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1060 now.size = page_size;
1061 register_subpage(d, &now);
1062 } else {
1063 now.size = int128_and(now.size, int128_neg(page_size));
1064 register_multipage(d, &now);
1065 }
1066 }
1067 }
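/* Worked example of the splitting above (illustrative, assuming 4 KiB target
 * pages): a section at offset_within_address_space 0x3800 with size 0x2000 is
 * registered as a subpage for [0x3800, 0x4000), as a full page via
 * register_multipage() for [0x4000, 0x5000), and as a trailing subpage for
 * [0x5000, 0x5800).
 */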
1068
1069 void qemu_flush_coalesced_mmio_buffer(void)
1070 {
1071 if (kvm_enabled())
1072 kvm_flush_coalesced_mmio_buffer();
1073 }
1074
1075 void qemu_mutex_lock_ramlist(void)
1076 {
1077 qemu_mutex_lock(&ram_list.mutex);
1078 }
1079
1080 void qemu_mutex_unlock_ramlist(void)
1081 {
1082 qemu_mutex_unlock(&ram_list.mutex);
1083 }
1084
1085 #ifdef __linux__
1086
1087 #include <sys/vfs.h>
1088
1089 #define HUGETLBFS_MAGIC 0x958458f6
1090
1091 static long gethugepagesize(const char *path, Error **errp)
1092 {
1093 struct statfs fs;
1094 int ret;
1095
1096 do {
1097 ret = statfs(path, &fs);
1098 } while (ret != 0 && errno == EINTR);
1099
1100 if (ret != 0) {
1101 error_setg_errno(errp, errno, "failed to get page size of file %s",
1102 path);
1103 return 0;
1104 }
1105
1106 if (fs.f_type != HUGETLBFS_MAGIC)
1107 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1108
1109 return fs.f_bsize;
1110 }
1111
1112 static void *file_ram_alloc(RAMBlock *block,
1113 ram_addr_t memory,
1114 const char *path,
1115 Error **errp)
1116 {
1117 char *filename;
1118 char *sanitized_name;
1119 char *c;
1120 void *area = NULL;
1121 int fd;
1122 uint64_t hpagesize;
1123 Error *local_err = NULL;
1124
1125 hpagesize = gethugepagesize(path, &local_err);
1126 if (local_err) {
1127 error_propagate(errp, local_err);
1128 goto error;
1129 }
1130 block->mr->align = hpagesize;
1131
1132 if (memory < hpagesize) {
1133 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1134 "or larger than huge page size 0x%" PRIx64,
1135 memory, hpagesize);
1136 goto error;
1137 }
1138
1139 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1140 error_setg(errp,
1141 "host lacks kvm mmu notifiers, -mem-path unsupported");
1142 goto error;
1143 }
1144
1145 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1146 sanitized_name = g_strdup(memory_region_name(block->mr));
1147 for (c = sanitized_name; *c != '\0'; c++) {
1148 if (*c == '/')
1149 *c = '_';
1150 }
1151
1152 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1153 sanitized_name);
1154 g_free(sanitized_name);
1155
1156 fd = mkstemp(filename);
1157 if (fd < 0) {
1158 error_setg_errno(errp, errno,
1159 "unable to create backing store for hugepages");
1160 g_free(filename);
1161 goto error;
1162 }
1163 unlink(filename);
1164 g_free(filename);
1165
1166 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1167
1168 /*
1169 * ftruncate is not supported by hugetlbfs in older
1170 * hosts, so don't bother bailing out on errors.
1171 * If anything goes wrong with it under other filesystems,
1172 * mmap will fail.
1173 */
1174 if (ftruncate(fd, memory)) {
1175 perror("ftruncate");
1176 }
1177
1178 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1179 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1180 fd, 0);
1181 if (area == MAP_FAILED) {
1182 error_setg_errno(errp, errno,
1183 "unable to map backing store for hugepages");
1184 close(fd);
1185 goto error;
1186 }
1187
1188 if (mem_prealloc) {
1189 os_mem_prealloc(fd, area, memory);
1190 }
1191
1192 block->fd = fd;
1193 return area;
1194
1195 error:
1196 if (mem_prealloc) {
1197 error_report("%s", error_get_pretty(*errp));
1198 exit(1);
1199 }
1200 return NULL;
1201 }
1202 #endif
1203
1204 /* Called with the ramlist lock held. */
1205 static ram_addr_t find_ram_offset(ram_addr_t size)
1206 {
1207 RAMBlock *block, *next_block;
1208 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1209
1210 assert(size != 0); /* it would hand out same offset multiple times */
1211
1212 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1213 return 0;
1214 }
1215
1216 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1217 ram_addr_t end, next = RAM_ADDR_MAX;
1218
1219 end = block->offset + block->max_length;
1220
1221 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1222 if (next_block->offset >= end) {
1223 next = MIN(next, next_block->offset);
1224 }
1225 }
1226 if (next - end >= size && next - end < mingap) {
1227 offset = end;
1228 mingap = next - end;
1229 }
1230 }
1231
1232 if (offset == RAM_ADDR_MAX) {
1233 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1234 (uint64_t)size);
1235 abort();
1236 }
1237
1238 return offset;
1239 }
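/* Worked example of the gap search above (hypothetical layout): with blocks
 * at [0, 0x100000) and [0x300000, 0x400000), a request for 0x100000 bytes
 * considers the 0x200000-byte gap after the first block and the unbounded
 * gap after the second, and returns 0x100000, the end of the block whose
 * following gap is the smallest one that still fits.
 */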
1240
1241 ram_addr_t last_ram_offset(void)
1242 {
1243 RAMBlock *block;
1244 ram_addr_t last = 0;
1245
1246 rcu_read_lock();
1247 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1248 last = MAX(last, block->offset + block->max_length);
1249 }
1250 rcu_read_unlock();
1251 return last;
1252 }
1253
1254 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1255 {
1256 int ret;
1257
1258 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1259 if (!machine_dump_guest_core(current_machine)) {
1260 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1261 if (ret) {
1262 perror("qemu_madvise");
1263 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1264 "but dump_guest_core=off specified\n");
1265 }
1266 }
1267 }
1268
1269 /* Called within an RCU critical section, or while the ramlist lock
1270 * is held.
1271 */
1272 static RAMBlock *find_ram_block(ram_addr_t addr)
1273 {
1274 RAMBlock *block;
1275
1276 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1277 if (block->offset == addr) {
1278 return block;
1279 }
1280 }
1281
1282 return NULL;
1283 }
1284
1285 /* Called with iothread lock held. */
1286 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1287 {
1288 RAMBlock *new_block, *block;
1289
1290 rcu_read_lock();
1291 new_block = find_ram_block(addr);
1292 assert(new_block);
1293 assert(!new_block->idstr[0]);
1294
1295 if (dev) {
1296 char *id = qdev_get_dev_path(dev);
1297 if (id) {
1298 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1299 g_free(id);
1300 }
1301 }
1302 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1303
1304 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1305 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1306 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1307 new_block->idstr);
1308 abort();
1309 }
1310 }
1311 rcu_read_unlock();
1312 }
1313
1314 /* Called with iothread lock held. */
1315 void qemu_ram_unset_idstr(ram_addr_t addr)
1316 {
1317 RAMBlock *block;
1318
1319 /* FIXME: arch_init.c assumes that this is not called throughout
1320 * migration. Ignore the problem since hot-unplug during migration
1321 * does not work anyway.
1322 */
1323
1324 rcu_read_lock();
1325 block = find_ram_block(addr);
1326 if (block) {
1327 memset(block->idstr, 0, sizeof(block->idstr));
1328 }
1329 rcu_read_unlock();
1330 }
1331
1332 static int memory_try_enable_merging(void *addr, size_t len)
1333 {
1334 if (!machine_mem_merge(current_machine)) {
1335 /* disabled by the user */
1336 return 0;
1337 }
1338
1339 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1340 }
1341
1342 /* Only legal before the guest might have detected the memory size: e.g. on
1343 * incoming migration, or right after reset.
1344 *
1345 * As the memory core doesn't know how memory is accessed, it is up to the
1346 * resize callback to update device state and/or add assertions to detect
1347 * misuse, if necessary.
1348 */
1349 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1350 {
1351 RAMBlock *block = find_ram_block(base);
1352
1353 assert(block);
1354
1355 newsize = TARGET_PAGE_ALIGN(newsize);
1356
1357 if (block->used_length == newsize) {
1358 return 0;
1359 }
1360
1361 if (!(block->flags & RAM_RESIZEABLE)) {
1362 error_setg_errno(errp, EINVAL,
1363 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1364 " in != 0x" RAM_ADDR_FMT, block->idstr,
1365 newsize, block->used_length);
1366 return -EINVAL;
1367 }
1368
1369 if (block->max_length < newsize) {
1370 error_setg_errno(errp, EINVAL,
1371 "Length too large: %s: 0x" RAM_ADDR_FMT
1372 " > 0x" RAM_ADDR_FMT, block->idstr,
1373 newsize, block->max_length);
1374 return -EINVAL;
1375 }
1376
1377 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1378 block->used_length = newsize;
1379 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1380 DIRTY_CLIENTS_ALL);
1381 memory_region_set_size(block->mr, newsize);
1382 if (block->resized) {
1383 block->resized(block->idstr, newsize, block->host);
1384 }
1385 return 0;
1386 }
1387
1388 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1389 {
1390 RAMBlock *block;
1391 RAMBlock *last_block = NULL;
1392 ram_addr_t old_ram_size, new_ram_size;
1393
1394 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1395
1396 qemu_mutex_lock_ramlist();
1397 new_block->offset = find_ram_offset(new_block->max_length);
1398
1399 if (!new_block->host) {
1400 if (xen_enabled()) {
1401 xen_ram_alloc(new_block->offset, new_block->max_length,
1402 new_block->mr);
1403 } else {
1404 new_block->host = phys_mem_alloc(new_block->max_length,
1405 &new_block->mr->align);
1406 if (!new_block->host) {
1407 error_setg_errno(errp, errno,
1408 "cannot set up guest memory '%s'",
1409 memory_region_name(new_block->mr));
1410 qemu_mutex_unlock_ramlist();
1411 return -1;
1412 }
1413 memory_try_enable_merging(new_block->host, new_block->max_length);
1414 }
1415 }
1416
1417 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1418 * QLIST (which has an RCU-friendly variant) does not have insertion at
1419 * tail, so save the last element in last_block.
1420 */
1421 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1422 last_block = block;
1423 if (block->max_length < new_block->max_length) {
1424 break;
1425 }
1426 }
1427 if (block) {
1428 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1429 } else if (last_block) {
1430 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1431 } else { /* list is empty */
1432 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1433 }
1434 ram_list.mru_block = NULL;
1435
1436 /* Write list before version */
1437 smp_wmb();
1438 ram_list.version++;
1439 qemu_mutex_unlock_ramlist();
1440
1441 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1442
1443 if (new_ram_size > old_ram_size) {
1444 int i;
1445
1446 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1447 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1448 ram_list.dirty_memory[i] =
1449 bitmap_zero_extend(ram_list.dirty_memory[i],
1450 old_ram_size, new_ram_size);
1451 }
1452 }
1453 cpu_physical_memory_set_dirty_range(new_block->offset,
1454 new_block->used_length,
1455 DIRTY_CLIENTS_ALL);
1456
1457 if (new_block->host) {
1458 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1459 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1460 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1461 if (kvm_enabled()) {
1462 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1463 }
1464 }
1465
1466 return new_block->offset;
1467 }
1468
1469 #ifdef __linux__
1470 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1471 bool share, const char *mem_path,
1472 Error **errp)
1473 {
1474 RAMBlock *new_block;
1475 ram_addr_t addr;
1476 Error *local_err = NULL;
1477
1478 if (xen_enabled()) {
1479 error_setg(errp, "-mem-path not supported with Xen");
1480 return -1;
1481 }
1482
1483 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1484 /*
1485 * file_ram_alloc() needs to allocate just like
1486 * phys_mem_alloc, but we haven't bothered to provide
1487 * a hook there.
1488 */
1489 error_setg(errp,
1490 "-mem-path not supported with this accelerator");
1491 return -1;
1492 }
1493
1494 size = TARGET_PAGE_ALIGN(size);
1495 new_block = g_malloc0(sizeof(*new_block));
1496 new_block->mr = mr;
1497 new_block->used_length = size;
1498 new_block->max_length = size;
1499 new_block->flags = share ? RAM_SHARED : 0;
1500 new_block->host = file_ram_alloc(new_block, size,
1501 mem_path, errp);
1502 if (!new_block->host) {
1503 g_free(new_block);
1504 return -1;
1505 }
1506
1507 addr = ram_block_add(new_block, &local_err);
1508 if (local_err) {
1509 g_free(new_block);
1510 error_propagate(errp, local_err);
1511 return -1;
1512 }
1513 return addr;
1514 }
1515 #endif
1516
1517 static
1518 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1519 void (*resized)(const char*,
1520 uint64_t length,
1521 void *host),
1522 void *host, bool resizeable,
1523 MemoryRegion *mr, Error **errp)
1524 {
1525 RAMBlock *new_block;
1526 ram_addr_t addr;
1527 Error *local_err = NULL;
1528
1529 size = TARGET_PAGE_ALIGN(size);
1530 max_size = TARGET_PAGE_ALIGN(max_size);
1531 new_block = g_malloc0(sizeof(*new_block));
1532 new_block->mr = mr;
1533 new_block->resized = resized;
1534 new_block->used_length = size;
1535 new_block->max_length = max_size;
1536 assert(max_size >= size);
1537 new_block->fd = -1;
1538 new_block->host = host;
1539 if (host) {
1540 new_block->flags |= RAM_PREALLOC;
1541 }
1542 if (resizeable) {
1543 new_block->flags |= RAM_RESIZEABLE;
1544 }
1545 addr = ram_block_add(new_block, &local_err);
1546 if (local_err) {
1547 g_free(new_block);
1548 error_propagate(errp, local_err);
1549 return -1;
1550 }
1551 return addr;
1552 }
1553
1554 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1555 MemoryRegion *mr, Error **errp)
1556 {
1557 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1558 }
1559
1560 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1561 {
1562 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1563 }
1564
1565 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1566 void (*resized)(const char*,
1567 uint64_t length,
1568 void *host),
1569 MemoryRegion *mr, Error **errp)
1570 {
1571 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1572 }
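/* Illustrative usage sketch of the resizeable-RAM API above ('my_mr' and
 * 'my_resized_cb' are hypothetical; callers normally reach this through
 * memory_region_init_resizeable_ram()).
 */
#if 0
{
    Error *err = NULL;
    ram_addr_t base;

    /* allocate 4 MiB now, allow growth up to 16 MiB later */
    base = qemu_ram_alloc_resizeable(4 * 1024 * 1024, 16 * 1024 * 1024,
                                     my_resized_cb, my_mr, &err);

    /* later, e.g. during incoming migration: grow used_length to 8 MiB */
    qemu_ram_resize(base, 8 * 1024 * 1024, &err);
}
#endif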
1573
1574 void qemu_ram_free_from_ptr(ram_addr_t addr)
1575 {
1576 RAMBlock *block;
1577
1578 qemu_mutex_lock_ramlist();
1579 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1580 if (addr == block->offset) {
1581 QLIST_REMOVE_RCU(block, next);
1582 ram_list.mru_block = NULL;
1583 /* Write list before version */
1584 smp_wmb();
1585 ram_list.version++;
1586 g_free_rcu(block, rcu);
1587 break;
1588 }
1589 }
1590 qemu_mutex_unlock_ramlist();
1591 }
1592
1593 static void reclaim_ramblock(RAMBlock *block)
1594 {
1595 if (block->flags & RAM_PREALLOC) {
1596 ;
1597 } else if (xen_enabled()) {
1598 xen_invalidate_map_cache_entry(block->host);
1599 #ifndef _WIN32
1600 } else if (block->fd >= 0) {
1601 munmap(block->host, block->max_length);
1602 close(block->fd);
1603 #endif
1604 } else {
1605 qemu_anon_ram_free(block->host, block->max_length);
1606 }
1607 g_free(block);
1608 }
1609
1610 void qemu_ram_free(ram_addr_t addr)
1611 {
1612 RAMBlock *block;
1613
1614 qemu_mutex_lock_ramlist();
1615 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1616 if (addr == block->offset) {
1617 QLIST_REMOVE_RCU(block, next);
1618 ram_list.mru_block = NULL;
1619 /* Write list before version */
1620 smp_wmb();
1621 ram_list.version++;
1622 call_rcu(block, reclaim_ramblock, rcu);
1623 break;
1624 }
1625 }
1626 qemu_mutex_unlock_ramlist();
1627 }
1628
1629 #ifndef _WIN32
1630 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1631 {
1632 RAMBlock *block;
1633 ram_addr_t offset;
1634 int flags;
1635 void *area, *vaddr;
1636
1637 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1638 offset = addr - block->offset;
1639 if (offset < block->max_length) {
1640 vaddr = ramblock_ptr(block, offset);
1641 if (block->flags & RAM_PREALLOC) {
1642 ;
1643 } else if (xen_enabled()) {
1644 abort();
1645 } else {
1646 flags = MAP_FIXED;
1647 if (block->fd >= 0) {
1648 flags |= (block->flags & RAM_SHARED ?
1649 MAP_SHARED : MAP_PRIVATE);
1650 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1651 flags, block->fd, offset);
1652 } else {
1653 /*
1654 * Remap needs to match alloc. Accelerators that
1655 * set phys_mem_alloc never remap. If they did,
1656 * we'd need a remap hook here.
1657 */
1658 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1659
1660 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1661 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1662 flags, -1, 0);
1663 }
1664 if (area != vaddr) {
1665 fprintf(stderr, "Could not remap addr: "
1666 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1667 length, addr);
1668 exit(1);
1669 }
1670 memory_try_enable_merging(vaddr, length);
1671 qemu_ram_setup_dump(vaddr, length);
1672 }
1673 }
1674 }
1675 }
1676 #endif /* !_WIN32 */
1677
1678 int qemu_get_ram_fd(ram_addr_t addr)
1679 {
1680 RAMBlock *block;
1681 int fd;
1682
1683 rcu_read_lock();
1684 block = qemu_get_ram_block(addr);
1685 fd = block->fd;
1686 rcu_read_unlock();
1687 return fd;
1688 }
1689
1690 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1691 {
1692 RAMBlock *block;
1693 void *ptr;
1694
1695 rcu_read_lock();
1696 block = qemu_get_ram_block(addr);
1697 ptr = ramblock_ptr(block, 0);
1698 rcu_read_unlock();
1699 return ptr;
1700 }
1701
1702 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1703 * This should not be used for general purpose DMA. Use address_space_map
1704 * or address_space_rw instead. For local memory (e.g. video ram) that the
1705 * device owns, use memory_region_get_ram_ptr.
1706 *
1707 * By the time this function returns, the returned pointer is not protected
1708 * by RCU anymore. If the caller is not within an RCU critical section and
1709 * does not hold the iothread lock, it must have other means of protecting the
1710 * pointer, such as a reference to the region that includes the incoming
1711 * ram_addr_t.
1712 */
1713 void *qemu_get_ram_ptr(ram_addr_t addr)
1714 {
1715 RAMBlock *block;
1716 void *ptr;
1717
1718 rcu_read_lock();
1719 block = qemu_get_ram_block(addr);
1720
1721 if (xen_enabled() && block->host == NULL) {
1722 /* We need to check if the requested address is in RAM
1723 * because we don't want to map the entire memory in QEMU.
1724 * In that case just map until the end of the page.
1725 */
1726 if (block->offset == 0) {
1727 ptr = xen_map_cache(addr, 0, 0);
1728 goto unlock;
1729 }
1730
1731 block->host = xen_map_cache(block->offset, block->max_length, 1);
1732 }
1733 ptr = ramblock_ptr(block, addr - block->offset);
1734
1735 unlock:
1736 rcu_read_unlock();
1737 return ptr;
1738 }
1739
1740 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1741 * but takes a size argument.
1742 *
1743 * By the time this function returns, the returned pointer is not protected
1744 * by RCU anymore. If the caller is not within an RCU critical section and
1745 * does not hold the iothread lock, it must have other means of protecting the
1746 * pointer, such as a reference to the region that includes the incoming
1747 * ram_addr_t.
1748 */
1749 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1750 {
1751 void *ptr;
1752 if (*size == 0) {
1753 return NULL;
1754 }
1755 if (xen_enabled()) {
1756 return xen_map_cache(addr, *size, 1);
1757 } else {
1758 RAMBlock *block;
1759 rcu_read_lock();
1760 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1761 if (addr - block->offset < block->max_length) {
1762 if (addr - block->offset + *size > block->max_length)
1763 *size = block->max_length - addr + block->offset;
1764 ptr = ramblock_ptr(block, addr - block->offset);
1765 rcu_read_unlock();
1766 return ptr;
1767 }
1768 }
1769
1770 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1771 abort();
1772 }
1773 }
1774
1775 /* Some of the softmmu routines need to translate from a host pointer
1776 * (typically a TLB entry) back to a ram offset.
1777 *
1778 * By the time this function returns, the returned pointer is not protected
1779 * by RCU anymore. If the caller is not within an RCU critical section and
1780 * does not hold the iothread lock, it must have other means of protecting the
1781 * pointer, such as a reference to the region that includes the incoming
1782 * ram_addr_t.
1783 */
1784 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1785 {
1786 RAMBlock *block;
1787 uint8_t *host = ptr;
1788 MemoryRegion *mr;
1789
1790 if (xen_enabled()) {
1791 rcu_read_lock();
1792 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1793 mr = qemu_get_ram_block(*ram_addr)->mr;
1794 rcu_read_unlock();
1795 return mr;
1796 }
1797
1798 rcu_read_lock();
1799 block = atomic_rcu_read(&ram_list.mru_block);
1800 if (block && block->host && host - block->host < block->max_length) {
1801 goto found;
1802 }
1803
1804 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1805 /* This case happens when the block is not mapped. */
1806 if (block->host == NULL) {
1807 continue;
1808 }
1809 if (host - block->host < block->max_length) {
1810 goto found;
1811 }
1812 }
1813
1814 rcu_read_unlock();
1815 return NULL;
1816
1817 found:
1818 *ram_addr = block->offset + (host - block->host);
1819 mr = block->mr;
1820 rcu_read_unlock();
1821 return mr;
1822 }
1823
1824 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1825 uint64_t val, unsigned size)
1826 {
1827 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1828 tb_invalidate_phys_page_fast(ram_addr, size);
1829 }
1830 switch (size) {
1831 case 1:
1832 stb_p(qemu_get_ram_ptr(ram_addr), val);
1833 break;
1834 case 2:
1835 stw_p(qemu_get_ram_ptr(ram_addr), val);
1836 break;
1837 case 4:
1838 stl_p(qemu_get_ram_ptr(ram_addr), val);
1839 break;
1840 default:
1841 abort();
1842 }
1843 /* Set both VGA and migration bits for simplicity and to remove
1844 * the notdirty callback faster.
1845 */
1846 cpu_physical_memory_set_dirty_range(ram_addr, size,
1847 DIRTY_CLIENTS_NOCODE);
1848 /* we remove the notdirty callback only if the code has been
1849 flushed */
1850 if (!cpu_physical_memory_is_clean(ram_addr)) {
1851 CPUArchState *env = current_cpu->env_ptr;
1852 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1853 }
1854 }
1855
1856 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1857 unsigned size, bool is_write)
1858 {
1859 return is_write;
1860 }
1861
1862 static const MemoryRegionOps notdirty_mem_ops = {
1863 .write = notdirty_mem_write,
1864 .valid.accepts = notdirty_mem_accepts,
1865 .endianness = DEVICE_NATIVE_ENDIAN,
1866 };
1867
1868 /* Generate a debug exception if a watchpoint has been hit. */
1869 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1870 {
1871 CPUState *cpu = current_cpu;
1872 CPUArchState *env = cpu->env_ptr;
1873 target_ulong pc, cs_base;
1874 target_ulong vaddr;
1875 CPUWatchpoint *wp;
1876 int cpu_flags;
1877
1878 if (cpu->watchpoint_hit) {
1879 /* We re-entered the check after replacing the TB. Now raise
1880 * the debug interrupt so that it will trigger after the
1881 * current instruction. */
1882 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1883 return;
1884 }
1885 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1886 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1887 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1888 && (wp->flags & flags)) {
1889 if (flags == BP_MEM_READ) {
1890 wp->flags |= BP_WATCHPOINT_HIT_READ;
1891 } else {
1892 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1893 }
1894 wp->hitaddr = vaddr;
1895 wp->hitattrs = attrs;
1896 if (!cpu->watchpoint_hit) {
1897 cpu->watchpoint_hit = wp;
1898 tb_check_watchpoint(cpu);
1899 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1900 cpu->exception_index = EXCP_DEBUG;
1901 cpu_loop_exit(cpu);
1902 } else {
1903 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1904 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1905 cpu_resume_from_signal(cpu, NULL);
1906 }
1907 }
1908 } else {
1909 wp->flags &= ~BP_WATCHPOINT_HIT;
1910 }
1911 }
1912 }
1913
1914 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1915 so these check for a hit then pass through to the normal out-of-line
1916 phys routines. */
1917 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
1918 unsigned size, MemTxAttrs attrs)
1919 {
1920 MemTxResult res;
1921 uint64_t data;
1922
1923 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
1924 switch (size) {
1925 case 1:
1926 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
1927 break;
1928 case 2:
1929 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
1930 break;
1931 case 4:
1932 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
1933 break;
1934 default: abort();
1935 }
1936 *pdata = data;
1937 return res;
1938 }
1939
1940 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
1941 uint64_t val, unsigned size,
1942 MemTxAttrs attrs)
1943 {
1944 MemTxResult res;
1945
1946 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
1947 switch (size) {
1948 case 1:
1949 address_space_stb(&address_space_memory, addr, val, attrs, &res);
1950 break;
1951 case 2:
1952 address_space_stw(&address_space_memory, addr, val, attrs, &res);
1953 break;
1954 case 4:
1955 address_space_stl(&address_space_memory, addr, val, attrs, &res);
1956 break;
1957 default: abort();
1958 }
1959 return res;
1960 }
1961
1962 static const MemoryRegionOps watch_mem_ops = {
1963 .read_with_attrs = watch_mem_read,
1964 .write_with_attrs = watch_mem_write,
1965 .endianness = DEVICE_NATIVE_ENDIAN,
1966 };
1967
1968 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
1969 unsigned len, MemTxAttrs attrs)
1970 {
1971 subpage_t *subpage = opaque;
1972 uint8_t buf[8];
1973 MemTxResult res;
1974
1975 #if defined(DEBUG_SUBPAGE)
1976 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1977 subpage, len, addr);
1978 #endif
1979 res = address_space_read(subpage->as, addr + subpage->base,
1980 attrs, buf, len);
1981 if (res) {
1982 return res;
1983 }
1984 switch (len) {
1985 case 1:
1986 *data = ldub_p(buf);
1987 return MEMTX_OK;
1988 case 2:
1989 *data = lduw_p(buf);
1990 return MEMTX_OK;
1991 case 4:
1992 *data = ldl_p(buf);
1993 return MEMTX_OK;
1994 case 8:
1995 *data = ldq_p(buf);
1996 return MEMTX_OK;
1997 default:
1998 abort();
1999 }
2000 }
2001
2002 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2003 uint64_t value, unsigned len, MemTxAttrs attrs)
2004 {
2005 subpage_t *subpage = opaque;
2006 uint8_t buf[8];
2007
2008 #if defined(DEBUG_SUBPAGE)
2009 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2010 " value %"PRIx64"\n",
2011 __func__, subpage, len, addr, value);
2012 #endif
2013 switch (len) {
2014 case 1:
2015 stb_p(buf, value);
2016 break;
2017 case 2:
2018 stw_p(buf, value);
2019 break;
2020 case 4:
2021 stl_p(buf, value);
2022 break;
2023 case 8:
2024 stq_p(buf, value);
2025 break;
2026 default:
2027 abort();
2028 }
2029 return address_space_write(subpage->as, addr + subpage->base,
2030 attrs, buf, len);
2031 }
2032
2033 static bool subpage_accepts(void *opaque, hwaddr addr,
2034 unsigned len, bool is_write)
2035 {
2036 subpage_t *subpage = opaque;
2037 #if defined(DEBUG_SUBPAGE)
2038 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2039 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2040 #endif
2041
2042 return address_space_access_valid(subpage->as, addr + subpage->base,
2043 len, is_write);
2044 }
2045
2046 static const MemoryRegionOps subpage_ops = {
2047 .read_with_attrs = subpage_read,
2048 .write_with_attrs = subpage_write,
2049 .impl.min_access_size = 1,
2050 .impl.max_access_size = 8,
2051 .valid.min_access_size = 1,
2052 .valid.max_access_size = 8,
2053 .valid.accepts = subpage_accepts,
2054 .endianness = DEVICE_NATIVE_ENDIAN,
2055 };
2056
2057 static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
2058 uint16_t section)
2059 {
2060 int idx, eidx;
2061
2062 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2063 return -1;
2064 idx = SUBPAGE_IDX(start);
2065 eidx = SUBPAGE_IDX(end);
2066 #if defined(DEBUG_SUBPAGE)
2067 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2068 __func__, mmio, start, end, idx, eidx, section);
2069 #endif
2070 for (; idx <= eidx; idx++) {
2071 mmio->sub_section[idx] = section;
2072 }
2073
2074 return 0;
2075 }
2076
2077 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2078 {
2079 subpage_t *mmio;
2080
2081 mmio = g_malloc0(sizeof(subpage_t));
2082
2083 mmio->as = as;
2084 mmio->base = base;
2085 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2086 NULL, TARGET_PAGE_SIZE);
2087 mmio->iomem.subpage = true;
2088 #if defined(DEBUG_SUBPAGE)
2089 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2090 mmio, base, TARGET_PAGE_SIZE);
2091 #endif
2092 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2093
2094 return mmio;
2095 }
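/*
 * Illustrative sketch (not part of upstream exec.c): how the dispatch
 * code carves one target page into sub-sections.  "section_idx" stands
 * for a value returned by phys_section_add(); the byte range is made up
 * for the example.
 */
static void example_split_page(AddressSpace *as, hwaddr page_base,
                               uint16_t section_idx)
{
    subpage_t *sp = subpage_init(as, page_base);

    /* bytes 0x100..0x1ff of the page dispatch to section_idx;
     * the rest of the page stays PHYS_SECTION_UNASSIGNED */
    subpage_register(sp, 0x100, 0x1ff, section_idx);
}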
2096
2097 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2098 MemoryRegion *mr)
2099 {
2100 assert(as);
2101 MemoryRegionSection section = {
2102 .address_space = as,
2103 .mr = mr,
2104 .offset_within_address_space = 0,
2105 .offset_within_region = 0,
2106 .size = int128_2_64(),
2107 };
2108
2109 return phys_section_add(map, &section);
2110 }
2111
2112 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2113 {
2114 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2115 MemoryRegionSection *sections = d->map.sections;
2116
2117 return sections[index & ~TARGET_PAGE_MASK].mr;
2118 }
2119
2120 static void io_mem_init(void)
2121 {
2122 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2123 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2124 NULL, UINT64_MAX);
2125 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2126 NULL, UINT64_MAX);
2127 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2128 NULL, UINT64_MAX);
2129 }
2130
2131 static void mem_begin(MemoryListener *listener)
2132 {
2133 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2134 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2135 uint16_t n;
2136
2137 n = dummy_section(&d->map, as, &io_mem_unassigned);
2138 assert(n == PHYS_SECTION_UNASSIGNED);
2139 n = dummy_section(&d->map, as, &io_mem_notdirty);
2140 assert(n == PHYS_SECTION_NOTDIRTY);
2141 n = dummy_section(&d->map, as, &io_mem_rom);
2142 assert(n == PHYS_SECTION_ROM);
2143 n = dummy_section(&d->map, as, &io_mem_watch);
2144 assert(n == PHYS_SECTION_WATCH);
2145
2146 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2147 d->as = as;
2148 as->next_dispatch = d;
2149 }
2150
2151 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2152 {
2153 phys_sections_free(&d->map);
2154 g_free(d);
2155 }
2156
2157 static void mem_commit(MemoryListener *listener)
2158 {
2159 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2160 AddressSpaceDispatch *cur = as->dispatch;
2161 AddressSpaceDispatch *next = as->next_dispatch;
2162
2163 phys_page_compact_all(next, next->map.nodes_nb);
2164
2165 atomic_rcu_set(&as->dispatch, next);
2166 if (cur) {
2167 call_rcu(cur, address_space_dispatch_free, rcu);
2168 }
2169 }
2170
2171 static void tcg_commit(MemoryListener *listener)
2172 {
2173 CPUState *cpu;
2174
2175 /* since each CPU stores ram addresses in its TLB cache, we must
2176 reset the modified entries */
2177 /* XXX: slow ! */
2178 CPU_FOREACH(cpu) {
2179 /* FIXME: Disentangle the cpu.h circular files deps so we can
2180 directly get the right CPU from listener. */
2181 if (cpu->tcg_as_listener != listener) {
2182 continue;
2183 }
2184 cpu_reload_memory_map(cpu);
2185 }
2186 }
2187
2188 void address_space_init_dispatch(AddressSpace *as)
2189 {
2190 as->dispatch = NULL;
2191 as->dispatch_listener = (MemoryListener) {
2192 .begin = mem_begin,
2193 .commit = mem_commit,
2194 .region_add = mem_add,
2195 .region_nop = mem_add,
2196 .priority = 0,
2197 };
2198 memory_listener_register(&as->dispatch_listener, as);
2199 }
2200
2201 void address_space_unregister(AddressSpace *as)
2202 {
2203 memory_listener_unregister(&as->dispatch_listener);
2204 }
2205
2206 void address_space_destroy_dispatch(AddressSpace *as)
2207 {
2208 AddressSpaceDispatch *d = as->dispatch;
2209
2210 atomic_rcu_set(&as->dispatch, NULL);
2211 if (d) {
2212 call_rcu(d, address_space_dispatch_free, rcu);
2213 }
2214 }
2215
2216 static void memory_map_init(void)
2217 {
2218 system_memory = g_malloc(sizeof(*system_memory));
2219
2220 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2221 address_space_init(&address_space_memory, system_memory, "memory");
2222
2223 system_io = g_malloc(sizeof(*system_io));
2224 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2225 65536);
2226 address_space_init(&address_space_io, system_io, "I/O");
2227 }
2228
2229 MemoryRegion *get_system_memory(void)
2230 {
2231 return system_memory;
2232 }
2233
2234 MemoryRegion *get_system_io(void)
2235 {
2236 return system_io;
2237 }
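/*
 * Illustrative sketch (not part of upstream exec.c): a board model
 * typically plugs its RAM into the flat "system" region created above.
 * The MemoryRegion passed in would normally come from
 * memory_region_allocate_system_memory(); the base address is whatever
 * the board's memory map dictates.
 */
static void example_map_board_ram(MemoryRegion *ram, hwaddr base)
{
    memory_region_add_subregion(get_system_memory(), base, ram);
}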
2238
2239 #endif /* !defined(CONFIG_USER_ONLY) */
2240
2241 /* physical memory access (slow version, mainly for debug) */
2242 #if defined(CONFIG_USER_ONLY)
2243 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2244 uint8_t *buf, int len, int is_write)
2245 {
2246 int l, flags;
2247 target_ulong page;
2248 void *p;
2249
2250 while (len > 0) {
2251 page = addr & TARGET_PAGE_MASK;
2252 l = (page + TARGET_PAGE_SIZE) - addr;
2253 if (l > len)
2254 l = len;
2255 flags = page_get_flags(page);
2256 if (!(flags & PAGE_VALID))
2257 return -1;
2258 if (is_write) {
2259 if (!(flags & PAGE_WRITE))
2260 return -1;
2261 /* XXX: this code should not depend on lock_user */
2262 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2263 return -1;
2264 memcpy(p, buf, l);
2265 unlock_user(p, addr, l);
2266 } else {
2267 if (!(flags & PAGE_READ))
2268 return -1;
2269 /* XXX: this code should not depend on lock_user */
2270 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2271 return -1;
2272 memcpy(buf, p, l);
2273 unlock_user(p, addr, 0);
2274 }
2275 len -= l;
2276 buf += l;
2277 addr += l;
2278 }
2279 return 0;
2280 }
2281
2282 #else
2283
2284 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2285 hwaddr length)
2286 {
2287 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2288 /* No early return if dirty_log_mask is or becomes 0, because
2289 * cpu_physical_memory_set_dirty_range will still call
2290 * xen_modified_memory.
2291 */
2292 if (dirty_log_mask) {
2293 dirty_log_mask =
2294 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2295 }
2296 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2297 tb_invalidate_phys_range(addr, addr + length);
2298 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2299 }
2300 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2301 }
2302
2303 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2304 {
2305 unsigned access_size_max = mr->ops->valid.max_access_size;
2306
2307 /* Regions are assumed to support 1-4 byte accesses unless
2308 otherwise specified. */
2309 if (access_size_max == 0) {
2310 access_size_max = 4;
2311 }
2312
2313 /* Bound the maximum access by the alignment of the address. */
2314 if (!mr->ops->impl.unaligned) {
2315 unsigned align_size_max = addr & -addr;
2316 if (align_size_max != 0 && align_size_max < access_size_max) {
2317 access_size_max = align_size_max;
2318 }
2319 }
2320
2321 /* Don't attempt accesses larger than the maximum. */
2322 if (l > access_size_max) {
2323 l = access_size_max;
2324 }
2325 if (l & (l - 1)) {
2326 l = 1 << (qemu_fls(l) - 1);
2327 }
2328
2329 return l;
2330 }
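/*
 * Worked example (illustrative only): a 6-byte write at offset 2 into a
 * region whose ops declare .valid.max_access_size = 4 and no unaligned
 * support is split by address_space_rw() below as
 *
 *     memory_access_size(mr, 6, 2) -> 2   (alignment of the address)
 *     memory_access_size(mr, 4, 4) -> 4   (capped by max_access_size)
 *
 * i.e. one 16-bit dispatch followed by one 32-bit dispatch.
 */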
2331
2332 static bool prepare_mmio_access(MemoryRegion *mr)
2333 {
2334 bool unlocked = !qemu_mutex_iothread_locked();
2335 bool release_lock = false;
2336
2337 if (unlocked && mr->global_locking) {
2338 qemu_mutex_lock_iothread();
2339 unlocked = false;
2340 release_lock = true;
2341 }
2342 if (mr->flush_coalesced_mmio) {
2343 if (unlocked) {
2344 qemu_mutex_lock_iothread();
2345 }
2346 qemu_flush_coalesced_mmio_buffer();
2347 if (unlocked) {
2348 qemu_mutex_unlock_iothread();
2349 }
2350 }
2351
2352 return release_lock;
2353 }
2354
2355 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2356 uint8_t *buf, int len, bool is_write)
2357 {
2358 hwaddr l;
2359 uint8_t *ptr;
2360 uint64_t val;
2361 hwaddr addr1;
2362 MemoryRegion *mr;
2363 MemTxResult result = MEMTX_OK;
2364 bool release_lock = false;
2365
2366 rcu_read_lock();
2367 while (len > 0) {
2368 l = len;
2369 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2370
2371 if (is_write) {
2372 if (!memory_access_is_direct(mr, is_write)) {
2373 release_lock |= prepare_mmio_access(mr);
2374 l = memory_access_size(mr, l, addr1);
2375 /* XXX: could force current_cpu to NULL to avoid
2376 potential bugs */
2377 switch (l) {
2378 case 8:
2379 /* 64 bit write access */
2380 val = ldq_p(buf);
2381 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2382 attrs);
2383 break;
2384 case 4:
2385 /* 32 bit write access */
2386 val = ldl_p(buf);
2387 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2388 attrs);
2389 break;
2390 case 2:
2391 /* 16 bit write access */
2392 val = lduw_p(buf);
2393 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2394 attrs);
2395 break;
2396 case 1:
2397 /* 8 bit write access */
2398 val = ldub_p(buf);
2399 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2400 attrs);
2401 break;
2402 default:
2403 abort();
2404 }
2405 } else {
2406 addr1 += memory_region_get_ram_addr(mr);
2407 /* RAM case */
2408 ptr = qemu_get_ram_ptr(addr1);
2409 memcpy(ptr, buf, l);
2410 invalidate_and_set_dirty(mr, addr1, l);
2411 }
2412 } else {
2413 if (!memory_access_is_direct(mr, is_write)) {
2414 /* I/O case */
2415 release_lock |= prepare_mmio_access(mr);
2416 l = memory_access_size(mr, l, addr1);
2417 switch (l) {
2418 case 8:
2419 /* 64 bit read access */
2420 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2421 attrs);
2422 stq_p(buf, val);
2423 break;
2424 case 4:
2425 /* 32 bit read access */
2426 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2427 attrs);
2428 stl_p(buf, val);
2429 break;
2430 case 2:
2431 /* 16 bit read access */
2432 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2433 attrs);
2434 stw_p(buf, val);
2435 break;
2436 case 1:
2437 /* 8 bit read access */
2438 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2439 attrs);
2440 stb_p(buf, val);
2441 break;
2442 default:
2443 abort();
2444 }
2445 } else {
2446 /* RAM case */
2447 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2448 memcpy(buf, ptr, l);
2449 }
2450 }
2451
2452 if (release_lock) {
2453 qemu_mutex_unlock_iothread();
2454 release_lock = false;
2455 }
2456
2457 len -= l;
2458 buf += l;
2459 addr += l;
2460 }
2461 rcu_read_unlock();
2462
2463 return result;
2464 }
2465
2466 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2467 const uint8_t *buf, int len)
2468 {
2469 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2470 }
2471
2472 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2473 uint8_t *buf, int len)
2474 {
2475 return address_space_rw(as, addr, attrs, buf, len, false);
2476 }
2477
2478
2479 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2480 int len, int is_write)
2481 {
2482 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2483 buf, len, is_write);
2484 }
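/*
 * Illustrative sketch (not part of upstream exec.c): reading a guest
 * physical range through the slow path above.  The address and buffer
 * size are invented for the example.
 */
static void example_read_guest_phys(hwaddr gpa)
{
    uint8_t buf[16];
    MemTxResult res;

    /* checked variant: the MemTxResult reports decode/device errors */
    res = address_space_read(&address_space_memory, gpa,
                             MEMTXATTRS_UNSPECIFIED, buf, sizeof(buf));
    if (res != MEMTX_OK) {
        /* e.g. the range hit an unassigned or failing MMIO region */
    }

    /* legacy helper: same copy, but the transaction result is discarded */
    cpu_physical_memory_rw(gpa, buf, sizeof(buf), 0);
}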
2485
2486 enum write_rom_type {
2487 WRITE_DATA,
2488 FLUSH_CACHE,
2489 };
2490
2491 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2492 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2493 {
2494 hwaddr l;
2495 uint8_t *ptr;
2496 hwaddr addr1;
2497 MemoryRegion *mr;
2498
2499 rcu_read_lock();
2500 while (len > 0) {
2501 l = len;
2502 mr = address_space_translate(as, addr, &addr1, &l, true);
2503
2504 if (!(memory_region_is_ram(mr) ||
2505 memory_region_is_romd(mr))) {
2506 l = memory_access_size(mr, l, addr1);
2507 } else {
2508 addr1 += memory_region_get_ram_addr(mr);
2509 /* ROM/RAM case */
2510 ptr = qemu_get_ram_ptr(addr1);
2511 switch (type) {
2512 case WRITE_DATA:
2513 memcpy(ptr, buf, l);
2514 invalidate_and_set_dirty(mr, addr1, l);
2515 break;
2516 case FLUSH_CACHE:
2517 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2518 break;
2519 }
2520 }
2521 len -= l;
2522 buf += l;
2523 addr += l;
2524 }
2525 rcu_read_unlock();
2526 }
2527
2528 /* used for ROM loading: can write to both RAM and ROM */
2529 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2530 const uint8_t *buf, int len)
2531 {
2532 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2533 }
2534
2535 void cpu_flush_icache_range(hwaddr start, int len)
2536 {
2537 /*
2538 * This function should do the same thing as an icache flush that was
2539 * triggered from within the guest. For TCG we are always cache coherent,
2540 * so there is no need to flush anything. For KVM / Xen we need to flush
2541 * the host's instruction cache at least.
2542 */
2543 if (tcg_enabled()) {
2544 return;
2545 }
2546
2547 cpu_physical_memory_write_rom_internal(&address_space_memory,
2548 start, NULL, len, FLUSH_CACHE);
2549 }
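/*
 * Illustrative sketch (not part of upstream exec.c): a firmware loader
 * would push its image with cpu_physical_memory_write_rom() so that
 * ROM-device (romd) regions get written as well, and then flush the host
 * instruction cache for the benefit of KVM/Xen.  The destination and
 * blob are example parameters.
 */
static void example_load_blob(AddressSpace *as, hwaddr dest,
                              const uint8_t *blob, int size)
{
    cpu_physical_memory_write_rom(as, dest, blob, size);
    cpu_flush_icache_range(dest, size);
}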
2550
2551 typedef struct {
2552 MemoryRegion *mr;
2553 void *buffer;
2554 hwaddr addr;
2555 hwaddr len;
2556 bool in_use;
2557 } BounceBuffer;
2558
2559 static BounceBuffer bounce;
2560
2561 typedef struct MapClient {
2562 QEMUBH *bh;
2563 QLIST_ENTRY(MapClient) link;
2564 } MapClient;
2565
2566 QemuMutex map_client_list_lock;
2567 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2568 = QLIST_HEAD_INITIALIZER(map_client_list);
2569
2570 static void cpu_unregister_map_client_do(MapClient *client)
2571 {
2572 QLIST_REMOVE(client, link);
2573 g_free(client);
2574 }
2575
2576 static void cpu_notify_map_clients_locked(void)
2577 {
2578 MapClient *client;
2579
2580 while (!QLIST_EMPTY(&map_client_list)) {
2581 client = QLIST_FIRST(&map_client_list);
2582 qemu_bh_schedule(client->bh);
2583 cpu_unregister_map_client_do(client);
2584 }
2585 }
2586
2587 void cpu_register_map_client(QEMUBH *bh)
2588 {
2589 MapClient *client = g_malloc(sizeof(*client));
2590
2591 qemu_mutex_lock(&map_client_list_lock);
2592 client->bh = bh;
2593 QLIST_INSERT_HEAD(&map_client_list, client, link);
2594 if (!atomic_read(&bounce.in_use)) {
2595 cpu_notify_map_clients_locked();
2596 }
2597 qemu_mutex_unlock(&map_client_list_lock);
2598 }
2599
2600 void cpu_exec_init_all(void)
2601 {
2602 qemu_mutex_init(&ram_list.mutex);
2603 memory_map_init();
2604 io_mem_init();
2605 qemu_mutex_init(&map_client_list_lock);
2606 }
2607
2608 void cpu_unregister_map_client(QEMUBH *bh)
2609 {
2610 MapClient *client;
2611
2612 qemu_mutex_lock(&map_client_list_lock);
2613 QLIST_FOREACH(client, &map_client_list, link) {
2614 if (client->bh == bh) {
2615 cpu_unregister_map_client_do(client);
2616 break;
2617 }
2618 }
2619 qemu_mutex_unlock(&map_client_list_lock);
2620 }
2621
2622 static void cpu_notify_map_clients(void)
2623 {
2624 qemu_mutex_lock(&map_client_list_lock);
2625 cpu_notify_map_clients_locked();
2626 qemu_mutex_unlock(&map_client_list_lock);
2627 }
2628
2629 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2630 {
2631 MemoryRegion *mr;
2632 hwaddr l, xlat;
2633
2634 rcu_read_lock();
2635 while (len > 0) {
2636 l = len;
2637 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2638 if (!memory_access_is_direct(mr, is_write)) {
2639 l = memory_access_size(mr, l, addr);
2640 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2641 rcu_read_unlock();
2641 return false;
2642 }
2643 }
2644
2645 len -= l;
2646 addr += l;
2647 }
2648 rcu_read_unlock();
2649 return true;
2650 }
2651
2652 /* Map a physical memory region into a host virtual address.
2653 * May map a subset of the requested range, given by and returned in *plen.
2654 * May return NULL if resources needed to perform the mapping are exhausted.
2655 * Use only for reads OR writes - not for read-modify-write operations.
2656 * Use cpu_register_map_client() to know when retrying the map operation is
2657 * likely to succeed.
2658 */
2659 void *address_space_map(AddressSpace *as,
2660 hwaddr addr,
2661 hwaddr *plen,
2662 bool is_write)
2663 {
2664 hwaddr len = *plen;
2665 hwaddr done = 0;
2666 hwaddr l, xlat, base;
2667 MemoryRegion *mr, *this_mr;
2668 ram_addr_t raddr;
2669
2670 if (len == 0) {
2671 return NULL;
2672 }
2673
2674 l = len;
2675 rcu_read_lock();
2676 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2677
2678 if (!memory_access_is_direct(mr, is_write)) {
2679 if (atomic_xchg(&bounce.in_use, true)) {
2680 rcu_read_unlock();
2681 return NULL;
2682 }
2683 /* Avoid unbounded allocations */
2684 l = MIN(l, TARGET_PAGE_SIZE);
2685 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2686 bounce.addr = addr;
2687 bounce.len = l;
2688
2689 memory_region_ref(mr);
2690 bounce.mr = mr;
2691 if (!is_write) {
2692 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2693 bounce.buffer, l);
2694 }
2695
2696 rcu_read_unlock();
2697 *plen = l;
2698 return bounce.buffer;
2699 }
2700
2701 base = xlat;
2702 raddr = memory_region_get_ram_addr(mr);
2703
2704 for (;;) {
2705 len -= l;
2706 addr += l;
2707 done += l;
2708 if (len == 0) {
2709 break;
2710 }
2711
2712 l = len;
2713 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2714 if (this_mr != mr || xlat != base + done) {
2715 break;
2716 }
2717 }
2718
2719 memory_region_ref(mr);
2720 rcu_read_unlock();
2721 *plen = done;
2722 return qemu_ram_ptr_length(raddr + base, plen);
2723 }
2724
2725 /* Unmaps a memory region previously mapped by address_space_map().
2726 * Will also mark the memory as dirty if is_write == 1. access_len gives
2727 * the amount of memory that was actually read or written by the caller.
2728 */
2729 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2730 int is_write, hwaddr access_len)
2731 {
2732 if (buffer != bounce.buffer) {
2733 MemoryRegion *mr;
2734 ram_addr_t addr1;
2735
2736 mr = qemu_ram_addr_from_host(buffer, &addr1);
2737 assert(mr != NULL);
2738 if (is_write) {
2739 invalidate_and_set_dirty(mr, addr1, access_len);
2740 }
2741 if (xen_enabled()) {
2742 xen_invalidate_map_cache_entry(buffer);
2743 }
2744 memory_region_unref(mr);
2745 return;
2746 }
2747 if (is_write) {
2748 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2749 bounce.buffer, access_len);
2750 }
2751 qemu_vfree(bounce.buffer);
2752 bounce.buffer = NULL;
2753 memory_region_unref(bounce.mr);
2754 atomic_mb_set(&bounce.in_use, false);
2755 cpu_notify_map_clients();
2756 }
2757
2758 void *cpu_physical_memory_map(hwaddr addr,
2759 hwaddr *plen,
2760 int is_write)
2761 {
2762 return address_space_map(&address_space_memory, addr, plen, is_write);
2763 }
2764
2765 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2766 int is_write, hwaddr access_len)
2767 {
2768 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2769 }
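/*
 * Illustrative sketch (not part of upstream exec.c): typical DMA-style
 * use of the map/unmap pair above.  When the target is not directly
 * accessible RAM the single bounce buffer may be busy, in which case
 * address_space_map() returns NULL; callers then register a map client
 * (a QEMUBH, here called "retry_bh" purely for the example) and retry
 * from that bottom half.
 */
static bool example_dma_write(AddressSpace *as, hwaddr addr,
                              const uint8_t *data, hwaddr size,
                              QEMUBH *retry_bh)
{
    hwaddr plen = size;
    void *host = address_space_map(as, addr, &plen, true);

    if (!host) {
        cpu_register_map_client(retry_bh);  /* notified when bounce frees */
        return false;
    }
    memcpy(host, data, plen);               /* plen may be less than size */
    address_space_unmap(as, host, plen, true, plen);
    return true;
}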
2770
2771 /* warning: addr must be aligned */
2772 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2773 MemTxAttrs attrs,
2774 MemTxResult *result,
2775 enum device_endian endian)
2776 {
2777 uint8_t *ptr;
2778 uint64_t val;
2779 MemoryRegion *mr;
2780 hwaddr l = 4;
2781 hwaddr addr1;
2782 MemTxResult r;
2783 bool release_lock = false;
2784
2785 rcu_read_lock();
2786 mr = address_space_translate(as, addr, &addr1, &l, false);
2787 if (l < 4 || !memory_access_is_direct(mr, false)) {
2788 release_lock |= prepare_mmio_access(mr);
2789
2790 /* I/O case */
2791 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2792 #if defined(TARGET_WORDS_BIGENDIAN)
2793 if (endian == DEVICE_LITTLE_ENDIAN) {
2794 val = bswap32(val);
2795 }
2796 #else
2797 if (endian == DEVICE_BIG_ENDIAN) {
2798 val = bswap32(val);
2799 }
2800 #endif
2801 } else {
2802 /* RAM case */
2803 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2804 & TARGET_PAGE_MASK)
2805 + addr1);
2806 switch (endian) {
2807 case DEVICE_LITTLE_ENDIAN:
2808 val = ldl_le_p(ptr);
2809 break;
2810 case DEVICE_BIG_ENDIAN:
2811 val = ldl_be_p(ptr);
2812 break;
2813 default:
2814 val = ldl_p(ptr);
2815 break;
2816 }
2817 r = MEMTX_OK;
2818 }
2819 if (result) {
2820 *result = r;
2821 }
2822 if (release_lock) {
2823 qemu_mutex_unlock_iothread();
2824 }
2825 rcu_read_unlock();
2826 return val;
2827 }
2828
2829 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2830 MemTxAttrs attrs, MemTxResult *result)
2831 {
2832 return address_space_ldl_internal(as, addr, attrs, result,
2833 DEVICE_NATIVE_ENDIAN);
2834 }
2835
2836 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2837 MemTxAttrs attrs, MemTxResult *result)
2838 {
2839 return address_space_ldl_internal(as, addr, attrs, result,
2840 DEVICE_LITTLE_ENDIAN);
2841 }
2842
2843 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2844 MemTxAttrs attrs, MemTxResult *result)
2845 {
2846 return address_space_ldl_internal(as, addr, attrs, result,
2847 DEVICE_BIG_ENDIAN);
2848 }
2849
2850 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2851 {
2852 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2853 }
2854
2855 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2856 {
2857 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2858 }
2859
2860 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2861 {
2862 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2863 }
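/*
 * Illustrative sketch (not part of upstream exec.c): reading a 32-bit
 * little-endian field out of a guest-physical table with the helpers
 * above.  The +4 offset is an invented layout, not a real structure.
 */
static uint32_t example_read_le32_field(hwaddr table_base)
{
    MemTxResult res;
    uint32_t v = address_space_ldl_le(&address_space_memory, table_base + 4,
                                      MEMTXATTRS_UNSPECIFIED, &res);

    return res == MEMTX_OK ? v : 0;
}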
2864
2865 /* warning: addr must be aligned */
2866 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2867 MemTxAttrs attrs,
2868 MemTxResult *result,
2869 enum device_endian endian)
2870 {
2871 uint8_t *ptr;
2872 uint64_t val;
2873 MemoryRegion *mr;
2874 hwaddr l = 8;
2875 hwaddr addr1;
2876 MemTxResult r;
2877 bool release_lock = false;
2878
2879 rcu_read_lock();
2880 mr = address_space_translate(as, addr, &addr1, &l,
2881 false);
2882 if (l < 8 || !memory_access_is_direct(mr, false)) {
2883 release_lock |= prepare_mmio_access(mr);
2884
2885 /* I/O case */
2886 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
2887 #if defined(TARGET_WORDS_BIGENDIAN)
2888 if (endian == DEVICE_LITTLE_ENDIAN) {
2889 val = bswap64(val);
2890 }
2891 #else
2892 if (endian == DEVICE_BIG_ENDIAN) {
2893 val = bswap64(val);
2894 }
2895 #endif
2896 } else {
2897 /* RAM case */
2898 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2899 & TARGET_PAGE_MASK)
2900 + addr1);
2901 switch (endian) {
2902 case DEVICE_LITTLE_ENDIAN:
2903 val = ldq_le_p(ptr);
2904 break;
2905 case DEVICE_BIG_ENDIAN:
2906 val = ldq_be_p(ptr);
2907 break;
2908 default:
2909 val = ldq_p(ptr);
2910 break;
2911 }
2912 r = MEMTX_OK;
2913 }
2914 if (result) {
2915 *result = r;
2916 }
2917 if (release_lock) {
2918 qemu_mutex_unlock_iothread();
2919 }
2920 rcu_read_unlock();
2921 return val;
2922 }
2923
2924 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
2925 MemTxAttrs attrs, MemTxResult *result)
2926 {
2927 return address_space_ldq_internal(as, addr, attrs, result,
2928 DEVICE_NATIVE_ENDIAN);
2929 }
2930
2931 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
2932 MemTxAttrs attrs, MemTxResult *result)
2933 {
2934 return address_space_ldq_internal(as, addr, attrs, result,
2935 DEVICE_LITTLE_ENDIAN);
2936 }
2937
2938 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
2939 MemTxAttrs attrs, MemTxResult *result)
2940 {
2941 return address_space_ldq_internal(as, addr, attrs, result,
2942 DEVICE_BIG_ENDIAN);
2943 }
2944
2945 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2946 {
2947 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2948 }
2949
2950 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2951 {
2952 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2953 }
2954
2955 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2956 {
2957 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2958 }
2959
2960 /* XXX: optimize */
2961 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
2962 MemTxAttrs attrs, MemTxResult *result)
2963 {
2964 uint8_t val;
2965 MemTxResult r;
2966
2967 r = address_space_rw(as, addr, attrs, &val, 1, 0);
2968 if (result) {
2969 *result = r;
2970 }
2971 return val;
2972 }
2973
2974 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2975 {
2976 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2977 }
2978
2979 /* warning: addr must be aligned */
2980 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
2981 hwaddr addr,
2982 MemTxAttrs attrs,
2983 MemTxResult *result,
2984 enum device_endian endian)
2985 {
2986 uint8_t *ptr;
2987 uint64_t val;
2988 MemoryRegion *mr;
2989 hwaddr l = 2;
2990 hwaddr addr1;
2991 MemTxResult r;
2992 bool release_lock = false;
2993
2994 rcu_read_lock();
2995 mr = address_space_translate(as, addr, &addr1, &l,
2996 false);
2997 if (l < 2 || !memory_access_is_direct(mr, false)) {
2998 release_lock |= prepare_mmio_access(mr);
2999
3000 /* I/O case */
3001 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3002 #if defined(TARGET_WORDS_BIGENDIAN)
3003 if (endian == DEVICE_LITTLE_ENDIAN) {
3004 val = bswap16(val);
3005 }
3006 #else
3007 if (endian == DEVICE_BIG_ENDIAN) {
3008 val = bswap16(val);
3009 }
3010 #endif
3011 } else {
3012 /* RAM case */
3013 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3014 & TARGET_PAGE_MASK)
3015 + addr1);
3016 switch (endian) {
3017 case DEVICE_LITTLE_ENDIAN:
3018 val = lduw_le_p(ptr);
3019 break;
3020 case DEVICE_BIG_ENDIAN:
3021 val = lduw_be_p(ptr);
3022 break;
3023 default:
3024 val = lduw_p(ptr);
3025 break;
3026 }
3027 r = MEMTX_OK;
3028 }
3029 if (result) {
3030 *result = r;
3031 }
3032 if (release_lock) {
3033 qemu_mutex_unlock_iothread();
3034 }
3035 rcu_read_unlock();
3036 return val;
3037 }
3038
3039 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3040 MemTxAttrs attrs, MemTxResult *result)
3041 {
3042 return address_space_lduw_internal(as, addr, attrs, result,
3043 DEVICE_NATIVE_ENDIAN);
3044 }
3045
3046 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3047 MemTxAttrs attrs, MemTxResult *result)
3048 {
3049 return address_space_lduw_internal(as, addr, attrs, result,
3050 DEVICE_LITTLE_ENDIAN);
3051 }
3052
3053 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3054 MemTxAttrs attrs, MemTxResult *result)
3055 {
3056 return address_space_lduw_internal(as, addr, attrs, result,
3057 DEVICE_BIG_ENDIAN);
3058 }
3059
3060 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3061 {
3062 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3063 }
3064
3065 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3066 {
3067 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3068 }
3069
3070 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3071 {
3072 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3073 }
3074
3075 /* warning: addr must be aligned. The RAM page is not marked as dirty
3076 and the code inside is not invalidated. This is useful if the dirty
3077 bits are used to track modified PTEs */
3078 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3079 MemTxAttrs attrs, MemTxResult *result)
3080 {
3081 uint8_t *ptr;
3082 MemoryRegion *mr;
3083 hwaddr l = 4;
3084 hwaddr addr1;
3085 MemTxResult r;
3086 uint8_t dirty_log_mask;
3087 bool release_lock = false;
3088
3089 rcu_read_lock();
3090 mr = address_space_translate(as, addr, &addr1, &l,
3091 true);
3092 if (l < 4 || !memory_access_is_direct(mr, true)) {
3093 release_lock |= prepare_mmio_access(mr);
3094
3095 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3096 } else {
3097 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3098 ptr = qemu_get_ram_ptr(addr1);
3099 stl_p(ptr, val);
3100
3101 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3102 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3103 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3104 r = MEMTX_OK;
3105 }
3106 if (result) {
3107 *result = r;
3108 }
3109 if (release_lock) {
3110 qemu_mutex_unlock_iothread();
3111 }
3112 rcu_read_unlock();
3113 }
3114
3115 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3116 {
3117 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3118 }
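/*
 * Illustrative sketch (not part of upstream exec.c): target code uses the
 * _notdirty store when it updates accessed/dirty bits inside a guest page
 * table entry, so that the bookkeeping write does not itself mark the page
 * dirty.  "pte_addr" and "accessed_bit" are example parameters.
 */
static void example_set_pte_accessed(CPUState *cpu, hwaddr pte_addr,
                                     uint32_t accessed_bit)
{
    uint32_t pte = ldl_phys(cpu->as, pte_addr);

    stl_phys_notdirty(cpu->as, pte_addr, pte | accessed_bit);
}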
3119
3120 /* warning: addr must be aligned */
3121 static inline void address_space_stl_internal(AddressSpace *as,
3122 hwaddr addr, uint32_t val,
3123 MemTxAttrs attrs,
3124 MemTxResult *result,
3125 enum device_endian endian)
3126 {
3127 uint8_t *ptr;
3128 MemoryRegion *mr;
3129 hwaddr l = 4;
3130 hwaddr addr1;
3131 MemTxResult r;
3132 bool release_lock = false;
3133
3134 rcu_read_lock();
3135 mr = address_space_translate(as, addr, &addr1, &l,
3136 true);
3137 if (l < 4 || !memory_access_is_direct(mr, true)) {
3138 release_lock |= prepare_mmio_access(mr);
3139
3140 #if defined(TARGET_WORDS_BIGENDIAN)
3141 if (endian == DEVICE_LITTLE_ENDIAN) {
3142 val = bswap32(val);
3143 }
3144 #else
3145 if (endian == DEVICE_BIG_ENDIAN) {
3146 val = bswap32(val);
3147 }
3148 #endif
3149 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3150 } else {
3151 /* RAM case */
3152 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3153 ptr = qemu_get_ram_ptr(addr1);
3154 switch (endian) {
3155 case DEVICE_LITTLE_ENDIAN:
3156 stl_le_p(ptr, val);
3157 break;
3158 case DEVICE_BIG_ENDIAN:
3159 stl_be_p(ptr, val);
3160 break;
3161 default:
3162 stl_p(ptr, val);
3163 break;
3164 }
3165 invalidate_and_set_dirty(mr, addr1, 4);
3166 r = MEMTX_OK;
3167 }
3168 if (result) {
3169 *result = r;
3170 }
3171 if (release_lock) {
3172 qemu_mutex_unlock_iothread();
3173 }
3174 rcu_read_unlock();
3175 }
3176
3177 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3178 MemTxAttrs attrs, MemTxResult *result)
3179 {
3180 address_space_stl_internal(as, addr, val, attrs, result,
3181 DEVICE_NATIVE_ENDIAN);
3182 }
3183
3184 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3185 MemTxAttrs attrs, MemTxResult *result)
3186 {
3187 address_space_stl_internal(as, addr, val, attrs, result,
3188 DEVICE_LITTLE_ENDIAN);
3189 }
3190
3191 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3192 MemTxAttrs attrs, MemTxResult *result)
3193 {
3194 address_space_stl_internal(as, addr, val, attrs, result,
3195 DEVICE_BIG_ENDIAN);
3196 }
3197
3198 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3199 {
3200 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3201 }
3202
3203 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3204 {
3205 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3206 }
3207
3208 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3209 {
3210 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3211 }
3212
3213 /* XXX: optimize */
3214 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3215 MemTxAttrs attrs, MemTxResult *result)
3216 {
3217 uint8_t v = val;
3218 MemTxResult r;
3219
3220 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3221 if (result) {
3222 *result = r;
3223 }
3224 }
3225
3226 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3227 {
3228 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3229 }
3230
3231 /* warning: addr must be aligned */
3232 static inline void address_space_stw_internal(AddressSpace *as,
3233 hwaddr addr, uint32_t val,
3234 MemTxAttrs attrs,
3235 MemTxResult *result,
3236 enum device_endian endian)
3237 {
3238 uint8_t *ptr;
3239 MemoryRegion *mr;
3240 hwaddr l = 2;
3241 hwaddr addr1;
3242 MemTxResult r;
3243 bool release_lock = false;
3244
3245 rcu_read_lock();
3246 mr = address_space_translate(as, addr, &addr1, &l, true);
3247 if (l < 2 || !memory_access_is_direct(mr, true)) {
3248 release_lock |= prepare_mmio_access(mr);
3249
3250 #if defined(TARGET_WORDS_BIGENDIAN)
3251 if (endian == DEVICE_LITTLE_ENDIAN) {
3252 val = bswap16(val);
3253 }
3254 #else
3255 if (endian == DEVICE_BIG_ENDIAN) {
3256 val = bswap16(val);
3257 }
3258 #endif
3259 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3260 } else {
3261 /* RAM case */
3262 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3263 ptr = qemu_get_ram_ptr(addr1);
3264 switch (endian) {
3265 case DEVICE_LITTLE_ENDIAN:
3266 stw_le_p(ptr, val);
3267 break;
3268 case DEVICE_BIG_ENDIAN:
3269 stw_be_p(ptr, val);
3270 break;
3271 default:
3272 stw_p(ptr, val);
3273 break;
3274 }
3275 invalidate_and_set_dirty(mr, addr1, 2);
3276 r = MEMTX_OK;
3277 }
3278 if (result) {
3279 *result = r;
3280 }
3281 if (release_lock) {
3282 qemu_mutex_unlock_iothread();
3283 }
3284 rcu_read_unlock();
3285 }
3286
3287 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3288 MemTxAttrs attrs, MemTxResult *result)
3289 {
3290 address_space_stw_internal(as, addr, val, attrs, result,
3291 DEVICE_NATIVE_ENDIAN);
3292 }
3293
3294 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3295 MemTxAttrs attrs, MemTxResult *result)
3296 {
3297 address_space_stw_internal(as, addr, val, attrs, result,
3298 DEVICE_LITTLE_ENDIAN);
3299 }
3300
3301 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3302 MemTxAttrs attrs, MemTxResult *result)
3303 {
3304 address_space_stw_internal(as, addr, val, attrs, result,
3305 DEVICE_BIG_ENDIAN);
3306 }
3307
3308 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3309 {
3310 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3311 }
3312
3313 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3314 {
3315 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3316 }
3317
3318 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3319 {
3320 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3321 }
3322
3323 /* XXX: optimize */
3324 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3325 MemTxAttrs attrs, MemTxResult *result)
3326 {
3327 MemTxResult r;
3328 val = tswap64(val);
3329 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3330 if (result) {
3331 *result = r;
3332 }
3333 }
3334
3335 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3336 MemTxAttrs attrs, MemTxResult *result)
3337 {
3338 MemTxResult r;
3339 val = cpu_to_le64(val);
3340 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3341 if (result) {
3342 *result = r;
3343 }
3344 }

3345 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3346 MemTxAttrs attrs, MemTxResult *result)
3347 {
3348 MemTxResult r;
3349 val = cpu_to_be64(val);
3350 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3351 if (result) {
3352 *result = r;
3353 }
3354 }
3355
3356 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3357 {
3358 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3359 }
3360
3361 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3362 {
3363 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3364 }
3365
3366 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3367 {
3368 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3369 }
3370
3371 /* virtual memory access for debug (includes writing to ROM) */
3372 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3373 uint8_t *buf, int len, int is_write)
3374 {
3375 int l;
3376 hwaddr phys_addr;
3377 target_ulong page;
3378
3379 while (len > 0) {
3380 page = addr & TARGET_PAGE_MASK;
3381 phys_addr = cpu_get_phys_page_debug(cpu, page);
3382 /* if no physical page mapped, return an error */
3383 if (phys_addr == -1)
3384 return -1;
3385 l = (page + TARGET_PAGE_SIZE) - addr;
3386 if (l > len)
3387 l = len;
3388 phys_addr += (addr & ~TARGET_PAGE_MASK);
3389 if (is_write) {
3390 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3391 } else {
3392 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3393 buf, l, 0);
3394 }
3395 len -= l;
3396 buf += l;
3397 addr += l;
3398 }
3399 return 0;
3400 }
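/*
 * Illustrative sketch (not part of upstream exec.c): this is essentially
 * what the gdbstub does when it services a memory read at a guest virtual
 * address; the buffer and size are example parameters.
 */
static int example_debug_peek(CPUState *cpu, target_ulong guest_va,
                              uint8_t *out, int size)
{
    return cpu_memory_rw_debug(cpu, guest_va, out, size, 0);
}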
3401 #endif
3402
3403 /*
3404 * A helper function for the _utterly broken_ virtio device model to find out if
3405 * it's running on a big endian machine. Don't do this at home kids!
3406 */
3407 bool target_words_bigendian(void);
3408 bool target_words_bigendian(void)
3409 {
3410 #if defined(TARGET_WORDS_BIGENDIAN)
3411 return true;
3412 #else
3413 return false;
3414 #endif
3415 }
3416
3417 #ifndef CONFIG_USER_ONLY
3418 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3419 {
3420 MemoryRegion *mr;
3421 hwaddr l = 1;
3422 bool res;
3423
3424 rcu_read_lock();
3425 mr = address_space_translate(&address_space_memory,
3426 phys_addr, &phys_addr, &l, false);
3427
3428 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3429 rcu_read_unlock();
3430 return res;
3431 }
3432
3433 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3434 {
3435 RAMBlock *block;
3436 int ret = 0;
3437
3438 rcu_read_lock();
3439 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3440 ret = func(block->idstr, block->host, block->offset,
3441 block->used_length, opaque);
3442 if (ret) {
3443 break;
3444 }
3445 }
3446 rcu_read_unlock();
3447 return ret;
3448 }
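/*
 * Illustrative sketch (not part of upstream exec.c): a migration-style
 * walk over every RAM block with the iterator above.  The callback
 * follows the RAMBlockIterFunc signature; the output format is just for
 * the example.
 */
static int example_print_block(const char *idstr, void *host_addr,
                               ram_addr_t offset, ram_addr_t length,
                               void *opaque)
{
    printf("block %s: host %p offset " RAM_ADDR_FMT " len " RAM_ADDR_FMT "\n",
           idstr, host_addr, offset, length);
    return 0;   /* returning non-zero stops the iteration */
}

static void example_dump_ram_blocks(void)
{
    qemu_ram_foreach_block(example_print_block, NULL);
}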
3449 #endif