[mirror_qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "exec/cputlb.h"
53 #include "translate-all.h"
54
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57
58 #include "qemu/range.h"
59
60 //#define DEBUG_SUBPAGE
61
62 #if !defined(CONFIG_USER_ONLY)
63 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
64 * are protected by the ramlist lock.
65 */
66 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
67
68 static MemoryRegion *system_memory;
69 static MemoryRegion *system_io;
70
71 AddressSpace address_space_io;
72 AddressSpace address_space_memory;
73
74 MemoryRegion io_mem_rom, io_mem_notdirty;
75 static MemoryRegion io_mem_unassigned;
76
77 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
78 #define RAM_PREALLOC (1 << 0)
79
80 /* RAM is mmap-ed with MAP_SHARED */
81 #define RAM_SHARED (1 << 1)
82
83 /* Only a portion of RAM (used_length) is actually used and migrated.
84  * This used_length can change across reboots.
85 */
86 #define RAM_RESIZEABLE (1 << 2)
87
88 #endif
89
90 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
91 /* current CPU in the current thread. It is only valid inside
92 cpu_exec() */
93 DEFINE_TLS(CPUState *, current_cpu);
94 /* 0 = Do not count executed instructions.
95 1 = Precise instruction counting.
96 2 = Adaptive rate instruction counting. */
97 int use_icount;
98
99 #if !defined(CONFIG_USER_ONLY)
100
101 typedef struct PhysPageEntry PhysPageEntry;
102
103 struct PhysPageEntry {
104     /* How many bits to skip to the next level (in units of P_L2_SIZE). 0 for a leaf. */
105 uint32_t skip : 6;
106 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
107 uint32_t ptr : 26;
108 };
109
110 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
111
112 /* Size of the L2 (and L3, etc) page tables. */
113 #define ADDR_SPACE_BITS 64
114
115 #define P_L2_BITS 9
116 #define P_L2_SIZE (1 << P_L2_BITS)
117
118 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
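/* Worked example (illustrative, assuming a 4 KiB target page, i.e.
 * TARGET_PAGE_BITS == 12): P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6,
 * so phys_page_find() reaches a leaf after at most six 9-bit radix
 * steps, each indexing one 512-entry Node table.
 */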
119
120 typedef PhysPageEntry Node[P_L2_SIZE];
121
122 typedef struct PhysPageMap {
123 struct rcu_head rcu;
124
125 unsigned sections_nb;
126 unsigned sections_nb_alloc;
127 unsigned nodes_nb;
128 unsigned nodes_nb_alloc;
129 Node *nodes;
130 MemoryRegionSection *sections;
131 } PhysPageMap;
132
133 struct AddressSpaceDispatch {
134 struct rcu_head rcu;
135
136 /* This is a multi-level map on the physical address space.
137 * The bottom level has pointers to MemoryRegionSections.
138 */
139 PhysPageEntry phys_map;
140 PhysPageMap map;
141 AddressSpace *as;
142 };
143
144 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
145 typedef struct subpage_t {
146 MemoryRegion iomem;
147 AddressSpace *as;
148 hwaddr base;
149 uint16_t sub_section[TARGET_PAGE_SIZE];
150 } subpage_t;
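/* Illustrative example (assuming 4 KiB pages): if a 16-byte MMIO region
 * sits at offset 0x100 within a page, register_subpage() fills
 * sub_section[0x100..0x10f] with that region's section index while the
 * rest of the page keeps PHYS_SECTION_UNASSIGNED, so SUBPAGE_IDX(addr)
 * selects the right section for every byte of the page.
 */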
151
152 #define PHYS_SECTION_UNASSIGNED 0
153 #define PHYS_SECTION_NOTDIRTY 1
154 #define PHYS_SECTION_ROM 2
155 #define PHYS_SECTION_WATCH 3
156
157 static void io_mem_init(void);
158 static void memory_map_init(void);
159 static void tcg_commit(MemoryListener *listener);
160
161 static MemoryRegion io_mem_watch;
162 #endif
163
164 #if !defined(CONFIG_USER_ONLY)
165
166 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
167 {
168 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
169 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
170 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
171 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
172 }
173 }
174
175 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
176 {
177 unsigned i;
178 uint32_t ret;
179 PhysPageEntry e;
180 PhysPageEntry *p;
181
182 ret = map->nodes_nb++;
183 p = map->nodes[ret];
184 assert(ret != PHYS_MAP_NODE_NIL);
185 assert(ret != map->nodes_nb_alloc);
186
187 e.skip = leaf ? 0 : 1;
188 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
189 for (i = 0; i < P_L2_SIZE; ++i) {
190 memcpy(&p[i], &e, sizeof(e));
191 }
192 return ret;
193 }
194
195 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
196 hwaddr *index, hwaddr *nb, uint16_t leaf,
197 int level)
198 {
199 PhysPageEntry *p;
200 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
201
202 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
203 lp->ptr = phys_map_node_alloc(map, level == 0);
204 }
205 p = map->nodes[lp->ptr];
206 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
207
208 while (*nb && lp < &p[P_L2_SIZE]) {
209 if ((*index & (step - 1)) == 0 && *nb >= step) {
210 lp->skip = 0;
211 lp->ptr = leaf;
212 *index += step;
213 *nb -= step;
214 } else {
215 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
216 }
217 ++lp;
218 }
219 }
220
221 static void phys_page_set(AddressSpaceDispatch *d,
222 hwaddr index, hwaddr nb,
223 uint16_t leaf)
224 {
225 /* Wildly overreserve - it doesn't matter much. */
226 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
227
228 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
229 }
230
231 /* Compact a non-leaf page entry. Simply detect that the entry has a single child
232 * and update our entry so we can skip it and go directly to the destination.
233 */
234 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
235 {
236 unsigned valid_ptr = P_L2_SIZE;
237 int valid = 0;
238 PhysPageEntry *p;
239 int i;
240
241 if (lp->ptr == PHYS_MAP_NODE_NIL) {
242 return;
243 }
244
245 p = nodes[lp->ptr];
246 for (i = 0; i < P_L2_SIZE; i++) {
247 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
248 continue;
249 }
250
251 valid_ptr = i;
252 valid++;
253 if (p[i].skip) {
254 phys_page_compact(&p[i], nodes, compacted);
255 }
256 }
257
258 /* We can only compress if there's only one child. */
259 if (valid != 1) {
260 return;
261 }
262
263 assert(valid_ptr < P_L2_SIZE);
264
265 /* Don't compress if it won't fit in the # of bits we have. */
266 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
267 return;
268 }
269
270 lp->ptr = p[valid_ptr].ptr;
271 if (!p[valid_ptr].skip) {
272 /* If our only child is a leaf, make this a leaf. */
273 /* By design, we should have made this node a leaf to begin with so we
274 * should never reach here.
275 * But since it's so simple to handle this, let's do it just in case we
276 * change this rule.
277 */
278 lp->skip = 0;
279 } else {
280 lp->skip += p[valid_ptr].skip;
281 }
282 }
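/* Illustrative example: if a node's only child is itself a non-leaf
 * with skip == 2, the parent takes over the child's ptr and adds the
 * child's skip to its own, so phys_page_find() descends straight to
 * the grandchild's table in a single step.
 */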
283
284 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
285 {
286 DECLARE_BITMAP(compacted, nodes_nb);
287
288 if (d->phys_map.skip) {
289 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
290 }
291 }
292
293 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
294 Node *nodes, MemoryRegionSection *sections)
295 {
296 PhysPageEntry *p;
297 hwaddr index = addr >> TARGET_PAGE_BITS;
298 int i;
299
300 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
301 if (lp.ptr == PHYS_MAP_NODE_NIL) {
302 return &sections[PHYS_SECTION_UNASSIGNED];
303 }
304 p = nodes[lp.ptr];
305 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
306 }
307
308 if (sections[lp.ptr].size.hi ||
309 range_covers_byte(sections[lp.ptr].offset_within_address_space,
310 sections[lp.ptr].size.lo, addr)) {
311 return &sections[lp.ptr];
312 } else {
313 return &sections[PHYS_SECTION_UNASSIGNED];
314 }
315 }
316
317 bool memory_region_is_unassigned(MemoryRegion *mr)
318 {
319 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
320 && mr != &io_mem_watch;
321 }
322
323 /* Called from RCU critical section */
324 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
325 hwaddr addr,
326 bool resolve_subpage)
327 {
328 MemoryRegionSection *section;
329 subpage_t *subpage;
330
331 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
332 if (resolve_subpage && section->mr->subpage) {
333 subpage = container_of(section->mr, subpage_t, iomem);
334 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
335 }
336 return section;
337 }
338
339 /* Called from RCU critical section */
340 static MemoryRegionSection *
341 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
342 hwaddr *plen, bool resolve_subpage)
343 {
344 MemoryRegionSection *section;
345 MemoryRegion *mr;
346 Int128 diff;
347
348 section = address_space_lookup_region(d, addr, resolve_subpage);
349 /* Compute offset within MemoryRegionSection */
350 addr -= section->offset_within_address_space;
351
352 /* Compute offset within MemoryRegion */
353 *xlat = addr + section->offset_within_region;
354
355 mr = section->mr;
356
357 /* MMIO registers can be expected to perform full-width accesses based only
358 * on their address, without considering adjacent registers that could
359 * decode to completely different MemoryRegions. When such registers
360 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
361 * regions overlap wildly. For this reason we cannot clamp the accesses
362 * here.
363 *
364 * If the length is small (as is the case for address_space_ldl/stl),
365 * everything works fine. If the incoming length is large, however,
366 * the caller really has to do the clamping through memory_access_size.
367 */
368 if (memory_region_is_ram(mr)) {
369 diff = int128_sub(section->size, int128_make64(addr));
370 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
371 }
372 return section;
373 }
374
375 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
376 {
377 if (memory_region_is_ram(mr)) {
378 return !(is_write && mr->readonly);
379 }
380 if (memory_region_is_romd(mr)) {
381 return !is_write;
382 }
383
384 return false;
385 }
386
387 /* Called from RCU critical section */
388 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
389 hwaddr *xlat, hwaddr *plen,
390 bool is_write)
391 {
392 IOMMUTLBEntry iotlb;
393 MemoryRegionSection *section;
394 MemoryRegion *mr;
395
396 for (;;) {
397 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
398 section = address_space_translate_internal(d, addr, &addr, plen, true);
399 mr = section->mr;
400
401 if (!mr->iommu_ops) {
402 break;
403 }
404
405 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
406 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
407 | (addr & iotlb.addr_mask));
408 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
409 if (!(iotlb.perm & (1 << is_write))) {
410 mr = &io_mem_unassigned;
411 break;
412 }
413
414 as = iotlb.target_as;
415 }
416
417 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
418 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
419 *plen = MIN(page, *plen);
420 }
421
422 *xlat = addr;
423 return mr;
424 }
425
426 /* Called from RCU critical section */
427 MemoryRegionSection *
428 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
429 hwaddr *xlat, hwaddr *plen)
430 {
431 MemoryRegionSection *section;
432 section = address_space_translate_internal(cpu->memory_dispatch,
433 addr, xlat, plen, false);
434
435 assert(!section->mr->iommu_ops);
436 return section;
437 }
438 #endif
439
440 #if !defined(CONFIG_USER_ONLY)
441
442 static int cpu_common_post_load(void *opaque, int version_id)
443 {
444 CPUState *cpu = opaque;
445
446 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
447 version_id is increased. */
448 cpu->interrupt_request &= ~0x01;
449 tlb_flush(cpu, 1);
450
451 return 0;
452 }
453
454 static int cpu_common_pre_load(void *opaque)
455 {
456 CPUState *cpu = opaque;
457
458 cpu->exception_index = -1;
459
460 return 0;
461 }
462
463 static bool cpu_common_exception_index_needed(void *opaque)
464 {
465 CPUState *cpu = opaque;
466
467 return tcg_enabled() && cpu->exception_index != -1;
468 }
469
470 static const VMStateDescription vmstate_cpu_common_exception_index = {
471 .name = "cpu_common/exception_index",
472 .version_id = 1,
473 .minimum_version_id = 1,
474 .needed = cpu_common_exception_index_needed,
475 .fields = (VMStateField[]) {
476 VMSTATE_INT32(exception_index, CPUState),
477 VMSTATE_END_OF_LIST()
478 }
479 };
480
481 const VMStateDescription vmstate_cpu_common = {
482 .name = "cpu_common",
483 .version_id = 1,
484 .minimum_version_id = 1,
485 .pre_load = cpu_common_pre_load,
486 .post_load = cpu_common_post_load,
487 .fields = (VMStateField[]) {
488 VMSTATE_UINT32(halted, CPUState),
489 VMSTATE_UINT32(interrupt_request, CPUState),
490 VMSTATE_END_OF_LIST()
491 },
492 .subsections = (const VMStateDescription*[]) {
493 &vmstate_cpu_common_exception_index,
494 NULL
495 }
496 };
497
498 #endif
499
500 CPUState *qemu_get_cpu(int index)
501 {
502 CPUState *cpu;
503
504 CPU_FOREACH(cpu) {
505 if (cpu->cpu_index == index) {
506 return cpu;
507 }
508 }
509
510 return NULL;
511 }
512
513 #if !defined(CONFIG_USER_ONLY)
514 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
515 {
516 /* We only support one address space per cpu at the moment. */
517 assert(cpu->as == as);
518
519 if (cpu->tcg_as_listener) {
520 memory_listener_unregister(cpu->tcg_as_listener);
521 } else {
522 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
523 }
524 cpu->tcg_as_listener->commit = tcg_commit;
525 memory_listener_register(cpu->tcg_as_listener, as);
526 }
527 #endif
528
529 void cpu_exec_init(CPUArchState *env)
530 {
531 CPUState *cpu = ENV_GET_CPU(env);
532 CPUClass *cc = CPU_GET_CLASS(cpu);
533 CPUState *some_cpu;
534 int cpu_index;
535
536 #if defined(CONFIG_USER_ONLY)
537 cpu_list_lock();
538 #endif
539 cpu_index = 0;
540 CPU_FOREACH(some_cpu) {
541 cpu_index++;
542 }
543 cpu->cpu_index = cpu_index;
544 QTAILQ_INIT(&cpu->breakpoints);
545 QTAILQ_INIT(&cpu->watchpoints);
546 #ifndef CONFIG_USER_ONLY
547 cpu->as = &address_space_memory;
548 cpu->thread_id = qemu_get_thread_id();
549 cpu_reload_memory_map(cpu);
550 #endif
551 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
552 #if defined(CONFIG_USER_ONLY)
553 cpu_list_unlock();
554 #endif
555 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
556 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
557 }
558 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
559 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
560 cpu_save, cpu_load, env);
561 assert(cc->vmsd == NULL);
562 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
563 #endif
564 if (cc->vmsd != NULL) {
565 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
566 }
567 }
568
569 #if defined(CONFIG_USER_ONLY)
570 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
571 {
572 tb_invalidate_phys_page_range(pc, pc + 1, 0);
573 }
574 #else
575 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
576 {
577 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
578 if (phys != -1) {
579 tb_invalidate_phys_addr(cpu->as,
580 phys | (pc & ~TARGET_PAGE_MASK));
581 }
582 }
583 #endif
584
585 #if defined(CONFIG_USER_ONLY)
586 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
587
588 {
589 }
590
591 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
592 int flags)
593 {
594 return -ENOSYS;
595 }
596
597 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
598 {
599 }
600
601 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
602 int flags, CPUWatchpoint **watchpoint)
603 {
604 return -ENOSYS;
605 }
606 #else
607 /* Add a watchpoint. */
608 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
609 int flags, CPUWatchpoint **watchpoint)
610 {
611 CPUWatchpoint *wp;
612
613 /* forbid ranges which are empty or run off the end of the address space */
614 if (len == 0 || (addr + len - 1) < addr) {
615 error_report("tried to set invalid watchpoint at %"
616 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
617 return -EINVAL;
618 }
619 wp = g_malloc(sizeof(*wp));
620
621 wp->vaddr = addr;
622 wp->len = len;
623 wp->flags = flags;
624
625 /* keep all GDB-injected watchpoints in front */
626 if (flags & BP_GDB) {
627 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
628 } else {
629 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
630 }
631
632 tlb_flush_page(cpu, addr);
633
634 if (watchpoint)
635 *watchpoint = wp;
636 return 0;
637 }
638
639 /* Remove a specific watchpoint. */
640 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
641 int flags)
642 {
643 CPUWatchpoint *wp;
644
645 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
646 if (addr == wp->vaddr && len == wp->len
647 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
648 cpu_watchpoint_remove_by_ref(cpu, wp);
649 return 0;
650 }
651 }
652 return -ENOENT;
653 }
654
655 /* Remove a specific watchpoint by reference. */
656 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
657 {
658 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
659
660 tlb_flush_page(cpu, watchpoint->vaddr);
661
662 g_free(watchpoint);
663 }
664
665 /* Remove all matching watchpoints. */
666 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
667 {
668 CPUWatchpoint *wp, *next;
669
670 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
671 if (wp->flags & mask) {
672 cpu_watchpoint_remove_by_ref(cpu, wp);
673 }
674 }
675 }
676
677 /* Return true if this watchpoint address matches the specified
678  * access (i.e. the address range covered by the watchpoint overlaps
679 * partially or completely with the address range covered by the
680 * access).
681 */
682 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
683 vaddr addr,
684 vaddr len)
685 {
686 /* We know the lengths are non-zero, but a little caution is
687 * required to avoid errors in the case where the range ends
688 * exactly at the top of the address space and so addr + len
689 * wraps round to zero.
690 */
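    /* Illustrative case, assuming a 64-bit vaddr: a watchpoint covering
     * the last four bytes of the address space has wp->vaddr + wp->len
     * wrapping to zero, so comparing exclusive end addresses would go
     * wrong; with the closed ends used here, wpend == UINT64_MAX and
     * the overlap test below still behaves correctly.
     */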
691 vaddr wpend = wp->vaddr + wp->len - 1;
692 vaddr addrend = addr + len - 1;
693
694 return !(addr > wpend || wp->vaddr > addrend);
695 }
696
697 #endif
698
699 /* Add a breakpoint. */
700 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
701 CPUBreakpoint **breakpoint)
702 {
703 CPUBreakpoint *bp;
704
705 bp = g_malloc(sizeof(*bp));
706
707 bp->pc = pc;
708 bp->flags = flags;
709
710 /* keep all GDB-injected breakpoints in front */
711 if (flags & BP_GDB) {
712 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
713 } else {
714 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
715 }
716
717 breakpoint_invalidate(cpu, pc);
718
719 if (breakpoint) {
720 *breakpoint = bp;
721 }
722 return 0;
723 }
724
725 /* Remove a specific breakpoint. */
726 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
727 {
728 CPUBreakpoint *bp;
729
730 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
731 if (bp->pc == pc && bp->flags == flags) {
732 cpu_breakpoint_remove_by_ref(cpu, bp);
733 return 0;
734 }
735 }
736 return -ENOENT;
737 }
738
739 /* Remove a specific breakpoint by reference. */
740 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
741 {
742 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
743
744 breakpoint_invalidate(cpu, breakpoint->pc);
745
746 g_free(breakpoint);
747 }
748
749 /* Remove all matching breakpoints. */
750 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
751 {
752 CPUBreakpoint *bp, *next;
753
754 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
755 if (bp->flags & mask) {
756 cpu_breakpoint_remove_by_ref(cpu, bp);
757 }
758 }
759 }
760
761 /* enable or disable single step mode. EXCP_DEBUG is returned by the
762 CPU loop after each instruction */
763 void cpu_single_step(CPUState *cpu, int enabled)
764 {
765 if (cpu->singlestep_enabled != enabled) {
766 cpu->singlestep_enabled = enabled;
767 if (kvm_enabled()) {
768 kvm_update_guest_debug(cpu, 0);
769 } else {
770 /* must flush all the translated code to avoid inconsistencies */
771 /* XXX: only flush what is necessary */
772 CPUArchState *env = cpu->env_ptr;
773 tb_flush(env);
774 }
775 }
776 }
777
778 void cpu_abort(CPUState *cpu, const char *fmt, ...)
779 {
780 va_list ap;
781 va_list ap2;
782
783 va_start(ap, fmt);
784 va_copy(ap2, ap);
785 fprintf(stderr, "qemu: fatal: ");
786 vfprintf(stderr, fmt, ap);
787 fprintf(stderr, "\n");
788 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
789 if (qemu_log_enabled()) {
790 qemu_log("qemu: fatal: ");
791 qemu_log_vprintf(fmt, ap2);
792 qemu_log("\n");
793 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
794 qemu_log_flush();
795 qemu_log_close();
796 }
797 va_end(ap2);
798 va_end(ap);
799 #if defined(CONFIG_USER_ONLY)
800 {
801 struct sigaction act;
802 sigfillset(&act.sa_mask);
803 act.sa_handler = SIG_DFL;
804 sigaction(SIGABRT, &act, NULL);
805 }
806 #endif
807 abort();
808 }
809
810 #if !defined(CONFIG_USER_ONLY)
811 /* Called from RCU critical section */
812 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
813 {
814 RAMBlock *block;
815
816 block = atomic_rcu_read(&ram_list.mru_block);
817 if (block && addr - block->offset < block->max_length) {
818 goto found;
819 }
820 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
821 if (addr - block->offset < block->max_length) {
822 goto found;
823 }
824 }
825
826 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
827 abort();
828
829 found:
830 /* It is safe to write mru_block outside the iothread lock. This
831 * is what happens:
832 *
833 * mru_block = xxx
834 * rcu_read_unlock()
835 * xxx removed from list
836 * rcu_read_lock()
837 * read mru_block
838 * mru_block = NULL;
839 * call_rcu(reclaim_ramblock, xxx);
840 * rcu_read_unlock()
841 *
842 * atomic_rcu_set is not needed here. The block was already published
843 * when it was placed into the list. Here we're just making an extra
844 * copy of the pointer.
845 */
846 ram_list.mru_block = block;
847 return block;
848 }
849
850 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
851 {
852 ram_addr_t start1;
853 RAMBlock *block;
854 ram_addr_t end;
855
856 end = TARGET_PAGE_ALIGN(start + length);
857 start &= TARGET_PAGE_MASK;
858
859 rcu_read_lock();
860 block = qemu_get_ram_block(start);
861 assert(block == qemu_get_ram_block(end - 1));
862 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
863 cpu_tlb_reset_dirty_all(start1, length);
864 rcu_read_unlock();
865 }
866
867 /* Note: start and end must be within the same ram block. */
868 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
869 ram_addr_t length,
870 unsigned client)
871 {
872 unsigned long end, page;
873 bool dirty;
874
875 if (length == 0) {
876 return false;
877 }
878
879 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
880 page = start >> TARGET_PAGE_BITS;
881 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
882 page, end - page);
883
884 if (dirty && tcg_enabled()) {
885 tlb_reset_dirty_range_all(start, length);
886 }
887
888 return dirty;
889 }
890
891 /* Called from RCU critical section */
892 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
893 MemoryRegionSection *section,
894 target_ulong vaddr,
895 hwaddr paddr, hwaddr xlat,
896 int prot,
897 target_ulong *address)
898 {
899 hwaddr iotlb;
900 CPUWatchpoint *wp;
901
902 if (memory_region_is_ram(section->mr)) {
903 /* Normal RAM. */
904 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
905 + xlat;
906 if (!section->readonly) {
907 iotlb |= PHYS_SECTION_NOTDIRTY;
908 } else {
909 iotlb |= PHYS_SECTION_ROM;
910 }
911 } else {
912 iotlb = section - section->address_space->dispatch->map.sections;
913 iotlb += xlat;
914 }
915
916 /* Make accesses to pages with watchpoints go via the
917 watchpoint trap routines. */
918 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
919 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
920 /* Avoid trapping reads of pages with a write breakpoint. */
921 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
922 iotlb = PHYS_SECTION_WATCH + paddr;
923 *address |= TLB_MMIO;
924 break;
925 }
926 }
927 }
928
929 return iotlb;
930 }
931 #endif /* defined(CONFIG_USER_ONLY) */
932
933 #if !defined(CONFIG_USER_ONLY)
934
935 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
936 uint16_t section);
937 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
938
939 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
940 qemu_anon_ram_alloc;
941
942 /*
943  * Set a custom physical guest memory allocator.
944 * Accelerators with unusual needs may need this. Hopefully, we can
945 * get rid of it eventually.
946 */
947 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
948 {
949 phys_mem_alloc = alloc;
950 }
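/* Illustrative sketch only, not part of the upstream API surface: an
 * accelerator with special allocation needs would install its hook
 * early in its init path, e.g.
 *
 *     static void *my_accel_ram_alloc(size_t size, uint64_t *align)
 *     {
 *         *align = getpagesize();
 *         return qemu_memalign(*align, size);
 *     }
 *     ...
 *     phys_mem_set_alloc(my_accel_ram_alloc);
 *
 * The function name above is hypothetical.
 */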
951
952 static uint16_t phys_section_add(PhysPageMap *map,
953 MemoryRegionSection *section)
954 {
955 /* The physical section number is ORed with a page-aligned
956 * pointer to produce the iotlb entries. Thus it should
957 * never overflow into the page-aligned value.
958 */
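    /* Illustrative example (assuming 4 KiB pages and a page-aligned
     * mapping): a writable RAM page at ram_addr 0x7f000 is encoded by
     * memory_region_section_get_iotlb() as 0x7f000 | PHYS_SECTION_NOTDIRTY
     * == 0x7f001, and iotlb_to_region() later recovers section 1 with
     * index & ~TARGET_PAGE_MASK -- hence the assertion below.
     */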
959 assert(map->sections_nb < TARGET_PAGE_SIZE);
960
961 if (map->sections_nb == map->sections_nb_alloc) {
962 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
963 map->sections = g_renew(MemoryRegionSection, map->sections,
964 map->sections_nb_alloc);
965 }
966 map->sections[map->sections_nb] = *section;
967 memory_region_ref(section->mr);
968 return map->sections_nb++;
969 }
970
971 static void phys_section_destroy(MemoryRegion *mr)
972 {
973 memory_region_unref(mr);
974
975 if (mr->subpage) {
976 subpage_t *subpage = container_of(mr, subpage_t, iomem);
977 object_unref(OBJECT(&subpage->iomem));
978 g_free(subpage);
979 }
980 }
981
982 static void phys_sections_free(PhysPageMap *map)
983 {
984 while (map->sections_nb > 0) {
985 MemoryRegionSection *section = &map->sections[--map->sections_nb];
986 phys_section_destroy(section->mr);
987 }
988 g_free(map->sections);
989 g_free(map->nodes);
990 }
991
992 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
993 {
994 subpage_t *subpage;
995 hwaddr base = section->offset_within_address_space
996 & TARGET_PAGE_MASK;
997 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
998 d->map.nodes, d->map.sections);
999 MemoryRegionSection subsection = {
1000 .offset_within_address_space = base,
1001 .size = int128_make64(TARGET_PAGE_SIZE),
1002 };
1003 hwaddr start, end;
1004
1005 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1006
1007 if (!(existing->mr->subpage)) {
1008 subpage = subpage_init(d->as, base);
1009 subsection.address_space = d->as;
1010 subsection.mr = &subpage->iomem;
1011 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1012 phys_section_add(&d->map, &subsection));
1013 } else {
1014 subpage = container_of(existing->mr, subpage_t, iomem);
1015 }
1016 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1017 end = start + int128_get64(section->size) - 1;
1018 subpage_register(subpage, start, end,
1019 phys_section_add(&d->map, section));
1020 }
1021
1022
1023 static void register_multipage(AddressSpaceDispatch *d,
1024 MemoryRegionSection *section)
1025 {
1026 hwaddr start_addr = section->offset_within_address_space;
1027 uint16_t section_index = phys_section_add(&d->map, section);
1028 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1029 TARGET_PAGE_BITS));
1030
1031 assert(num_pages);
1032 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1033 }
1034
1035 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1036 {
1037 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1038 AddressSpaceDispatch *d = as->next_dispatch;
1039 MemoryRegionSection now = *section, remain = *section;
1040 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1041
1042 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1043 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1044 - now.offset_within_address_space;
1045
1046 now.size = int128_min(int128_make64(left), now.size);
1047 register_subpage(d, &now);
1048 } else {
1049 now.size = int128_zero();
1050 }
1051 while (int128_ne(remain.size, now.size)) {
1052 remain.size = int128_sub(remain.size, now.size);
1053 remain.offset_within_address_space += int128_get64(now.size);
1054 remain.offset_within_region += int128_get64(now.size);
1055 now = remain;
1056 if (int128_lt(remain.size, page_size)) {
1057 register_subpage(d, &now);
1058 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1059 now.size = page_size;
1060 register_subpage(d, &now);
1061 } else {
1062 now.size = int128_and(now.size, int128_neg(page_size));
1063 register_multipage(d, &now);
1064 }
1065 }
1066 }
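/* Illustrative walk-through of mem_add() (assuming 4 KiB pages): a
 * section starting at address-space offset 0x1800 with size 0x2000 is
 * split into a head subpage covering 0x1800..0x1fff, one full page
 * registered at 0x2000 via register_multipage(), and a tail subpage
 * covering 0x3000..0x37ff.
 */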
1067
1068 void qemu_flush_coalesced_mmio_buffer(void)
1069 {
1070 if (kvm_enabled())
1071 kvm_flush_coalesced_mmio_buffer();
1072 }
1073
1074 void qemu_mutex_lock_ramlist(void)
1075 {
1076 qemu_mutex_lock(&ram_list.mutex);
1077 }
1078
1079 void qemu_mutex_unlock_ramlist(void)
1080 {
1081 qemu_mutex_unlock(&ram_list.mutex);
1082 }
1083
1084 #ifdef __linux__
1085
1086 #include <sys/vfs.h>
1087
1088 #define HUGETLBFS_MAGIC 0x958458f6
1089
1090 static long gethugepagesize(const char *path, Error **errp)
1091 {
1092 struct statfs fs;
1093 int ret;
1094
1095 do {
1096 ret = statfs(path, &fs);
1097 } while (ret != 0 && errno == EINTR);
1098
1099 if (ret != 0) {
1100 error_setg_errno(errp, errno, "failed to get page size of file %s",
1101 path);
1102 return 0;
1103 }
1104
1105 if (fs.f_type != HUGETLBFS_MAGIC)
1106 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1107
1108 return fs.f_bsize;
1109 }
1110
1111 static void *file_ram_alloc(RAMBlock *block,
1112 ram_addr_t memory,
1113 const char *path,
1114 Error **errp)
1115 {
1116 char *filename;
1117 char *sanitized_name;
1118 char *c;
1119 void *area = NULL;
1120 int fd;
1121 uint64_t hpagesize;
1122 Error *local_err = NULL;
1123
1124 hpagesize = gethugepagesize(path, &local_err);
1125 if (local_err) {
1126 error_propagate(errp, local_err);
1127 goto error;
1128 }
1129 block->mr->align = hpagesize;
1130
1131 if (memory < hpagesize) {
1132 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1133 "or larger than huge page size 0x%" PRIx64,
1134 memory, hpagesize);
1135 goto error;
1136 }
1137
1138 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1139 error_setg(errp,
1140 "host lacks kvm mmu notifiers, -mem-path unsupported");
1141 goto error;
1142 }
1143
1144 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1145 sanitized_name = g_strdup(memory_region_name(block->mr));
1146 for (c = sanitized_name; *c != '\0'; c++) {
1147 if (*c == '/')
1148 *c = '_';
1149 }
1150
1151 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1152 sanitized_name);
1153 g_free(sanitized_name);
1154
1155 fd = mkstemp(filename);
1156 if (fd < 0) {
1157 error_setg_errno(errp, errno,
1158 "unable to create backing store for hugepages");
1159 g_free(filename);
1160 goto error;
1161 }
1162 unlink(filename);
1163 g_free(filename);
1164
1165 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1166
1167 /*
1168 * ftruncate is not supported by hugetlbfs in older
1169 * hosts, so don't bother bailing out on errors.
1170 * If anything goes wrong with it under other filesystems,
1171 * mmap will fail.
1172 */
1173 if (ftruncate(fd, memory)) {
1174 perror("ftruncate");
1175 }
1176
1177 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1178 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1179 fd, 0);
1180 if (area == MAP_FAILED) {
1181 error_setg_errno(errp, errno,
1182 "unable to map backing store for hugepages");
1183 close(fd);
1184 goto error;
1185 }
1186
1187 if (mem_prealloc) {
1188 os_mem_prealloc(fd, area, memory);
1189 }
1190
1191 block->fd = fd;
1192 return area;
1193
1194 error:
1195 if (mem_prealloc) {
1196 error_report("%s", error_get_pretty(*errp));
1197 exit(1);
1198 }
1199 return NULL;
1200 }
1201 #endif
1202
1203 /* Called with the ramlist lock held. */
1204 static ram_addr_t find_ram_offset(ram_addr_t size)
1205 {
1206 RAMBlock *block, *next_block;
1207 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1208
1209 assert(size != 0); /* it would hand out same offset multiple times */
1210
1211 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1212 return 0;
1213 }
1214
1215 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1216 ram_addr_t end, next = RAM_ADDR_MAX;
1217
1218 end = block->offset + block->max_length;
1219
1220 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1221 if (next_block->offset >= end) {
1222 next = MIN(next, next_block->offset);
1223 }
1224 }
1225 if (next - end >= size && next - end < mingap) {
1226 offset = end;
1227 mingap = next - end;
1228 }
1229 }
1230
1231 if (offset == RAM_ADDR_MAX) {
1232 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1233 (uint64_t)size);
1234 abort();
1235 }
1236
1237 return offset;
1238 }
1239
1240 ram_addr_t last_ram_offset(void)
1241 {
1242 RAMBlock *block;
1243 ram_addr_t last = 0;
1244
1245 rcu_read_lock();
1246 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1247 last = MAX(last, block->offset + block->max_length);
1248 }
1249 rcu_read_unlock();
1250 return last;
1251 }
1252
1253 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1254 {
1255 int ret;
1256
1257     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1258 if (!machine_dump_guest_core(current_machine)) {
1259 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1260 if (ret) {
1261 perror("qemu_madvise");
1262 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1263 "but dump_guest_core=off specified\n");
1264 }
1265 }
1266 }
1267
1268 /* Called within an RCU critical section, or while the ramlist lock
1269 * is held.
1270 */
1271 static RAMBlock *find_ram_block(ram_addr_t addr)
1272 {
1273 RAMBlock *block;
1274
1275 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1276 if (block->offset == addr) {
1277 return block;
1278 }
1279 }
1280
1281 return NULL;
1282 }
1283
1284 /* Called with iothread lock held. */
1285 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1286 {
1287 RAMBlock *new_block, *block;
1288
1289 rcu_read_lock();
1290 new_block = find_ram_block(addr);
1291 assert(new_block);
1292 assert(!new_block->idstr[0]);
1293
1294 if (dev) {
1295 char *id = qdev_get_dev_path(dev);
1296 if (id) {
1297 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1298 g_free(id);
1299 }
1300 }
1301 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1302
1303 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1304 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1305 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1306 new_block->idstr);
1307 abort();
1308 }
1309 }
1310 rcu_read_unlock();
1311 }
1312
1313 /* Called with iothread lock held. */
1314 void qemu_ram_unset_idstr(ram_addr_t addr)
1315 {
1316 RAMBlock *block;
1317
1318 /* FIXME: arch_init.c assumes that this is not called throughout
1319 * migration. Ignore the problem since hot-unplug during migration
1320 * does not work anyway.
1321 */
1322
1323 rcu_read_lock();
1324 block = find_ram_block(addr);
1325 if (block) {
1326 memset(block->idstr, 0, sizeof(block->idstr));
1327 }
1328 rcu_read_unlock();
1329 }
1330
1331 static int memory_try_enable_merging(void *addr, size_t len)
1332 {
1333 if (!machine_mem_merge(current_machine)) {
1334 /* disabled by the user */
1335 return 0;
1336 }
1337
1338 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1339 }
1340
1341 /* Only legal before the guest might have detected the memory size: e.g. on
1342 * incoming migration, or right after reset.
1343 *
1344  * As the memory core doesn't know how the memory is accessed, it is up to
1345  * the resize callback to update device state and/or add assertions to detect
1346 * misuse, if necessary.
1347 */
1348 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1349 {
1350 RAMBlock *block = find_ram_block(base);
1351
1352 assert(block);
1353
1354 newsize = TARGET_PAGE_ALIGN(newsize);
1355
1356 if (block->used_length == newsize) {
1357 return 0;
1358 }
1359
1360 if (!(block->flags & RAM_RESIZEABLE)) {
1361 error_setg_errno(errp, EINVAL,
1362 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1363 " in != 0x" RAM_ADDR_FMT, block->idstr,
1364 newsize, block->used_length);
1365 return -EINVAL;
1366 }
1367
1368 if (block->max_length < newsize) {
1369 error_setg_errno(errp, EINVAL,
1370 "Length too large: %s: 0x" RAM_ADDR_FMT
1371 " > 0x" RAM_ADDR_FMT, block->idstr,
1372 newsize, block->max_length);
1373 return -EINVAL;
1374 }
1375
1376 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1377 block->used_length = newsize;
1378 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1379 DIRTY_CLIENTS_ALL);
1380 memory_region_set_size(block->mr, newsize);
1381 if (block->resized) {
1382 block->resized(block->idstr, newsize, block->host);
1383 }
1384 return 0;
1385 }
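/* Illustrative usage sketch (the callback name and sizes are made up):
 * a device that grows its RAM on incoming migration might do
 *
 *     addr = qemu_ram_alloc_resizeable(0x10000, 0x200000,
 *                                      my_resized_cb, mr, &err);
 *     ...
 *     qemu_ram_resize(addr, 0x20000, &err);
 *
 * where my_resized_cb(idstr, new_length, host) updates whatever device
 * state depends on the block's used_length.
 */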
1386
1387 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1388 {
1389 RAMBlock *block;
1390 RAMBlock *last_block = NULL;
1391 ram_addr_t old_ram_size, new_ram_size;
1392
1393 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1394
1395 qemu_mutex_lock_ramlist();
1396 new_block->offset = find_ram_offset(new_block->max_length);
1397
1398 if (!new_block->host) {
1399 if (xen_enabled()) {
1400 xen_ram_alloc(new_block->offset, new_block->max_length,
1401 new_block->mr);
1402 } else {
1403 new_block->host = phys_mem_alloc(new_block->max_length,
1404 &new_block->mr->align);
1405 if (!new_block->host) {
1406 error_setg_errno(errp, errno,
1407 "cannot set up guest memory '%s'",
1408 memory_region_name(new_block->mr));
1409 qemu_mutex_unlock_ramlist();
1410 return -1;
1411 }
1412 memory_try_enable_merging(new_block->host, new_block->max_length);
1413 }
1414 }
1415
1416 new_ram_size = MAX(old_ram_size,
1417 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1418 if (new_ram_size > old_ram_size) {
1419 migration_bitmap_extend(old_ram_size, new_ram_size);
1420 }
1421 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1422 * QLIST (which has an RCU-friendly variant) does not have insertion at
1423 * tail, so save the last element in last_block.
1424 */
1425 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1426 last_block = block;
1427 if (block->max_length < new_block->max_length) {
1428 break;
1429 }
1430 }
1431 if (block) {
1432 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1433 } else if (last_block) {
1434 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1435 } else { /* list is empty */
1436 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1437 }
1438 ram_list.mru_block = NULL;
1439
1440 /* Write list before version */
1441 smp_wmb();
1442 ram_list.version++;
1443 qemu_mutex_unlock_ramlist();
1444
1445 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1446
1447 if (new_ram_size > old_ram_size) {
1448 int i;
1449
1450 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1451 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1452 ram_list.dirty_memory[i] =
1453 bitmap_zero_extend(ram_list.dirty_memory[i],
1454 old_ram_size, new_ram_size);
1455 }
1456 }
1457 cpu_physical_memory_set_dirty_range(new_block->offset,
1458 new_block->used_length,
1459 DIRTY_CLIENTS_ALL);
1460
1461 if (new_block->host) {
1462 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1463 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1464 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1465 if (kvm_enabled()) {
1466 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1467 }
1468 }
1469
1470 return new_block->offset;
1471 }
1472
1473 #ifdef __linux__
1474 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1475 bool share, const char *mem_path,
1476 Error **errp)
1477 {
1478 RAMBlock *new_block;
1479 ram_addr_t addr;
1480 Error *local_err = NULL;
1481
1482 if (xen_enabled()) {
1483 error_setg(errp, "-mem-path not supported with Xen");
1484 return -1;
1485 }
1486
1487 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1488 /*
1489 * file_ram_alloc() needs to allocate just like
1490 * phys_mem_alloc, but we haven't bothered to provide
1491 * a hook there.
1492 */
1493 error_setg(errp,
1494 "-mem-path not supported with this accelerator");
1495 return -1;
1496 }
1497
1498 size = TARGET_PAGE_ALIGN(size);
1499 new_block = g_malloc0(sizeof(*new_block));
1500 new_block->mr = mr;
1501 new_block->used_length = size;
1502 new_block->max_length = size;
1503 new_block->flags = share ? RAM_SHARED : 0;
1504 new_block->host = file_ram_alloc(new_block, size,
1505 mem_path, errp);
1506 if (!new_block->host) {
1507 g_free(new_block);
1508 return -1;
1509 }
1510
1511 addr = ram_block_add(new_block, &local_err);
1512 if (local_err) {
1513 g_free(new_block);
1514 error_propagate(errp, local_err);
1515 return -1;
1516 }
1517 return addr;
1518 }
1519 #endif
1520
1521 static
1522 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1523 void (*resized)(const char*,
1524 uint64_t length,
1525 void *host),
1526 void *host, bool resizeable,
1527 MemoryRegion *mr, Error **errp)
1528 {
1529 RAMBlock *new_block;
1530 ram_addr_t addr;
1531 Error *local_err = NULL;
1532
1533 size = TARGET_PAGE_ALIGN(size);
1534 max_size = TARGET_PAGE_ALIGN(max_size);
1535 new_block = g_malloc0(sizeof(*new_block));
1536 new_block->mr = mr;
1537 new_block->resized = resized;
1538 new_block->used_length = size;
1539 new_block->max_length = max_size;
1540 assert(max_size >= size);
1541 new_block->fd = -1;
1542 new_block->host = host;
1543 if (host) {
1544 new_block->flags |= RAM_PREALLOC;
1545 }
1546 if (resizeable) {
1547 new_block->flags |= RAM_RESIZEABLE;
1548 }
1549 addr = ram_block_add(new_block, &local_err);
1550 if (local_err) {
1551 g_free(new_block);
1552 error_propagate(errp, local_err);
1553 return -1;
1554 }
1555 return addr;
1556 }
1557
1558 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1559 MemoryRegion *mr, Error **errp)
1560 {
1561 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1562 }
1563
1564 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1565 {
1566 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1567 }
1568
1569 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1570 void (*resized)(const char*,
1571 uint64_t length,
1572 void *host),
1573 MemoryRegion *mr, Error **errp)
1574 {
1575 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1576 }
1577
1578 void qemu_ram_free_from_ptr(ram_addr_t addr)
1579 {
1580 RAMBlock *block;
1581
1582 qemu_mutex_lock_ramlist();
1583 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1584 if (addr == block->offset) {
1585 QLIST_REMOVE_RCU(block, next);
1586 ram_list.mru_block = NULL;
1587 /* Write list before version */
1588 smp_wmb();
1589 ram_list.version++;
1590 g_free_rcu(block, rcu);
1591 break;
1592 }
1593 }
1594 qemu_mutex_unlock_ramlist();
1595 }
1596
1597 static void reclaim_ramblock(RAMBlock *block)
1598 {
1599 if (block->flags & RAM_PREALLOC) {
1600 ;
1601 } else if (xen_enabled()) {
1602 xen_invalidate_map_cache_entry(block->host);
1603 #ifndef _WIN32
1604 } else if (block->fd >= 0) {
1605 munmap(block->host, block->max_length);
1606 close(block->fd);
1607 #endif
1608 } else {
1609 qemu_anon_ram_free(block->host, block->max_length);
1610 }
1611 g_free(block);
1612 }
1613
1614 void qemu_ram_free(ram_addr_t addr)
1615 {
1616 RAMBlock *block;
1617
1618 qemu_mutex_lock_ramlist();
1619 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1620 if (addr == block->offset) {
1621 QLIST_REMOVE_RCU(block, next);
1622 ram_list.mru_block = NULL;
1623 /* Write list before version */
1624 smp_wmb();
1625 ram_list.version++;
1626 call_rcu(block, reclaim_ramblock, rcu);
1627 break;
1628 }
1629 }
1630 qemu_mutex_unlock_ramlist();
1631 }
1632
1633 #ifndef _WIN32
1634 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1635 {
1636 RAMBlock *block;
1637 ram_addr_t offset;
1638 int flags;
1639 void *area, *vaddr;
1640
1641 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1642 offset = addr - block->offset;
1643 if (offset < block->max_length) {
1644 vaddr = ramblock_ptr(block, offset);
1645 if (block->flags & RAM_PREALLOC) {
1646 ;
1647 } else if (xen_enabled()) {
1648 abort();
1649 } else {
1650 flags = MAP_FIXED;
1651 if (block->fd >= 0) {
1652 flags |= (block->flags & RAM_SHARED ?
1653 MAP_SHARED : MAP_PRIVATE);
1654 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1655 flags, block->fd, offset);
1656 } else {
1657 /*
1658 * Remap needs to match alloc. Accelerators that
1659 * set phys_mem_alloc never remap. If they did,
1660 * we'd need a remap hook here.
1661 */
1662 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1663
1664 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1665 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1666 flags, -1, 0);
1667 }
1668 if (area != vaddr) {
1669 fprintf(stderr, "Could not remap addr: "
1670 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1671 length, addr);
1672 exit(1);
1673 }
1674 memory_try_enable_merging(vaddr, length);
1675 qemu_ram_setup_dump(vaddr, length);
1676 }
1677 }
1678 }
1679 }
1680 #endif /* !_WIN32 */
1681
1682 int qemu_get_ram_fd(ram_addr_t addr)
1683 {
1684 RAMBlock *block;
1685 int fd;
1686
1687 rcu_read_lock();
1688 block = qemu_get_ram_block(addr);
1689 fd = block->fd;
1690 rcu_read_unlock();
1691 return fd;
1692 }
1693
1694 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1695 {
1696 RAMBlock *block;
1697 void *ptr;
1698
1699 rcu_read_lock();
1700 block = qemu_get_ram_block(addr);
1701 ptr = ramblock_ptr(block, 0);
1702 rcu_read_unlock();
1703 return ptr;
1704 }
1705
1706 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1707 * This should not be used for general purpose DMA. Use address_space_map
1708 * or address_space_rw instead. For local memory (e.g. video ram) that the
1709 * device owns, use memory_region_get_ram_ptr.
1710 *
1711 * By the time this function returns, the returned pointer is not protected
1712 * by RCU anymore. If the caller is not within an RCU critical section and
1713 * does not hold the iothread lock, it must have other means of protecting the
1714 * pointer, such as a reference to the region that includes the incoming
1715 * ram_addr_t.
1716 */
1717 void *qemu_get_ram_ptr(ram_addr_t addr)
1718 {
1719 RAMBlock *block;
1720 void *ptr;
1721
1722 rcu_read_lock();
1723 block = qemu_get_ram_block(addr);
1724
1725 if (xen_enabled() && block->host == NULL) {
1726 /* We need to check if the requested address is in the RAM
1727 * because we don't want to map the entire memory in QEMU.
1728 * In that case just map until the end of the page.
1729 */
1730 if (block->offset == 0) {
1731 ptr = xen_map_cache(addr, 0, 0);
1732 goto unlock;
1733 }
1734
1735 block->host = xen_map_cache(block->offset, block->max_length, 1);
1736 }
1737 ptr = ramblock_ptr(block, addr - block->offset);
1738
1739 unlock:
1740 rcu_read_unlock();
1741 return ptr;
1742 }
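/* Illustrative caller pattern: code running outside the iothread lock
 * would typically bracket its use of the returned pointer in its own
 * RCU critical section, e.g.
 *
 *     rcu_read_lock();
 *     p = qemu_get_ram_ptr(addr);
 *     ... access p ...
 *     rcu_read_unlock();
 *
 * or else hold a reference on the MemoryRegion that owns the block.
 */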
1743
1744 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1745 * but takes a size argument.
1746 *
1747 * By the time this function returns, the returned pointer is not protected
1748 * by RCU anymore. If the caller is not within an RCU critical section and
1749 * does not hold the iothread lock, it must have other means of protecting the
1750 * pointer, such as a reference to the region that includes the incoming
1751 * ram_addr_t.
1752 */
1753 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1754 {
1755 void *ptr;
1756 if (*size == 0) {
1757 return NULL;
1758 }
1759 if (xen_enabled()) {
1760 return xen_map_cache(addr, *size, 1);
1761 } else {
1762 RAMBlock *block;
1763 rcu_read_lock();
1764 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1765 if (addr - block->offset < block->max_length) {
1766 if (addr - block->offset + *size > block->max_length)
1767 *size = block->max_length - addr + block->offset;
1768 ptr = ramblock_ptr(block, addr - block->offset);
1769 rcu_read_unlock();
1770 return ptr;
1771 }
1772 }
1773
1774 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1775 abort();
1776 }
1777 }
1778
1779 /* Some of the softmmu routines need to translate from a host pointer
1780 * (typically a TLB entry) back to a ram offset.
1781 *
1782 * By the time this function returns, the returned pointer is not protected
1783 * by RCU anymore. If the caller is not within an RCU critical section and
1784 * does not hold the iothread lock, it must have other means of protecting the
1785 * pointer, such as a reference to the region that includes the incoming
1786 * ram_addr_t.
1787 */
1788 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1789 {
1790 RAMBlock *block;
1791 uint8_t *host = ptr;
1792 MemoryRegion *mr;
1793
1794 if (xen_enabled()) {
1795 rcu_read_lock();
1796 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1797 mr = qemu_get_ram_block(*ram_addr)->mr;
1798 rcu_read_unlock();
1799 return mr;
1800 }
1801
1802 rcu_read_lock();
1803 block = atomic_rcu_read(&ram_list.mru_block);
1804 if (block && block->host && host - block->host < block->max_length) {
1805 goto found;
1806 }
1807
1808 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1809         /* This case happens when the block is not mapped. */
1810 if (block->host == NULL) {
1811 continue;
1812 }
1813 if (host - block->host < block->max_length) {
1814 goto found;
1815 }
1816 }
1817
1818 rcu_read_unlock();
1819 return NULL;
1820
1821 found:
1822 *ram_addr = block->offset + (host - block->host);
1823 mr = block->mr;
1824 rcu_read_unlock();
1825 return mr;
1826 }
1827
1828 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1829 uint64_t val, unsigned size)
1830 {
1831 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1832 tb_invalidate_phys_page_fast(ram_addr, size);
1833 }
1834 switch (size) {
1835 case 1:
1836 stb_p(qemu_get_ram_ptr(ram_addr), val);
1837 break;
1838 case 2:
1839 stw_p(qemu_get_ram_ptr(ram_addr), val);
1840 break;
1841 case 4:
1842 stl_p(qemu_get_ram_ptr(ram_addr), val);
1843 break;
1844 default:
1845 abort();
1846 }
1847 /* Set both VGA and migration bits for simplicity and to remove
1848 * the notdirty callback faster.
1849 */
1850 cpu_physical_memory_set_dirty_range(ram_addr, size,
1851 DIRTY_CLIENTS_NOCODE);
1852 /* we remove the notdirty callback only if the code has been
1853 flushed */
1854 if (!cpu_physical_memory_is_clean(ram_addr)) {
1855 CPUArchState *env = current_cpu->env_ptr;
1856 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1857 }
1858 }
1859
1860 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1861 unsigned size, bool is_write)
1862 {
1863 return is_write;
1864 }
1865
1866 static const MemoryRegionOps notdirty_mem_ops = {
1867 .write = notdirty_mem_write,
1868 .valid.accepts = notdirty_mem_accepts,
1869 .endianness = DEVICE_NATIVE_ENDIAN,
1870 };
1871
1872 /* Generate a debug exception if a watchpoint has been hit. */
1873 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1874 {
1875 CPUState *cpu = current_cpu;
1876 CPUArchState *env = cpu->env_ptr;
1877 target_ulong pc, cs_base;
1878 target_ulong vaddr;
1879 CPUWatchpoint *wp;
1880 int cpu_flags;
1881
1882 if (cpu->watchpoint_hit) {
1883 /* We re-entered the check after replacing the TB. Now raise
1884          * the debug interrupt so that it will trigger after the
1885 * current instruction. */
1886 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1887 return;
1888 }
1889 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1890 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1891 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1892 && (wp->flags & flags)) {
1893 if (flags == BP_MEM_READ) {
1894 wp->flags |= BP_WATCHPOINT_HIT_READ;
1895 } else {
1896 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1897 }
1898 wp->hitaddr = vaddr;
1899 wp->hitattrs = attrs;
1900 if (!cpu->watchpoint_hit) {
1901 cpu->watchpoint_hit = wp;
1902 tb_check_watchpoint(cpu);
1903 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1904 cpu->exception_index = EXCP_DEBUG;
1905 cpu_loop_exit(cpu);
1906 } else {
1907 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1908 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1909 cpu_resume_from_signal(cpu, NULL);
1910 }
1911 }
1912 } else {
1913 wp->flags &= ~BP_WATCHPOINT_HIT;
1914 }
1915 }
1916 }
1917
1918 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1919 so these check for a hit then pass through to the normal out-of-line
1920 phys routines. */
1921 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
1922 unsigned size, MemTxAttrs attrs)
1923 {
1924 MemTxResult res;
1925 uint64_t data;
1926
1927 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
1928 switch (size) {
1929 case 1:
1930 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
1931 break;
1932 case 2:
1933 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
1934 break;
1935 case 4:
1936 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
1937 break;
1938 default: abort();
1939 }
1940 *pdata = data;
1941 return res;
1942 }
1943
1944 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
1945 uint64_t val, unsigned size,
1946 MemTxAttrs attrs)
1947 {
1948 MemTxResult res;
1949
1950 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
1951 switch (size) {
1952 case 1:
1953 address_space_stb(&address_space_memory, addr, val, attrs, &res);
1954 break;
1955 case 2:
1956 address_space_stw(&address_space_memory, addr, val, attrs, &res);
1957 break;
1958 case 4:
1959 address_space_stl(&address_space_memory, addr, val, attrs, &res);
1960 break;
1961 default: abort();
1962 }
1963 return res;
1964 }
1965
1966 static const MemoryRegionOps watch_mem_ops = {
1967 .read_with_attrs = watch_mem_read,
1968 .write_with_attrs = watch_mem_write,
1969 .endianness = DEVICE_NATIVE_ENDIAN,
1970 };
1971
1972 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
1973 unsigned len, MemTxAttrs attrs)
1974 {
1975 subpage_t *subpage = opaque;
1976 uint8_t buf[8];
1977 MemTxResult res;
1978
1979 #if defined(DEBUG_SUBPAGE)
1980 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1981 subpage, len, addr);
1982 #endif
1983 res = address_space_read(subpage->as, addr + subpage->base,
1984 attrs, buf, len);
1985 if (res) {
1986 return res;
1987 }
1988 switch (len) {
1989 case 1:
1990 *data = ldub_p(buf);
1991 return MEMTX_OK;
1992 case 2:
1993 *data = lduw_p(buf);
1994 return MEMTX_OK;
1995 case 4:
1996 *data = ldl_p(buf);
1997 return MEMTX_OK;
1998 case 8:
1999 *data = ldq_p(buf);
2000 return MEMTX_OK;
2001 default:
2002 abort();
2003 }
2004 }
2005
2006 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2007 uint64_t value, unsigned len, MemTxAttrs attrs)
2008 {
2009 subpage_t *subpage = opaque;
2010 uint8_t buf[8];
2011
2012 #if defined(DEBUG_SUBPAGE)
2013 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2014 " value %"PRIx64"\n",
2015 __func__, subpage, len, addr, value);
2016 #endif
2017 switch (len) {
2018 case 1:
2019 stb_p(buf, value);
2020 break;
2021 case 2:
2022 stw_p(buf, value);
2023 break;
2024 case 4:
2025 stl_p(buf, value);
2026 break;
2027 case 8:
2028 stq_p(buf, value);
2029 break;
2030 default:
2031 abort();
2032 }
2033 return address_space_write(subpage->as, addr + subpage->base,
2034 attrs, buf, len);
2035 }
2036
2037 static bool subpage_accepts(void *opaque, hwaddr addr,
2038 unsigned len, bool is_write)
2039 {
2040 subpage_t *subpage = opaque;
2041 #if defined(DEBUG_SUBPAGE)
2042 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2043 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2044 #endif
2045
2046 return address_space_access_valid(subpage->as, addr + subpage->base,
2047 len, is_write);
2048 }
2049
2050 static const MemoryRegionOps subpage_ops = {
2051 .read_with_attrs = subpage_read,
2052 .write_with_attrs = subpage_write,
2053 .impl.min_access_size = 1,
2054 .impl.max_access_size = 8,
2055 .valid.min_access_size = 1,
2056 .valid.max_access_size = 8,
2057 .valid.accepts = subpage_accepts,
2058 .endianness = DEVICE_NATIVE_ENDIAN,
2059 };
2060
2061 static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
2062 uint16_t section)
2063 {
2064 int idx, eidx;
2065
2066 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2067 return -1;
2068 idx = SUBPAGE_IDX(start);
2069 eidx = SUBPAGE_IDX(end);
2070 #if defined(DEBUG_SUBPAGE)
2071 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2072 __func__, mmio, start, end, idx, eidx, section);
2073 #endif
2074 for (; idx <= eidx; idx++) {
2075 mmio->sub_section[idx] = section;
2076 }
2077
2078 return 0;
2079 }
2080
2081 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2082 {
2083 subpage_t *mmio;
2084
2085 mmio = g_malloc0(sizeof(subpage_t));
2086
2087 mmio->as = as;
2088 mmio->base = base;
2089 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2090 NULL, TARGET_PAGE_SIZE);
2091 mmio->iomem.subpage = true;
2092 #if defined(DEBUG_SUBPAGE)
2093 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2094 mmio, base, TARGET_PAGE_SIZE);
2095 #endif
2096 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2097
2098 return mmio;
2099 }
2100
2101 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2102 MemoryRegion *mr)
2103 {
2104 assert(as);
2105 MemoryRegionSection section = {
2106 .address_space = as,
2107 .mr = mr,
2108 .offset_within_address_space = 0,
2109 .offset_within_region = 0,
2110 .size = int128_2_64(),
2111 };
2112
2113 return phys_section_add(map, &section);
2114 }
2115
2116 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2117 {
2118 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2119 MemoryRegionSection *sections = d->map.sections;
2120
2121 return sections[index & ~TARGET_PAGE_MASK].mr;
2122 }
2123
2124 static void io_mem_init(void)
2125 {
2126 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2127 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2128 NULL, UINT64_MAX);
2129 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2130 NULL, UINT64_MAX);
2131 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2132 NULL, UINT64_MAX);
2133 }
2134
2135 static void mem_begin(MemoryListener *listener)
2136 {
2137 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2138 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2139 uint16_t n;
2140
2141 n = dummy_section(&d->map, as, &io_mem_unassigned);
2142 assert(n == PHYS_SECTION_UNASSIGNED);
2143 n = dummy_section(&d->map, as, &io_mem_notdirty);
2144 assert(n == PHYS_SECTION_NOTDIRTY);
2145 n = dummy_section(&d->map, as, &io_mem_rom);
2146 assert(n == PHYS_SECTION_ROM);
2147 n = dummy_section(&d->map, as, &io_mem_watch);
2148 assert(n == PHYS_SECTION_WATCH);
2149
2150 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2151 d->as = as;
2152 as->next_dispatch = d;
2153 }
2154
2155 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2156 {
2157 phys_sections_free(&d->map);
2158 g_free(d);
2159 }
2160
2161 static void mem_commit(MemoryListener *listener)
2162 {
2163 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2164 AddressSpaceDispatch *cur = as->dispatch;
2165 AddressSpaceDispatch *next = as->next_dispatch;
2166
2167 phys_page_compact_all(next, next->map.nodes_nb);
2168
2169 atomic_rcu_set(&as->dispatch, next);
2170 if (cur) {
2171 call_rcu(cur, address_space_dispatch_free, rcu);
2172 }
2173 }
2174
2175 static void tcg_commit(MemoryListener *listener)
2176 {
2177 CPUState *cpu;
2178
2179 /* since each CPU stores ram addresses in its TLB cache, we must
2180 reset the modified entries */
2181 /* XXX: slow ! */
2182 CPU_FOREACH(cpu) {
2183 /* FIXME: Disentangle the cpu.h circular files deps so we can
2184 directly get the right CPU from listener. */
2185 if (cpu->tcg_as_listener != listener) {
2186 continue;
2187 }
2188 cpu_reload_memory_map(cpu);
2189 }
2190 }
2191
2192 void address_space_init_dispatch(AddressSpace *as)
2193 {
2194 as->dispatch = NULL;
2195 as->dispatch_listener = (MemoryListener) {
2196 .begin = mem_begin,
2197 .commit = mem_commit,
2198 .region_add = mem_add,
2199 .region_nop = mem_add,
2200 .priority = 0,
2201 };
2202 memory_listener_register(&as->dispatch_listener, as);
2203 }
2204
2205 void address_space_unregister(AddressSpace *as)
2206 {
2207 memory_listener_unregister(&as->dispatch_listener);
2208 }
2209
2210 void address_space_destroy_dispatch(AddressSpace *as)
2211 {
2212 AddressSpaceDispatch *d = as->dispatch;
2213
2214 atomic_rcu_set(&as->dispatch, NULL);
2215 if (d) {
2216 call_rcu(d, address_space_dispatch_free, rcu);
2217 }
2218 }
2219
2220 static void memory_map_init(void)
2221 {
2222 system_memory = g_malloc(sizeof(*system_memory));
2223
2224 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2225 address_space_init(&address_space_memory, system_memory, "memory");
2226
2227 system_io = g_malloc(sizeof(*system_io));
2228 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2229 65536);
2230 address_space_init(&address_space_io, system_io, "I/O");
2231 }
2232
2233 MemoryRegion *get_system_memory(void)
2234 {
2235 return system_memory;
2236 }
2237
2238 MemoryRegion *get_system_io(void)
2239 {
2240 return system_io;
2241 }
2242
2243 #endif /* !defined(CONFIG_USER_ONLY) */
2244
2245 /* physical memory access (slow version, mainly for debug) */
2246 #if defined(CONFIG_USER_ONLY)
2247 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2248 uint8_t *buf, int len, int is_write)
2249 {
2250 int l, flags;
2251 target_ulong page;
2252 void *p;
2253
2254 while (len > 0) {
2255 page = addr & TARGET_PAGE_MASK;
2256 l = (page + TARGET_PAGE_SIZE) - addr;
2257 if (l > len)
2258 l = len;
2259 flags = page_get_flags(page);
2260 if (!(flags & PAGE_VALID))
2261 return -1;
2262 if (is_write) {
2263 if (!(flags & PAGE_WRITE))
2264 return -1;
2265 /* XXX: this code should not depend on lock_user */
2266 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2267 return -1;
2268 memcpy(p, buf, l);
2269 unlock_user(p, addr, l);
2270 } else {
2271 if (!(flags & PAGE_READ))
2272 return -1;
2273 /* XXX: this code should not depend on lock_user */
2274 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2275 return -1;
2276 memcpy(buf, p, l);
2277 unlock_user(p, addr, 0);
2278 }
2279 len -= l;
2280 buf += l;
2281 addr += l;
2282 }
2283 return 0;
2284 }
2285
2286 #else
2287
2288 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2289 hwaddr length)
2290 {
2291 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2292 /* No early return if dirty_log_mask is or becomes 0, because
2293 * cpu_physical_memory_set_dirty_range will still call
2294 * xen_modified_memory.
2295 */
2296 if (dirty_log_mask) {
2297 dirty_log_mask =
2298 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2299 }
2300 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2301 tb_invalidate_phys_range(addr, addr + length);
2302 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2303 }
2304 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2305 }
2306
2307 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2308 {
2309 unsigned access_size_max = mr->ops->valid.max_access_size;
2310
2311 /* Regions are assumed to support 1-4 byte accesses unless
2312 otherwise specified. */
2313 if (access_size_max == 0) {
2314 access_size_max = 4;
2315 }
2316
2317 /* Bound the maximum access by the alignment of the address. */
2318 if (!mr->ops->impl.unaligned) {
2319 unsigned align_size_max = addr & -addr;
2320 if (align_size_max != 0 && align_size_max < access_size_max) {
2321 access_size_max = align_size_max;
2322 }
2323 }
2324
2325 /* Don't attempt accesses larger than the maximum. */
2326 if (l > access_size_max) {
2327 l = access_size_max;
2328 }
2329 if (l & (l - 1)) {
2330 l = 1 << (qemu_fls(l) - 1);
2331 }
2332
2333 return l;
2334 }
2335
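/* Worked example (illustrative only): an 8-byte request at in-region
 * offset 0x1006 to a region that declares valid.max_access_size == 4
 * and no unaligned support. 0x1006 & -0x1006 == 2, so the first access
 * is clipped to 4 by the region maximum and then to 2 by the alignment;
 * the caller's loop then issues a 4-byte access at 0x1008 and a 2-byte
 * access at 0x100c, i.e. the request is split as 2 + 4 + 2.
 */
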
2336 static bool prepare_mmio_access(MemoryRegion *mr)
2337 {
2338 bool unlocked = !qemu_mutex_iothread_locked();
2339 bool release_lock = false;
2340
2341 if (unlocked && mr->global_locking) {
2342 qemu_mutex_lock_iothread();
2343 unlocked = false;
2344 release_lock = true;
2345 }
2346 if (mr->flush_coalesced_mmio) {
2347 if (unlocked) {
2348 qemu_mutex_lock_iothread();
2349 }
2350 qemu_flush_coalesced_mmio_buffer();
2351 if (unlocked) {
2352 qemu_mutex_unlock_iothread();
2353 }
2354 }
2355
2356 return release_lock;
2357 }
2358
2359 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2360 uint8_t *buf, int len, bool is_write)
2361 {
2362 hwaddr l;
2363 uint8_t *ptr;
2364 uint64_t val;
2365 hwaddr addr1;
2366 MemoryRegion *mr;
2367 MemTxResult result = MEMTX_OK;
2368 bool release_lock = false;
2369
2370 rcu_read_lock();
2371 while (len > 0) {
2372 l = len;
2373 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2374
2375 if (is_write) {
2376 if (!memory_access_is_direct(mr, is_write)) {
2377 release_lock |= prepare_mmio_access(mr);
2378 l = memory_access_size(mr, l, addr1);
2379 /* XXX: could force current_cpu to NULL to avoid
2380 potential bugs */
2381 switch (l) {
2382 case 8:
2383 /* 64 bit write access */
2384 val = ldq_p(buf);
2385 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2386 attrs);
2387 break;
2388 case 4:
2389 /* 32 bit write access */
2390 val = ldl_p(buf);
2391 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2392 attrs);
2393 break;
2394 case 2:
2395 /* 16 bit write access */
2396 val = lduw_p(buf);
2397 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2398 attrs);
2399 break;
2400 case 1:
2401 /* 8 bit write access */
2402 val = ldub_p(buf);
2403 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2404 attrs);
2405 break;
2406 default:
2407 abort();
2408 }
2409 } else {
2410 addr1 += memory_region_get_ram_addr(mr);
2411 /* RAM case */
2412 ptr = qemu_get_ram_ptr(addr1);
2413 memcpy(ptr, buf, l);
2414 invalidate_and_set_dirty(mr, addr1, l);
2415 }
2416 } else {
2417 if (!memory_access_is_direct(mr, is_write)) {
2418 /* I/O case */
2419 release_lock |= prepare_mmio_access(mr);
2420 l = memory_access_size(mr, l, addr1);
2421 switch (l) {
2422 case 8:
2423 /* 64 bit read access */
2424 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2425 attrs);
2426 stq_p(buf, val);
2427 break;
2428 case 4:
2429 /* 32 bit read access */
2430 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2431 attrs);
2432 stl_p(buf, val);
2433 break;
2434 case 2:
2435 /* 16 bit read access */
2436 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2437 attrs);
2438 stw_p(buf, val);
2439 break;
2440 case 1:
2441 /* 8 bit read access */
2442 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2443 attrs);
2444 stb_p(buf, val);
2445 break;
2446 default:
2447 abort();
2448 }
2449 } else {
2450 /* RAM case */
2451 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2452 memcpy(buf, ptr, l);
2453 }
2454 }
2455
2456 if (release_lock) {
2457 qemu_mutex_unlock_iothread();
2458 release_lock = false;
2459 }
2460
2461 len -= l;
2462 buf += l;
2463 addr += l;
2464 }
2465 rcu_read_unlock();
2466
2467 return result;
2468 }
2469
2470 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2471 const uint8_t *buf, int len)
2472 {
2473 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2474 }
2475
2476 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2477 uint8_t *buf, int len)
2478 {
2479 return address_space_rw(as, addr, attrs, buf, len, false);
2480 }
2481
2482
2483 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2484 int len, int is_write)
2485 {
2486 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2487 buf, len, is_write);
2488 }
2489
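/* Illustrative sketch (helper name and caller are hypothetical): copying
 * a block of guest-physical memory while propagating transaction
 * failures, using the wrappers above with unspecified memory attributes.
 * cpu_physical_memory_rw() is the legacy interface that discards the
 * MemTxResult; callers that care about bus errors can check it as below.
 */
static int example_copy_from_guest(AddressSpace *as, hwaddr gpa,
                                   uint8_t *dest, int len)
{
    MemTxResult res;

    res = address_space_read(as, gpa, MEMTXATTRS_UNSPECIFIED, dest, len);
    return res == MEMTX_OK ? 0 : -1;
}
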
2490 enum write_rom_type {
2491 WRITE_DATA,
2492 FLUSH_CACHE,
2493 };
2494
2495 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2496 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2497 {
2498 hwaddr l;
2499 uint8_t *ptr;
2500 hwaddr addr1;
2501 MemoryRegion *mr;
2502
2503 rcu_read_lock();
2504 while (len > 0) {
2505 l = len;
2506 mr = address_space_translate(as, addr, &addr1, &l, true);
2507
2508 if (!(memory_region_is_ram(mr) ||
2509 memory_region_is_romd(mr))) {
2510 l = memory_access_size(mr, l, addr1);
2511 } else {
2512 addr1 += memory_region_get_ram_addr(mr);
2513 /* ROM/RAM case */
2514 ptr = qemu_get_ram_ptr(addr1);
2515 switch (type) {
2516 case WRITE_DATA:
2517 memcpy(ptr, buf, l);
2518 invalidate_and_set_dirty(mr, addr1, l);
2519 break;
2520 case FLUSH_CACHE:
2521 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2522 break;
2523 }
2524 }
2525 len -= l;
2526 buf += l;
2527 addr += l;
2528 }
2529 rcu_read_unlock();
2530 }
2531
2532 /* used for ROM loading: can write in RAM and ROM */
2533 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2534 const uint8_t *buf, int len)
2535 {
2536 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2537 }
2538
2539 void cpu_flush_icache_range(hwaddr start, int len)
2540 {
2541 /*
2542 * This function should do the same thing as an icache flush that was
2543 * triggered from within the guest. For TCG we are always cache coherent,
2544 * so there is no need to flush anything. For KVM / Xen we need to flush
2545 * the host's instruction cache at least.
2546 */
2547 if (tcg_enabled()) {
2548 return;
2549 }
2550
2551 cpu_physical_memory_write_rom_internal(&address_space_memory,
2552 start, NULL, len, FLUSH_CACHE);
2553 }
2554
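/* Illustrative sketch (function and parameter names are placeholders):
 * the typical firmware-loading sequence these two helpers are meant for:
 * write the blob through the ROM path, then make sure the host
 * instruction cache is coherent for accelerators such as KVM or Xen.
 */
static void example_load_firmware(AddressSpace *as, hwaddr load_addr,
                                  const uint8_t *blob, int blob_size)
{
    cpu_physical_memory_write_rom(as, load_addr, blob, blob_size);
    cpu_flush_icache_range(load_addr, blob_size);
}
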
2555 typedef struct {
2556 MemoryRegion *mr;
2557 void *buffer;
2558 hwaddr addr;
2559 hwaddr len;
2560 bool in_use;
2561 } BounceBuffer;
2562
2563 static BounceBuffer bounce;
2564
2565 typedef struct MapClient {
2566 QEMUBH *bh;
2567 QLIST_ENTRY(MapClient) link;
2568 } MapClient;
2569
2570 QemuMutex map_client_list_lock;
2571 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2572 = QLIST_HEAD_INITIALIZER(map_client_list);
2573
2574 static void cpu_unregister_map_client_do(MapClient *client)
2575 {
2576 QLIST_REMOVE(client, link);
2577 g_free(client);
2578 }
2579
2580 static void cpu_notify_map_clients_locked(void)
2581 {
2582 MapClient *client;
2583
2584 while (!QLIST_EMPTY(&map_client_list)) {
2585 client = QLIST_FIRST(&map_client_list);
2586 qemu_bh_schedule(client->bh);
2587 cpu_unregister_map_client_do(client);
2588 }
2589 }
2590
2591 void cpu_register_map_client(QEMUBH *bh)
2592 {
2593 MapClient *client = g_malloc(sizeof(*client));
2594
2595 qemu_mutex_lock(&map_client_list_lock);
2596 client->bh = bh;
2597 QLIST_INSERT_HEAD(&map_client_list, client, link);
2598 if (!atomic_read(&bounce.in_use)) {
2599 cpu_notify_map_clients_locked();
2600 }
2601 qemu_mutex_unlock(&map_client_list_lock);
2602 }
2603
2604 void cpu_exec_init_all(void)
2605 {
2606 qemu_mutex_init(&ram_list.mutex);
2607 memory_map_init();
2608 io_mem_init();
2609 qemu_mutex_init(&map_client_list_lock);
2610 }
2611
2612 void cpu_unregister_map_client(QEMUBH *bh)
2613 {
2614 MapClient *client;
2615
2616 qemu_mutex_lock(&map_client_list_lock);
2617 QLIST_FOREACH(client, &map_client_list, link) {
2618 if (client->bh == bh) {
2619 cpu_unregister_map_client_do(client);
2620 break;
2621 }
2622 }
2623 qemu_mutex_unlock(&map_client_list_lock);
2624 }
2625
2626 static void cpu_notify_map_clients(void)
2627 {
2628 qemu_mutex_lock(&map_client_list_lock);
2629 cpu_notify_map_clients_locked();
2630 qemu_mutex_unlock(&map_client_list_lock);
2631 }
2632
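/* Illustrative sketch (ExampleDev and its fields are hypothetical): how a
 * DMA helper can use the map-client list above to be notified when a
 * failed address_space_map() is worth retrying, i.e. once the shared
 * bounce buffer has been released again.
 */
typedef struct ExampleDev {
    QEMUBH *retry_bh;                   /* created once, reused on failure */
} ExampleDev;

static void example_dev_retry_map(void *opaque)
{
    /* re-issue the address_space_map() call that previously returned NULL */
}

static void example_dev_map_failed(ExampleDev *dev)
{
    if (!dev->retry_bh) {
        dev->retry_bh = qemu_bh_new(example_dev_retry_map, dev);
    }
    cpu_register_map_client(dev->retry_bh);
}
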
2633 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2634 {
2635 MemoryRegion *mr;
2636 hwaddr l, xlat;
2637
2638 rcu_read_lock();
2639 while (len > 0) {
2640 l = len;
2641 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2642 if (!memory_access_is_direct(mr, is_write)) {
2643 l = memory_access_size(mr, l, addr);
2644 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2645 rcu_read_unlock();
2646 return false;
2647 }
2648 }
2649 len -= l;
2650 addr += l;
2651 }
2652 rcu_read_unlock();
2653 return true;
2654 }
2655
2656 /* Map a physical memory region into a host virtual address.
2657 * May map a subset of the requested range, given by and returned in *plen.
2658 * May return NULL if resources needed to perform the mapping are exhausted.
2659 * Use only for reads OR writes - not for read-modify-write operations.
2660 * Use cpu_register_map_client() to know when retrying the map operation is
2661 * likely to succeed.
2662 */
2663 void *address_space_map(AddressSpace *as,
2664 hwaddr addr,
2665 hwaddr *plen,
2666 bool is_write)
2667 {
2668 hwaddr len = *plen;
2669 hwaddr done = 0;
2670 hwaddr l, xlat, base;
2671 MemoryRegion *mr, *this_mr;
2672 ram_addr_t raddr;
2673
2674 if (len == 0) {
2675 return NULL;
2676 }
2677
2678 l = len;
2679 rcu_read_lock();
2680 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2681
2682 if (!memory_access_is_direct(mr, is_write)) {
2683 if (atomic_xchg(&bounce.in_use, true)) {
2684 rcu_read_unlock();
2685 return NULL;
2686 }
2687 /* Avoid unbounded allocations */
2688 l = MIN(l, TARGET_PAGE_SIZE);
2689 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2690 bounce.addr = addr;
2691 bounce.len = l;
2692
2693 memory_region_ref(mr);
2694 bounce.mr = mr;
2695 if (!is_write) {
2696 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2697 bounce.buffer, l);
2698 }
2699
2700 rcu_read_unlock();
2701 *plen = l;
2702 return bounce.buffer;
2703 }
2704
2705 base = xlat;
2706 raddr = memory_region_get_ram_addr(mr);
2707
2708 for (;;) {
2709 len -= l;
2710 addr += l;
2711 done += l;
2712 if (len == 0) {
2713 break;
2714 }
2715
2716 l = len;
2717 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2718 if (this_mr != mr || xlat != base + done) {
2719 break;
2720 }
2721 }
2722
2723 memory_region_ref(mr);
2724 rcu_read_unlock();
2725 *plen = done;
2726 return qemu_ram_ptr_length(raddr + base, plen);
2727 }
2728
2729 /* Unmaps a memory region previously mapped by address_space_map().
2730 * Will also mark the memory as dirty if is_write == 1. access_len gives
2731 * the amount of memory that was actually read or written by the caller.
2732 */
2733 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2734 int is_write, hwaddr access_len)
2735 {
2736 if (buffer != bounce.buffer) {
2737 MemoryRegion *mr;
2738 ram_addr_t addr1;
2739
2740 mr = qemu_ram_addr_from_host(buffer, &addr1);
2741 assert(mr != NULL);
2742 if (is_write) {
2743 invalidate_and_set_dirty(mr, addr1, access_len);
2744 }
2745 if (xen_enabled()) {
2746 xen_invalidate_map_cache_entry(buffer);
2747 }
2748 memory_region_unref(mr);
2749 return;
2750 }
2751 if (is_write) {
2752 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2753 bounce.buffer, access_len);
2754 }
2755 qemu_vfree(bounce.buffer);
2756 bounce.buffer = NULL;
2757 memory_region_unref(bounce.mr);
2758 atomic_mb_set(&bounce.in_use, false);
2759 cpu_notify_map_clients();
2760 }
2761
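/* Illustrative sketch (hypothetical caller): the usage pattern described
 * by the comments above: obtain a temporary host pointer for up to 'len'
 * bytes of guest memory, use it for a single direction of transfer, then
 * unmap with the number of bytes actually touched.
 */
static void example_dma_fill(AddressSpace *as, hwaddr gpa, hwaddr len,
                             uint8_t pattern)
{
    hwaddr plen = len;
    void *host = address_space_map(as, gpa, &plen, true);

    if (!host) {
        /* resources exhausted (e.g. the bounce buffer is in use);
         * a real caller would register a map client and retry later */
        return;
    }
    memset(host, pattern, plen);        /* plen may be smaller than len */
    address_space_unmap(as, host, plen, true, plen);
}
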
2762 void *cpu_physical_memory_map(hwaddr addr,
2763 hwaddr *plen,
2764 int is_write)
2765 {
2766 return address_space_map(&address_space_memory, addr, plen, is_write);
2767 }
2768
2769 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2770 int is_write, hwaddr access_len)
2771 {
2772 address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2773 }
2774
2775 /* warning: addr must be aligned */
2776 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2777 MemTxAttrs attrs,
2778 MemTxResult *result,
2779 enum device_endian endian)
2780 {
2781 uint8_t *ptr;
2782 uint64_t val;
2783 MemoryRegion *mr;
2784 hwaddr l = 4;
2785 hwaddr addr1;
2786 MemTxResult r;
2787 bool release_lock = false;
2788
2789 rcu_read_lock();
2790 mr = address_space_translate(as, addr, &addr1, &l, false);
2791 if (l < 4 || !memory_access_is_direct(mr, false)) {
2792 release_lock |= prepare_mmio_access(mr);
2793
2794 /* I/O case */
2795 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2796 #if defined(TARGET_WORDS_BIGENDIAN)
2797 if (endian == DEVICE_LITTLE_ENDIAN) {
2798 val = bswap32(val);
2799 }
2800 #else
2801 if (endian == DEVICE_BIG_ENDIAN) {
2802 val = bswap32(val);
2803 }
2804 #endif
2805 } else {
2806 /* RAM case */
2807 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2808 & TARGET_PAGE_MASK)
2809 + addr1);
2810 switch (endian) {
2811 case DEVICE_LITTLE_ENDIAN:
2812 val = ldl_le_p(ptr);
2813 break;
2814 case DEVICE_BIG_ENDIAN:
2815 val = ldl_be_p(ptr);
2816 break;
2817 default:
2818 val = ldl_p(ptr);
2819 break;
2820 }
2821 r = MEMTX_OK;
2822 }
2823 if (result) {
2824 *result = r;
2825 }
2826 if (release_lock) {
2827 qemu_mutex_unlock_iothread();
2828 }
2829 rcu_read_unlock();
2830 return val;
2831 }
2832
2833 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2834 MemTxAttrs attrs, MemTxResult *result)
2835 {
2836 return address_space_ldl_internal(as, addr, attrs, result,
2837 DEVICE_NATIVE_ENDIAN);
2838 }
2839
2840 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2841 MemTxAttrs attrs, MemTxResult *result)
2842 {
2843 return address_space_ldl_internal(as, addr, attrs, result,
2844 DEVICE_LITTLE_ENDIAN);
2845 }
2846
2847 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2848 MemTxAttrs attrs, MemTxResult *result)
2849 {
2850 return address_space_ldl_internal(as, addr, attrs, result,
2851 DEVICE_BIG_ENDIAN);
2852 }
2853
2854 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2855 {
2856 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2857 }
2858
2859 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2860 {
2861 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2862 }
2863
2864 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2865 {
2866 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2867 }
2868
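/* Illustrative sketch (register address and helper name are arbitrary):
 * the *_phys convenience wrappers above ignore transaction failures,
 * while the address_space_* variants report them through a MemTxResult
 * that the caller can inspect.
 */
static uint32_t example_read_le_reg(AddressSpace *as, hwaddr reg_addr,
                                    bool *ok)
{
    MemTxResult res;
    uint32_t val;

    /* ldl_le_phys(as, reg_addr) would do the same read but drop 'res' */
    val = address_space_ldl_le(as, reg_addr, MEMTXATTRS_UNSPECIFIED, &res);
    *ok = (res == MEMTX_OK);
    return val;
}
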
2869 /* warning: addr must be aligned */
2870 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2871 MemTxAttrs attrs,
2872 MemTxResult *result,
2873 enum device_endian endian)
2874 {
2875 uint8_t *ptr;
2876 uint64_t val;
2877 MemoryRegion *mr;
2878 hwaddr l = 8;
2879 hwaddr addr1;
2880 MemTxResult r;
2881 bool release_lock = false;
2882
2883 rcu_read_lock();
2884 mr = address_space_translate(as, addr, &addr1, &l,
2885 false);
2886 if (l < 8 || !memory_access_is_direct(mr, false)) {
2887 release_lock |= prepare_mmio_access(mr);
2888
2889 /* I/O case */
2890 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
2891 #if defined(TARGET_WORDS_BIGENDIAN)
2892 if (endian == DEVICE_LITTLE_ENDIAN) {
2893 val = bswap64(val);
2894 }
2895 #else
2896 if (endian == DEVICE_BIG_ENDIAN) {
2897 val = bswap64(val);
2898 }
2899 #endif
2900 } else {
2901 /* RAM case */
2902 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2903 & TARGET_PAGE_MASK)
2904 + addr1);
2905 switch (endian) {
2906 case DEVICE_LITTLE_ENDIAN:
2907 val = ldq_le_p(ptr);
2908 break;
2909 case DEVICE_BIG_ENDIAN:
2910 val = ldq_be_p(ptr);
2911 break;
2912 default:
2913 val = ldq_p(ptr);
2914 break;
2915 }
2916 r = MEMTX_OK;
2917 }
2918 if (result) {
2919 *result = r;
2920 }
2921 if (release_lock) {
2922 qemu_mutex_unlock_iothread();
2923 }
2924 rcu_read_unlock();
2925 return val;
2926 }
2927
2928 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
2929 MemTxAttrs attrs, MemTxResult *result)
2930 {
2931 return address_space_ldq_internal(as, addr, attrs, result,
2932 DEVICE_NATIVE_ENDIAN);
2933 }
2934
2935 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
2936 MemTxAttrs attrs, MemTxResult *result)
2937 {
2938 return address_space_ldq_internal(as, addr, attrs, result,
2939 DEVICE_LITTLE_ENDIAN);
2940 }
2941
2942 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
2943 MemTxAttrs attrs, MemTxResult *result)
2944 {
2945 return address_space_ldq_internal(as, addr, attrs, result,
2946 DEVICE_BIG_ENDIAN);
2947 }
2948
2949 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2950 {
2951 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2952 }
2953
2954 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2955 {
2956 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2957 }
2958
2959 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2960 {
2961 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2962 }
2963
2964 /* XXX: optimize */
2965 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
2966 MemTxAttrs attrs, MemTxResult *result)
2967 {
2968 uint8_t val;
2969 MemTxResult r;
2970
2971 r = address_space_rw(as, addr, attrs, &val, 1, 0);
2972 if (result) {
2973 *result = r;
2974 }
2975 return val;
2976 }
2977
2978 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2979 {
2980 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2981 }
2982
2983 /* warning: addr must be aligned */
2984 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
2985 hwaddr addr,
2986 MemTxAttrs attrs,
2987 MemTxResult *result,
2988 enum device_endian endian)
2989 {
2990 uint8_t *ptr;
2991 uint64_t val;
2992 MemoryRegion *mr;
2993 hwaddr l = 2;
2994 hwaddr addr1;
2995 MemTxResult r;
2996 bool release_lock = false;
2997
2998 rcu_read_lock();
2999 mr = address_space_translate(as, addr, &addr1, &l,
3000 false);
3001 if (l < 2 || !memory_access_is_direct(mr, false)) {
3002 release_lock |= prepare_mmio_access(mr);
3003
3004 /* I/O case */
3005 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3006 #if defined(TARGET_WORDS_BIGENDIAN)
3007 if (endian == DEVICE_LITTLE_ENDIAN) {
3008 val = bswap16(val);
3009 }
3010 #else
3011 if (endian == DEVICE_BIG_ENDIAN) {
3012 val = bswap16(val);
3013 }
3014 #endif
3015 } else {
3016 /* RAM case */
3017 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3018 & TARGET_PAGE_MASK)
3019 + addr1);
3020 switch (endian) {
3021 case DEVICE_LITTLE_ENDIAN:
3022 val = lduw_le_p(ptr);
3023 break;
3024 case DEVICE_BIG_ENDIAN:
3025 val = lduw_be_p(ptr);
3026 break;
3027 default:
3028 val = lduw_p(ptr);
3029 break;
3030 }
3031 r = MEMTX_OK;
3032 }
3033 if (result) {
3034 *result = r;
3035 }
3036 if (release_lock) {
3037 qemu_mutex_unlock_iothread();
3038 }
3039 rcu_read_unlock();
3040 return val;
3041 }
3042
3043 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3044 MemTxAttrs attrs, MemTxResult *result)
3045 {
3046 return address_space_lduw_internal(as, addr, attrs, result,
3047 DEVICE_NATIVE_ENDIAN);
3048 }
3049
3050 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3051 MemTxAttrs attrs, MemTxResult *result)
3052 {
3053 return address_space_lduw_internal(as, addr, attrs, result,
3054 DEVICE_LITTLE_ENDIAN);
3055 }
3056
3057 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3058 MemTxAttrs attrs, MemTxResult *result)
3059 {
3060 return address_space_lduw_internal(as, addr, attrs, result,
3061 DEVICE_BIG_ENDIAN);
3062 }
3063
3064 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3065 {
3066 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3067 }
3068
3069 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3070 {
3071 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3072 }
3073
3074 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3075 {
3076 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3077 }
3078
3079 /* warning: addr must be aligned. The ram page is not marked as dirty
3080 and the code inside is not invalidated. It is useful if the dirty
3081 bits are used to track modified PTEs */
3082 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3083 MemTxAttrs attrs, MemTxResult *result)
3084 {
3085 uint8_t *ptr;
3086 MemoryRegion *mr;
3087 hwaddr l = 4;
3088 hwaddr addr1;
3089 MemTxResult r;
3090 uint8_t dirty_log_mask;
3091 bool release_lock = false;
3092
3093 rcu_read_lock();
3094 mr = address_space_translate(as, addr, &addr1, &l,
3095 true);
3096 if (l < 4 || !memory_access_is_direct(mr, true)) {
3097 release_lock |= prepare_mmio_access(mr);
3098
3099 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3100 } else {
3101 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3102 ptr = qemu_get_ram_ptr(addr1);
3103 stl_p(ptr, val);
3104
3105 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3106 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3107 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3108 r = MEMTX_OK;
3109 }
3110 if (result) {
3111 *result = r;
3112 }
3113 if (release_lock) {
3114 qemu_mutex_unlock_iothread();
3115 }
3116 rcu_read_unlock();
3117 }
3118
3119 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3120 {
3121 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3122 }
3123
3124 /* warning: addr must be aligned */
3125 static inline void address_space_stl_internal(AddressSpace *as,
3126 hwaddr addr, uint32_t val,
3127 MemTxAttrs attrs,
3128 MemTxResult *result,
3129 enum device_endian endian)
3130 {
3131 uint8_t *ptr;
3132 MemoryRegion *mr;
3133 hwaddr l = 4;
3134 hwaddr addr1;
3135 MemTxResult r;
3136 bool release_lock = false;
3137
3138 rcu_read_lock();
3139 mr = address_space_translate(as, addr, &addr1, &l,
3140 true);
3141 if (l < 4 || !memory_access_is_direct(mr, true)) {
3142 release_lock |= prepare_mmio_access(mr);
3143
3144 #if defined(TARGET_WORDS_BIGENDIAN)
3145 if (endian == DEVICE_LITTLE_ENDIAN) {
3146 val = bswap32(val);
3147 }
3148 #else
3149 if (endian == DEVICE_BIG_ENDIAN) {
3150 val = bswap32(val);
3151 }
3152 #endif
3153 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3154 } else {
3155 /* RAM case */
3156 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3157 ptr = qemu_get_ram_ptr(addr1);
3158 switch (endian) {
3159 case DEVICE_LITTLE_ENDIAN:
3160 stl_le_p(ptr, val);
3161 break;
3162 case DEVICE_BIG_ENDIAN:
3163 stl_be_p(ptr, val);
3164 break;
3165 default:
3166 stl_p(ptr, val);
3167 break;
3168 }
3169 invalidate_and_set_dirty(mr, addr1, 4);
3170 r = MEMTX_OK;
3171 }
3172 if (result) {
3173 *result = r;
3174 }
3175 if (release_lock) {
3176 qemu_mutex_unlock_iothread();
3177 }
3178 rcu_read_unlock();
3179 }
3180
3181 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3182 MemTxAttrs attrs, MemTxResult *result)
3183 {
3184 address_space_stl_internal(as, addr, val, attrs, result,
3185 DEVICE_NATIVE_ENDIAN);
3186 }
3187
3188 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3189 MemTxAttrs attrs, MemTxResult *result)
3190 {
3191 address_space_stl_internal(as, addr, val, attrs, result,
3192 DEVICE_LITTLE_ENDIAN);
3193 }
3194
3195 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3196 MemTxAttrs attrs, MemTxResult *result)
3197 {
3198 address_space_stl_internal(as, addr, val, attrs, result,
3199 DEVICE_BIG_ENDIAN);
3200 }
3201
3202 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3203 {
3204 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3205 }
3206
3207 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3208 {
3209 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3210 }
3211
3212 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3213 {
3214 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3215 }
3216
3217 /* XXX: optimize */
3218 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3219 MemTxAttrs attrs, MemTxResult *result)
3220 {
3221 uint8_t v = val;
3222 MemTxResult r;
3223
3224 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3225 if (result) {
3226 *result = r;
3227 }
3228 }
3229
3230 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3231 {
3232 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3233 }
3234
3235 /* warning: addr must be aligned */
3236 static inline void address_space_stw_internal(AddressSpace *as,
3237 hwaddr addr, uint32_t val,
3238 MemTxAttrs attrs,
3239 MemTxResult *result,
3240 enum device_endian endian)
3241 {
3242 uint8_t *ptr;
3243 MemoryRegion *mr;
3244 hwaddr l = 2;
3245 hwaddr addr1;
3246 MemTxResult r;
3247 bool release_lock = false;
3248
3249 rcu_read_lock();
3250 mr = address_space_translate(as, addr, &addr1, &l, true);
3251 if (l < 2 || !memory_access_is_direct(mr, true)) {
3252 release_lock |= prepare_mmio_access(mr);
3253
3254 #if defined(TARGET_WORDS_BIGENDIAN)
3255 if (endian == DEVICE_LITTLE_ENDIAN) {
3256 val = bswap16(val);
3257 }
3258 #else
3259 if (endian == DEVICE_BIG_ENDIAN) {
3260 val = bswap16(val);
3261 }
3262 #endif
3263 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3264 } else {
3265 /* RAM case */
3266 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3267 ptr = qemu_get_ram_ptr(addr1);
3268 switch (endian) {
3269 case DEVICE_LITTLE_ENDIAN:
3270 stw_le_p(ptr, val);
3271 break;
3272 case DEVICE_BIG_ENDIAN:
3273 stw_be_p(ptr, val);
3274 break;
3275 default:
3276 stw_p(ptr, val);
3277 break;
3278 }
3279 invalidate_and_set_dirty(mr, addr1, 2);
3280 r = MEMTX_OK;
3281 }
3282 if (result) {
3283 *result = r;
3284 }
3285 if (release_lock) {
3286 qemu_mutex_unlock_iothread();
3287 }
3288 rcu_read_unlock();
3289 }
3290
3291 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3292 MemTxAttrs attrs, MemTxResult *result)
3293 {
3294 address_space_stw_internal(as, addr, val, attrs, result,
3295 DEVICE_NATIVE_ENDIAN);
3296 }
3297
3298 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3299 MemTxAttrs attrs, MemTxResult *result)
3300 {
3301 address_space_stw_internal(as, addr, val, attrs, result,
3302 DEVICE_LITTLE_ENDIAN);
3303 }
3304
3305 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3306 MemTxAttrs attrs, MemTxResult *result)
3307 {
3308 address_space_stw_internal(as, addr, val, attrs, result,
3309 DEVICE_BIG_ENDIAN);
3310 }
3311
3312 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3313 {
3314 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3315 }
3316
3317 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3318 {
3319 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3320 }
3321
3322 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3323 {
3324 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3325 }
3326
3327 /* XXX: optimize */
3328 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3329 MemTxAttrs attrs, MemTxResult *result)
3330 {
3331 MemTxResult r;
3332 val = tswap64(val);
3333 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3334 if (result) {
3335 *result = r;
3336 }
3337 }
3338
3339 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3340 MemTxAttrs attrs, MemTxResult *result)
3341 {
3342 MemTxResult r;
3343 val = cpu_to_le64(val);
3344 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3345 if (result) {
3346 *result = r;
3347 }
3348 }
3349 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3350 MemTxAttrs attrs, MemTxResult *result)
3351 {
3352 MemTxResult r;
3353 val = cpu_to_be64(val);
3354 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3355 if (result) {
3356 *result = r;
3357 }
3358 }
3359
3360 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3361 {
3362 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3363 }
3364
3365 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3366 {
3367 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3368 }
3369
3370 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3371 {
3372 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3373 }
3374
3375 /* virtual memory access for debug (includes writing to ROM) */
3376 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3377 uint8_t *buf, int len, int is_write)
3378 {
3379 int l;
3380 hwaddr phys_addr;
3381 target_ulong page;
3382
3383 while (len > 0) {
3384 page = addr & TARGET_PAGE_MASK;
3385 phys_addr = cpu_get_phys_page_debug(cpu, page);
3386 /* if no physical page mapped, return an error */
3387 if (phys_addr == -1)
3388 return -1;
3389 l = (page + TARGET_PAGE_SIZE) - addr;
3390 if (l > len)
3391 l = len;
3392 phys_addr += (addr & ~TARGET_PAGE_MASK);
3393 if (is_write) {
3394 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3395 } else {
3396 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3397 buf, l, 0);
3398 }
3399 len -= l;
3400 buf += l;
3401 addr += l;
3402 }
3403 return 0;
3404 }
3405 #endif
3406
3407 /*
3408 * A helper function for the _utterly broken_ virtio device model to find out if
3409 * it's running on a big endian machine. Don't do this at home kids!
3410 */
3411 bool target_words_bigendian(void);
3412 bool target_words_bigendian(void)
3413 {
3414 #if defined(TARGET_WORDS_BIGENDIAN)
3415 return true;
3416 #else
3417 return false;
3418 #endif
3419 }
3420
3421 #ifndef CONFIG_USER_ONLY
3422 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3423 {
3424 MemoryRegion *mr;
3425 hwaddr l = 1;
3426 bool res;
3427
3428 rcu_read_lock();
3429 mr = address_space_translate(&address_space_memory,
3430 phys_addr, &phys_addr, &l, false);
3431
3432 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3433 rcu_read_unlock();
3434 return res;
3435 }
3436
3437 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3438 {
3439 RAMBlock *block;
3440 int ret = 0;
3441
3442 rcu_read_lock();
3443 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3444 ret = func(block->idstr, block->host, block->offset,
3445 block->used_length, opaque);
3446 if (ret) {
3447 break;
3448 }
3449 }
3450 rcu_read_unlock();
3451 return ret;
3452 }
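
/* Illustrative sketch (callback and accumulator are hypothetical): summing
 * the used length of every RAM block with the iterator above. The opaque
 * pointer carries the accumulator; a non-zero return value would stop the
 * walk early.
 *
 *     uint64_t total = 0;
 *     qemu_ram_foreach_block(example_sum_block, &total);
 */
static int example_sum_block(const char *idstr, void *host_addr,
                             ram_addr_t offset, ram_addr_t length,
                             void *opaque)
{
    *(uint64_t *)opaque += length;
    return 0;
}
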
3453 #endif