]> git.proxmox.com Git - mirror_qemu.git/blob - exec.c
usb: usb-mtp QOMify
[mirror_qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "exec/cputlb.h"
52 #include "translate-all.h"
53
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
56
57 #include "qemu/range.h"
58
59 //#define DEBUG_SUBPAGE
60
61 #if !defined(CONFIG_USER_ONLY)
62 static bool in_migration;
63
64 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
65 * are protected by the ramlist lock.
66 */
67 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
68
69 static MemoryRegion *system_memory;
70 static MemoryRegion *system_io;
71
72 AddressSpace address_space_io;
73 AddressSpace address_space_memory;
74
75 MemoryRegion io_mem_rom, io_mem_notdirty;
76 static MemoryRegion io_mem_unassigned;
77
78 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
79 #define RAM_PREALLOC (1 << 0)
80
81 /* RAM is mmap-ed with MAP_SHARED */
82 #define RAM_SHARED (1 << 1)
83
84 /* Only a portion of RAM (used_length) is actually used, and migrated.
85 * This used_length size can change across reboots.
86 */
87 #define RAM_RESIZEABLE (1 << 2)
88
89 #endif
90
91 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
92 /* current CPU in the current thread. It is only valid inside
93 cpu_exec() */
94 DEFINE_TLS(CPUState *, current_cpu);
95 /* 0 = Do not count executed instructions.
96 1 = Precise instruction counting.
97 2 = Adaptive rate instruction counting. */
98 int use_icount;
99
100 #if !defined(CONFIG_USER_ONLY)
101
102 typedef struct PhysPageEntry PhysPageEntry;
103
struct PhysPageEntry {
    /*
     * Number of radix-tree levels to descend to reach the next node;
     * zero marks a leaf entry.
     */
    uint32_t skip : 6;
    /*
     * For a leaf (skip == 0) this is an index into phys_sections,
     * otherwise it is an index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};
110
111 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
112
113 /* Size of the L2 (and L3, etc) page tables. */
114 #define ADDR_SPACE_BITS 64
115
116 #define P_L2_BITS 9
117 #define P_L2_SIZE (1 << P_L2_BITS)
118
119 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
120
121 typedef PhysPageEntry Node[P_L2_SIZE];
122
123 typedef struct PhysPageMap {
124 struct rcu_head rcu;
125
126 unsigned sections_nb;
127 unsigned sections_nb_alloc;
128 unsigned nodes_nb;
129 unsigned nodes_nb_alloc;
130 Node *nodes;
131 MemoryRegionSection *sections;
132 } PhysPageMap;
133
134 struct AddressSpaceDispatch {
135 struct rcu_head rcu;
136
137 /* This is a multi-level map on the physical address space.
138 * The bottom level has pointers to MemoryRegionSections.
139 */
140 PhysPageEntry phys_map;
141 PhysPageMap map;
142 AddressSpace *as;
143 };
144
145 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
146 typedef struct subpage_t {
147 MemoryRegion iomem;
148 AddressSpace *as;
149 hwaddr base;
150 uint16_t sub_section[TARGET_PAGE_SIZE];
151 } subpage_t;
152
153 #define PHYS_SECTION_UNASSIGNED 0
154 #define PHYS_SECTION_NOTDIRTY 1
155 #define PHYS_SECTION_ROM 2
156 #define PHYS_SECTION_WATCH 3
157
158 static void io_mem_init(void);
159 static void memory_map_init(void);
160 static void tcg_commit(MemoryListener *listener);
161
162 static MemoryRegion io_mem_watch;
163 #endif
164
165 #if !defined(CONFIG_USER_ONLY)
166
167 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
168 {
169 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
170 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
171 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
172 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
173 }
174 }
175
176 static uint32_t phys_map_node_alloc(PhysPageMap *map)
177 {
178 unsigned i;
179 uint32_t ret;
180
181 ret = map->nodes_nb++;
182 assert(ret != PHYS_MAP_NODE_NIL);
183 assert(ret != map->nodes_nb_alloc);
184 for (i = 0; i < P_L2_SIZE; ++i) {
185 map->nodes[ret][i].skip = 1;
186 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
187 }
188 return ret;
189 }
190
191 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
192 hwaddr *index, hwaddr *nb, uint16_t leaf,
193 int level)
194 {
195 PhysPageEntry *p;
196 int i;
197 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
198
199 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
200 lp->ptr = phys_map_node_alloc(map);
201 p = map->nodes[lp->ptr];
202 if (level == 0) {
203 for (i = 0; i < P_L2_SIZE; i++) {
204 p[i].skip = 0;
205 p[i].ptr = PHYS_SECTION_UNASSIGNED;
206 }
207 }
208 } else {
209 p = map->nodes[lp->ptr];
210 }
211 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
212
213 while (*nb && lp < &p[P_L2_SIZE]) {
214 if ((*index & (step - 1)) == 0 && *nb >= step) {
215 lp->skip = 0;
216 lp->ptr = leaf;
217 *index += step;
218 *nb -= step;
219 } else {
220 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
221 }
222 ++lp;
223 }
224 }
225
226 static void phys_page_set(AddressSpaceDispatch *d,
227 hwaddr index, hwaddr nb,
228 uint16_t leaf)
229 {
230 /* Wildly overreserve - it doesn't matter much. */
231 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
232
233 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
234 }
235
236 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
237 * and update our entry so we can skip it and go directly to the destination.
238 */
239 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
240 {
241 unsigned valid_ptr = P_L2_SIZE;
242 int valid = 0;
243 PhysPageEntry *p;
244 int i;
245
246 if (lp->ptr == PHYS_MAP_NODE_NIL) {
247 return;
248 }
249
250 p = nodes[lp->ptr];
251 for (i = 0; i < P_L2_SIZE; i++) {
252 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
253 continue;
254 }
255
256 valid_ptr = i;
257 valid++;
258 if (p[i].skip) {
259 phys_page_compact(&p[i], nodes, compacted);
260 }
261 }
262
263 /* We can only compress if there's only one child. */
264 if (valid != 1) {
265 return;
266 }
267
268 assert(valid_ptr < P_L2_SIZE);
269
270 /* Don't compress if it won't fit in the # of bits we have. */
271 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
272 return;
273 }
274
275 lp->ptr = p[valid_ptr].ptr;
276 if (!p[valid_ptr].skip) {
277 /* If our only child is a leaf, make this a leaf. */
278 /* By design, we should have made this node a leaf to begin with so we
279 * should never reach here.
280 * But since it's so simple to handle this, let's do it just in case we
281 * change this rule.
282 */
283 lp->skip = 0;
284 } else {
285 lp->skip += p[valid_ptr].skip;
286 }
287 }
288
289 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
290 {
291 DECLARE_BITMAP(compacted, nodes_nb);
292
293 if (d->phys_map.skip) {
294 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
295 }
296 }
297
298 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
299 Node *nodes, MemoryRegionSection *sections)
300 {
301 PhysPageEntry *p;
302 hwaddr index = addr >> TARGET_PAGE_BITS;
303 int i;
304
305 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
306 if (lp.ptr == PHYS_MAP_NODE_NIL) {
307 return &sections[PHYS_SECTION_UNASSIGNED];
308 }
309 p = nodes[lp.ptr];
310 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
311 }
312
313 if (sections[lp.ptr].size.hi ||
314 range_covers_byte(sections[lp.ptr].offset_within_address_space,
315 sections[lp.ptr].size.lo, addr)) {
316 return &sections[lp.ptr];
317 } else {
318 return &sections[PHYS_SECTION_UNASSIGNED];
319 }
320 }
321
322 bool memory_region_is_unassigned(MemoryRegion *mr)
323 {
324 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
325 && mr != &io_mem_watch;
326 }
327
328 /* Called from RCU critical section */
329 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
330 hwaddr addr,
331 bool resolve_subpage)
332 {
333 MemoryRegionSection *section;
334 subpage_t *subpage;
335
336 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
337 if (resolve_subpage && section->mr->subpage) {
338 subpage = container_of(section->mr, subpage_t, iomem);
339 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
340 }
341 return section;
342 }
343
344 /* Called from RCU critical section */
345 static MemoryRegionSection *
346 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
347 hwaddr *plen, bool resolve_subpage)
348 {
349 MemoryRegionSection *section;
350 Int128 diff;
351
352 section = address_space_lookup_region(d, addr, resolve_subpage);
353 /* Compute offset within MemoryRegionSection */
354 addr -= section->offset_within_address_space;
355
356 /* Compute offset within MemoryRegion */
357 *xlat = addr + section->offset_within_region;
358
359 diff = int128_sub(section->mr->size, int128_make64(addr));
360 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
361 return section;
362 }
363
364 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
365 {
366 if (memory_region_is_ram(mr)) {
367 return !(is_write && mr->readonly);
368 }
369 if (memory_region_is_romd(mr)) {
370 return !is_write;
371 }
372
373 return false;
374 }
375
376 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
377 hwaddr *xlat, hwaddr *plen,
378 bool is_write)
379 {
380 IOMMUTLBEntry iotlb;
381 MemoryRegionSection *section;
382 MemoryRegion *mr;
383
384 rcu_read_lock();
385 for (;;) {
386 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
387 section = address_space_translate_internal(d, addr, &addr, plen, true);
388 mr = section->mr;
389
390 if (!mr->iommu_ops) {
391 break;
392 }
393
394 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
395 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
396 | (addr & iotlb.addr_mask));
397 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
398 if (!(iotlb.perm & (1 << is_write))) {
399 mr = &io_mem_unassigned;
400 break;
401 }
402
403 as = iotlb.target_as;
404 }
405
406 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
407 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
408 *plen = MIN(page, *plen);
409 }
410
411 *xlat = addr;
412 rcu_read_unlock();
413 return mr;
414 }
415
416 /* Called from RCU critical section */
417 MemoryRegionSection *
418 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
419 hwaddr *xlat, hwaddr *plen)
420 {
421 MemoryRegionSection *section;
422 section = address_space_translate_internal(cpu->memory_dispatch,
423 addr, xlat, plen, false);
424
425 assert(!section->mr->iommu_ops);
426 return section;
427 }
428 #endif
429
430 #if !defined(CONFIG_USER_ONLY)
431
432 static int cpu_common_post_load(void *opaque, int version_id)
433 {
434 CPUState *cpu = opaque;
435
436 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
437 version_id is increased. */
438 cpu->interrupt_request &= ~0x01;
439 tlb_flush(cpu, 1);
440
441 return 0;
442 }
443
444 static int cpu_common_pre_load(void *opaque)
445 {
446 CPUState *cpu = opaque;
447
448 cpu->exception_index = -1;
449
450 return 0;
451 }
452
453 static bool cpu_common_exception_index_needed(void *opaque)
454 {
455 CPUState *cpu = opaque;
456
457 return tcg_enabled() && cpu->exception_index != -1;
458 }
459
460 static const VMStateDescription vmstate_cpu_common_exception_index = {
461 .name = "cpu_common/exception_index",
462 .version_id = 1,
463 .minimum_version_id = 1,
464 .fields = (VMStateField[]) {
465 VMSTATE_INT32(exception_index, CPUState),
466 VMSTATE_END_OF_LIST()
467 }
468 };
469
470 const VMStateDescription vmstate_cpu_common = {
471 .name = "cpu_common",
472 .version_id = 1,
473 .minimum_version_id = 1,
474 .pre_load = cpu_common_pre_load,
475 .post_load = cpu_common_post_load,
476 .fields = (VMStateField[]) {
477 VMSTATE_UINT32(halted, CPUState),
478 VMSTATE_UINT32(interrupt_request, CPUState),
479 VMSTATE_END_OF_LIST()
480 },
481 .subsections = (VMStateSubsection[]) {
482 {
483 .vmsd = &vmstate_cpu_common_exception_index,
484 .needed = cpu_common_exception_index_needed,
485 } , {
486 /* empty */
487 }
488 }
489 };
490
491 #endif
492
493 CPUState *qemu_get_cpu(int index)
494 {
495 CPUState *cpu;
496
497 CPU_FOREACH(cpu) {
498 if (cpu->cpu_index == index) {
499 return cpu;
500 }
501 }
502
503 return NULL;
504 }
505
506 #if !defined(CONFIG_USER_ONLY)
507 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
508 {
509 /* We only support one address space per cpu at the moment. */
510 assert(cpu->as == as);
511
512 if (cpu->tcg_as_listener) {
513 memory_listener_unregister(cpu->tcg_as_listener);
514 } else {
515 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
516 }
517 cpu->tcg_as_listener->commit = tcg_commit;
518 memory_listener_register(cpu->tcg_as_listener, as);
519 }
520 #endif
521
522 void cpu_exec_init(CPUArchState *env)
523 {
524 CPUState *cpu = ENV_GET_CPU(env);
525 CPUClass *cc = CPU_GET_CLASS(cpu);
526 CPUState *some_cpu;
527 int cpu_index;
528
529 #if defined(CONFIG_USER_ONLY)
530 cpu_list_lock();
531 #endif
532 cpu_index = 0;
533 CPU_FOREACH(some_cpu) {
534 cpu_index++;
535 }
536 cpu->cpu_index = cpu_index;
537 cpu->numa_node = 0;
538 QTAILQ_INIT(&cpu->breakpoints);
539 QTAILQ_INIT(&cpu->watchpoints);
540 #ifndef CONFIG_USER_ONLY
541 cpu->as = &address_space_memory;
542 cpu->thread_id = qemu_get_thread_id();
543 cpu_reload_memory_map(cpu);
544 #endif
545 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
546 #if defined(CONFIG_USER_ONLY)
547 cpu_list_unlock();
548 #endif
549 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
550 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
551 }
552 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
553 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
554 cpu_save, cpu_load, env);
555 assert(cc->vmsd == NULL);
556 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
557 #endif
558 if (cc->vmsd != NULL) {
559 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
560 }
561 }
562
563 #if defined(CONFIG_USER_ONLY)
564 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
565 {
566 tb_invalidate_phys_page_range(pc, pc + 1, 0);
567 }
568 #else
569 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
570 {
571 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
572 if (phys != -1) {
573 tb_invalidate_phys_addr(cpu->as,
574 phys | (pc & ~TARGET_PAGE_MASK));
575 }
576 }
577 #endif
578
579 #if defined(CONFIG_USER_ONLY)
580 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
581
582 {
583 }
584
585 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
586 int flags)
587 {
588 return -ENOSYS;
589 }
590
591 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
592 {
593 }
594
595 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
596 int flags, CPUWatchpoint **watchpoint)
597 {
598 return -ENOSYS;
599 }
600 #else
601 /* Add a watchpoint. */
602 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
603 int flags, CPUWatchpoint **watchpoint)
604 {
605 CPUWatchpoint *wp;
606
607 /* forbid ranges which are empty or run off the end of the address space */
608 if (len == 0 || (addr + len - 1) < addr) {
609 error_report("tried to set invalid watchpoint at %"
610 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
611 return -EINVAL;
612 }
613 wp = g_malloc(sizeof(*wp));
614
615 wp->vaddr = addr;
616 wp->len = len;
617 wp->flags = flags;
618
619 /* keep all GDB-injected watchpoints in front */
620 if (flags & BP_GDB) {
621 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
622 } else {
623 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
624 }
625
626 tlb_flush_page(cpu, addr);
627
628 if (watchpoint)
629 *watchpoint = wp;
630 return 0;
631 }
632
633 /* Remove a specific watchpoint. */
634 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
635 int flags)
636 {
637 CPUWatchpoint *wp;
638
639 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
640 if (addr == wp->vaddr && len == wp->len
641 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
642 cpu_watchpoint_remove_by_ref(cpu, wp);
643 return 0;
644 }
645 }
646 return -ENOENT;
647 }
648
649 /* Remove a specific watchpoint by reference. */
650 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
651 {
652 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
653
654 tlb_flush_page(cpu, watchpoint->vaddr);
655
656 g_free(watchpoint);
657 }
658
659 /* Remove all matching watchpoints. */
660 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
661 {
662 CPUWatchpoint *wp, *next;
663
664 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
665 if (wp->flags & mask) {
666 cpu_watchpoint_remove_by_ref(cpu, wp);
667 }
668 }
669 }
670
671 /* Return true if this watchpoint address matches the specified
672 * access (ie the address range covered by the watchpoint overlaps
673 * partially or completely with the address range covered by the
674 * access).
675 */
676 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
677 vaddr addr,
678 vaddr len)
679 {
680 /* We know the lengths are non-zero, but a little caution is
681 * required to avoid errors in the case where the range ends
682 * exactly at the top of the address space and so addr + len
683 * wraps round to zero.
684 */
685 vaddr wpend = wp->vaddr + wp->len - 1;
686 vaddr addrend = addr + len - 1;
687
688 return !(addr > wpend || wp->vaddr > addrend);
689 }
690
691 #endif
692
693 /* Add a breakpoint. */
694 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
695 CPUBreakpoint **breakpoint)
696 {
697 CPUBreakpoint *bp;
698
699 bp = g_malloc(sizeof(*bp));
700
701 bp->pc = pc;
702 bp->flags = flags;
703
704 /* keep all GDB-injected breakpoints in front */
705 if (flags & BP_GDB) {
706 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
707 } else {
708 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
709 }
710
711 breakpoint_invalidate(cpu, pc);
712
713 if (breakpoint) {
714 *breakpoint = bp;
715 }
716 return 0;
717 }
718
719 /* Remove a specific breakpoint. */
720 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
721 {
722 CPUBreakpoint *bp;
723
724 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
725 if (bp->pc == pc && bp->flags == flags) {
726 cpu_breakpoint_remove_by_ref(cpu, bp);
727 return 0;
728 }
729 }
730 return -ENOENT;
731 }
732
733 /* Remove a specific breakpoint by reference. */
734 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
735 {
736 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
737
738 breakpoint_invalidate(cpu, breakpoint->pc);
739
740 g_free(breakpoint);
741 }
742
743 /* Remove all matching breakpoints. */
744 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
745 {
746 CPUBreakpoint *bp, *next;
747
748 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
749 if (bp->flags & mask) {
750 cpu_breakpoint_remove_by_ref(cpu, bp);
751 }
752 }
753 }
754
755 /* enable or disable single step mode. EXCP_DEBUG is returned by the
756 CPU loop after each instruction */
757 void cpu_single_step(CPUState *cpu, int enabled)
758 {
759 if (cpu->singlestep_enabled != enabled) {
760 cpu->singlestep_enabled = enabled;
761 if (kvm_enabled()) {
762 kvm_update_guest_debug(cpu, 0);
763 } else {
764 /* must flush all the translated code to avoid inconsistencies */
765 /* XXX: only flush what is necessary */
766 CPUArchState *env = cpu->env_ptr;
767 tb_flush(env);
768 }
769 }
770 }
771
772 void cpu_abort(CPUState *cpu, const char *fmt, ...)
773 {
774 va_list ap;
775 va_list ap2;
776
777 va_start(ap, fmt);
778 va_copy(ap2, ap);
779 fprintf(stderr, "qemu: fatal: ");
780 vfprintf(stderr, fmt, ap);
781 fprintf(stderr, "\n");
782 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
783 if (qemu_log_enabled()) {
784 qemu_log("qemu: fatal: ");
785 qemu_log_vprintf(fmt, ap2);
786 qemu_log("\n");
787 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
788 qemu_log_flush();
789 qemu_log_close();
790 }
791 va_end(ap2);
792 va_end(ap);
793 #if defined(CONFIG_USER_ONLY)
794 {
795 struct sigaction act;
796 sigfillset(&act.sa_mask);
797 act.sa_handler = SIG_DFL;
798 sigaction(SIGABRT, &act, NULL);
799 }
800 #endif
801 abort();
802 }
803
804 #if !defined(CONFIG_USER_ONLY)
805 /* Called from RCU critical section */
806 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
807 {
808 RAMBlock *block;
809
810 block = atomic_rcu_read(&ram_list.mru_block);
811 if (block && addr - block->offset < block->max_length) {
812 goto found;
813 }
814 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
815 if (addr - block->offset < block->max_length) {
816 goto found;
817 }
818 }
819
820 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
821 abort();
822
823 found:
824 /* It is safe to write mru_block outside the iothread lock. This
825 * is what happens:
826 *
827 * mru_block = xxx
828 * rcu_read_unlock()
829 * xxx removed from list
830 * rcu_read_lock()
831 * read mru_block
832 * mru_block = NULL;
833 * call_rcu(reclaim_ramblock, xxx);
834 * rcu_read_unlock()
835 *
836 * atomic_rcu_set is not needed here. The block was already published
837 * when it was placed into the list. Here we're just making an extra
838 * copy of the pointer.
839 */
840 ram_list.mru_block = block;
841 return block;
842 }
843
844 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
845 {
846 ram_addr_t start1;
847 RAMBlock *block;
848 ram_addr_t end;
849
850 end = TARGET_PAGE_ALIGN(start + length);
851 start &= TARGET_PAGE_MASK;
852
853 rcu_read_lock();
854 block = qemu_get_ram_block(start);
855 assert(block == qemu_get_ram_block(end - 1));
856 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
857 cpu_tlb_reset_dirty_all(start1, length);
858 rcu_read_unlock();
859 }
860
861 /* Note: start and end must be within the same ram block. */
862 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
863 unsigned client)
864 {
865 if (length == 0)
866 return;
867 cpu_physical_memory_clear_dirty_range_type(start, length, client);
868
869 if (tcg_enabled()) {
870 tlb_reset_dirty_range_all(start, length);
871 }
872 }
873
874 static void cpu_physical_memory_set_dirty_tracking(bool enable)
875 {
876 in_migration = enable;
877 }
878
879 /* Called from RCU critical section */
880 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
881 MemoryRegionSection *section,
882 target_ulong vaddr,
883 hwaddr paddr, hwaddr xlat,
884 int prot,
885 target_ulong *address)
886 {
887 hwaddr iotlb;
888 CPUWatchpoint *wp;
889
890 if (memory_region_is_ram(section->mr)) {
891 /* Normal RAM. */
892 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
893 + xlat;
894 if (!section->readonly) {
895 iotlb |= PHYS_SECTION_NOTDIRTY;
896 } else {
897 iotlb |= PHYS_SECTION_ROM;
898 }
899 } else {
900 iotlb = section - section->address_space->dispatch->map.sections;
901 iotlb += xlat;
902 }
903
904 /* Make accesses to pages with watchpoints go via the
905 watchpoint trap routines. */
906 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
907 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
908 /* Avoid trapping reads of pages with a write breakpoint. */
909 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
910 iotlb = PHYS_SECTION_WATCH + paddr;
911 *address |= TLB_MMIO;
912 break;
913 }
914 }
915 }
916
917 return iotlb;
918 }
919 #endif /* defined(CONFIG_USER_ONLY) */
920
921 #if !defined(CONFIG_USER_ONLY)
922
923 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
924 uint16_t section);
925 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
926
927 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
928 qemu_anon_ram_alloc;
929
930 /*
931 * Set a custom physical guest memory alloator.
932 * Accelerators with unusual needs may need this. Hopefully, we can
933 * get rid of it eventually.
934 */
935 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
936 {
937 phys_mem_alloc = alloc;
938 }
939
940 static uint16_t phys_section_add(PhysPageMap *map,
941 MemoryRegionSection *section)
942 {
943 /* The physical section number is ORed with a page-aligned
944 * pointer to produce the iotlb entries. Thus it should
945 * never overflow into the page-aligned value.
946 */
947 assert(map->sections_nb < TARGET_PAGE_SIZE);
948
949 if (map->sections_nb == map->sections_nb_alloc) {
950 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
951 map->sections = g_renew(MemoryRegionSection, map->sections,
952 map->sections_nb_alloc);
953 }
954 map->sections[map->sections_nb] = *section;
955 memory_region_ref(section->mr);
956 return map->sections_nb++;
957 }
958
959 static void phys_section_destroy(MemoryRegion *mr)
960 {
961 memory_region_unref(mr);
962
963 if (mr->subpage) {
964 subpage_t *subpage = container_of(mr, subpage_t, iomem);
965 object_unref(OBJECT(&subpage->iomem));
966 g_free(subpage);
967 }
968 }
969
970 static void phys_sections_free(PhysPageMap *map)
971 {
972 while (map->sections_nb > 0) {
973 MemoryRegionSection *section = &map->sections[--map->sections_nb];
974 phys_section_destroy(section->mr);
975 }
976 g_free(map->sections);
977 g_free(map->nodes);
978 }
979
980 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
981 {
982 subpage_t *subpage;
983 hwaddr base = section->offset_within_address_space
984 & TARGET_PAGE_MASK;
985 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
986 d->map.nodes, d->map.sections);
987 MemoryRegionSection subsection = {
988 .offset_within_address_space = base,
989 .size = int128_make64(TARGET_PAGE_SIZE),
990 };
991 hwaddr start, end;
992
993 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
994
995 if (!(existing->mr->subpage)) {
996 subpage = subpage_init(d->as, base);
997 subsection.address_space = d->as;
998 subsection.mr = &subpage->iomem;
999 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1000 phys_section_add(&d->map, &subsection));
1001 } else {
1002 subpage = container_of(existing->mr, subpage_t, iomem);
1003 }
1004 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1005 end = start + int128_get64(section->size) - 1;
1006 subpage_register(subpage, start, end,
1007 phys_section_add(&d->map, section));
1008 }
1009
1010
1011 static void register_multipage(AddressSpaceDispatch *d,
1012 MemoryRegionSection *section)
1013 {
1014 hwaddr start_addr = section->offset_within_address_space;
1015 uint16_t section_index = phys_section_add(&d->map, section);
1016 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1017 TARGET_PAGE_BITS));
1018
1019 assert(num_pages);
1020 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1021 }
1022
1023 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1024 {
1025 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1026 AddressSpaceDispatch *d = as->next_dispatch;
1027 MemoryRegionSection now = *section, remain = *section;
1028 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1029
1030 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1031 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1032 - now.offset_within_address_space;
1033
1034 now.size = int128_min(int128_make64(left), now.size);
1035 register_subpage(d, &now);
1036 } else {
1037 now.size = int128_zero();
1038 }
1039 while (int128_ne(remain.size, now.size)) {
1040 remain.size = int128_sub(remain.size, now.size);
1041 remain.offset_within_address_space += int128_get64(now.size);
1042 remain.offset_within_region += int128_get64(now.size);
1043 now = remain;
1044 if (int128_lt(remain.size, page_size)) {
1045 register_subpage(d, &now);
1046 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1047 now.size = page_size;
1048 register_subpage(d, &now);
1049 } else {
1050 now.size = int128_and(now.size, int128_neg(page_size));
1051 register_multipage(d, &now);
1052 }
1053 }
1054 }
1055
/* Flush the coalesced MMIO buffer when KVM is enabled; otherwise do nothing. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1061
1062 void qemu_mutex_lock_ramlist(void)
1063 {
1064 qemu_mutex_lock(&ram_list.mutex);
1065 }
1066
1067 void qemu_mutex_unlock_ramlist(void)
1068 {
1069 qemu_mutex_unlock(&ram_list.mutex);
1070 }
1071
1072 #ifdef __linux__
1073
1074 #include <sys/vfs.h>
1075
1076 #define HUGETLBFS_MAGIC 0x958458f6
1077
1078 static long gethugepagesize(const char *path, Error **errp)
1079 {
1080 struct statfs fs;
1081 int ret;
1082
1083 do {
1084 ret = statfs(path, &fs);
1085 } while (ret != 0 && errno == EINTR);
1086
1087 if (ret != 0) {
1088 error_setg_errno(errp, errno, "failed to get page size of file %s",
1089 path);
1090 return 0;
1091 }
1092
1093 if (fs.f_type != HUGETLBFS_MAGIC)
1094 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1095
1096 return fs.f_bsize;
1097 }
1098
1099 static void *file_ram_alloc(RAMBlock *block,
1100 ram_addr_t memory,
1101 const char *path,
1102 Error **errp)
1103 {
1104 char *filename;
1105 char *sanitized_name;
1106 char *c;
1107 void *area = NULL;
1108 int fd;
1109 uint64_t hpagesize;
1110 Error *local_err = NULL;
1111
1112 hpagesize = gethugepagesize(path, &local_err);
1113 if (local_err) {
1114 error_propagate(errp, local_err);
1115 goto error;
1116 }
1117 block->mr->align = hpagesize;
1118
1119 if (memory < hpagesize) {
1120 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1121 "or larger than huge page size 0x%" PRIx64,
1122 memory, hpagesize);
1123 goto error;
1124 }
1125
1126 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1127 error_setg(errp,
1128 "host lacks kvm mmu notifiers, -mem-path unsupported");
1129 goto error;
1130 }
1131
1132 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1133 sanitized_name = g_strdup(memory_region_name(block->mr));
1134 for (c = sanitized_name; *c != '\0'; c++) {
1135 if (*c == '/')
1136 *c = '_';
1137 }
1138
1139 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1140 sanitized_name);
1141 g_free(sanitized_name);
1142
1143 fd = mkstemp(filename);
1144 if (fd < 0) {
1145 error_setg_errno(errp, errno,
1146 "unable to create backing store for hugepages");
1147 g_free(filename);
1148 goto error;
1149 }
1150 unlink(filename);
1151 g_free(filename);
1152
1153 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1154
1155 /*
1156 * ftruncate is not supported by hugetlbfs in older
1157 * hosts, so don't bother bailing out on errors.
1158 * If anything goes wrong with it under other filesystems,
1159 * mmap will fail.
1160 */
1161 if (ftruncate(fd, memory)) {
1162 perror("ftruncate");
1163 }
1164
1165 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1166 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1167 fd, 0);
1168 if (area == MAP_FAILED) {
1169 error_setg_errno(errp, errno,
1170 "unable to map backing store for hugepages");
1171 close(fd);
1172 goto error;
1173 }
1174
1175 if (mem_prealloc) {
1176 os_mem_prealloc(fd, area, memory);
1177 }
1178
1179 block->fd = fd;
1180 return area;
1181
1182 error:
1183 if (mem_prealloc) {
1184 error_report("%s", error_get_pretty(*errp));
1185 exit(1);
1186 }
1187 return NULL;
1188 }
1189 #endif
1190
1191 /* Called with the ramlist lock held. */
1192 static ram_addr_t find_ram_offset(ram_addr_t size)
1193 {
1194 RAMBlock *block, *next_block;
1195 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1196
1197 assert(size != 0); /* it would hand out same offset multiple times */
1198
1199 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1200 return 0;
1201 }
1202
1203 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1204 ram_addr_t end, next = RAM_ADDR_MAX;
1205
1206 end = block->offset + block->max_length;
1207
1208 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1209 if (next_block->offset >= end) {
1210 next = MIN(next, next_block->offset);
1211 }
1212 }
1213 if (next - end >= size && next - end < mingap) {
1214 offset = end;
1215 mingap = next - end;
1216 }
1217 }
1218
1219 if (offset == RAM_ADDR_MAX) {
1220 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1221 (uint64_t)size);
1222 abort();
1223 }
1224
1225 return offset;
1226 }
1227
1228 ram_addr_t last_ram_offset(void)
1229 {
1230 RAMBlock *block;
1231 ram_addr_t last = 0;
1232
1233 rcu_read_lock();
1234 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1235 last = MAX(last, block->offset + block->max_length);
1236 }
1237 rcu_read_unlock();
1238 return last;
1239 }
1240
1241 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1242 {
1243 int ret;
1244
1245 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1246 if (!machine_dump_guest_core(current_machine)) {
1247 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1248 if (ret) {
1249 perror("qemu_madvise");
1250 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1251 "but dump_guest_core=off specified\n");
1252 }
1253 }
1254 }
1255
1256 /* Called within an RCU critical section, or while the ramlist lock
1257 * is held.
1258 */
1259 static RAMBlock *find_ram_block(ram_addr_t addr)
1260 {
1261 RAMBlock *block;
1262
1263 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1264 if (block->offset == addr) {
1265 return block;
1266 }
1267 }
1268
1269 return NULL;
1270 }
1271
1272 /* Called with iothread lock held. */
1273 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1274 {
1275 RAMBlock *new_block, *block;
1276
1277 rcu_read_lock();
1278 new_block = find_ram_block(addr);
1279 assert(new_block);
1280 assert(!new_block->idstr[0]);
1281
1282 if (dev) {
1283 char *id = qdev_get_dev_path(dev);
1284 if (id) {
1285 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1286 g_free(id);
1287 }
1288 }
1289 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1290
1291 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1292 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1293 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1294 new_block->idstr);
1295 abort();
1296 }
1297 }
1298 rcu_read_unlock();
1299 }
1300
1301 /* Called with iothread lock held. */
1302 void qemu_ram_unset_idstr(ram_addr_t addr)
1303 {
1304 RAMBlock *block;
1305
1306 /* FIXME: arch_init.c assumes that this is not called throughout
1307 * migration. Ignore the problem since hot-unplug during migration
1308 * does not work anyway.
1309 */
1310
1311 rcu_read_lock();
1312 block = find_ram_block(addr);
1313 if (block) {
1314 memset(block->idstr, 0, sizeof(block->idstr));
1315 }
1316 rcu_read_unlock();
1317 }
1318
1319 static int memory_try_enable_merging(void *addr, size_t len)
1320 {
1321 if (!machine_mem_merge(current_machine)) {
1322 /* disabled by the user */
1323 return 0;
1324 }
1325
1326 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1327 }
1328
1329 /* Only legal before guest might have detected the memory size: e.g. on
1330 * incoming migration, or right after reset.
1331 *
1332 * As memory core doesn't know how is memory accessed, it is up to
1333 * resize callback to update device state and/or add assertions to detect
1334 * misuse, if necessary.
1335 */
1336 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1337 {
1338 RAMBlock *block = find_ram_block(base);
1339
1340 assert(block);
1341
1342 newsize = TARGET_PAGE_ALIGN(newsize);
1343
1344 if (block->used_length == newsize) {
1345 return 0;
1346 }
1347
1348 if (!(block->flags & RAM_RESIZEABLE)) {
1349 error_setg_errno(errp, EINVAL,
1350 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1351 " in != 0x" RAM_ADDR_FMT, block->idstr,
1352 newsize, block->used_length);
1353 return -EINVAL;
1354 }
1355
1356 if (block->max_length < newsize) {
1357 error_setg_errno(errp, EINVAL,
1358 "Length too large: %s: 0x" RAM_ADDR_FMT
1359 " > 0x" RAM_ADDR_FMT, block->idstr,
1360 newsize, block->max_length);
1361 return -EINVAL;
1362 }
1363
1364 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1365 block->used_length = newsize;
1366 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1367 memory_region_set_size(block->mr, newsize);
1368 if (block->resized) {
1369 block->resized(block->idstr, newsize, block->host);
1370 }
1371 return 0;
1372 }
1373
1374 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1375 {
1376 RAMBlock *block;
1377 RAMBlock *last_block = NULL;
1378 ram_addr_t old_ram_size, new_ram_size;
1379
1380 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1381
1382 qemu_mutex_lock_ramlist();
1383 new_block->offset = find_ram_offset(new_block->max_length);
1384
1385 if (!new_block->host) {
1386 if (xen_enabled()) {
1387 xen_ram_alloc(new_block->offset, new_block->max_length,
1388 new_block->mr);
1389 } else {
1390 new_block->host = phys_mem_alloc(new_block->max_length,
1391 &new_block->mr->align);
1392 if (!new_block->host) {
1393 error_setg_errno(errp, errno,
1394 "cannot set up guest memory '%s'",
1395 memory_region_name(new_block->mr));
1396 qemu_mutex_unlock_ramlist();
1397 return -1;
1398 }
1399 memory_try_enable_merging(new_block->host, new_block->max_length);
1400 }
1401 }
1402
1403 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1404 * QLIST (which has an RCU-friendly variant) does not have insertion at
1405 * tail, so save the last element in last_block.
1406 */
1407 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1408 last_block = block;
1409 if (block->max_length < new_block->max_length) {
1410 break;
1411 }
1412 }
1413 if (block) {
1414 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1415 } else if (last_block) {
1416 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1417 } else { /* list is empty */
1418 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1419 }
1420 ram_list.mru_block = NULL;
1421
1422 /* Write list before version */
1423 smp_wmb();
1424 ram_list.version++;
1425 qemu_mutex_unlock_ramlist();
1426
1427 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1428
1429 if (new_ram_size > old_ram_size) {
1430 int i;
1431
1432 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1433 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1434 ram_list.dirty_memory[i] =
1435 bitmap_zero_extend(ram_list.dirty_memory[i],
1436 old_ram_size, new_ram_size);
1437 }
1438 }
1439 cpu_physical_memory_set_dirty_range(new_block->offset,
1440 new_block->used_length);
1441
1442 if (new_block->host) {
1443 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1444 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1445 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1446 if (kvm_enabled()) {
1447 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1448 }
1449 }
1450
1451 return new_block->offset;
1452 }
1453
1454 #ifdef __linux__
1455 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1456 bool share, const char *mem_path,
1457 Error **errp)
1458 {
1459 RAMBlock *new_block;
1460 ram_addr_t addr;
1461 Error *local_err = NULL;
1462
1463 if (xen_enabled()) {
1464 error_setg(errp, "-mem-path not supported with Xen");
1465 return -1;
1466 }
1467
1468 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1469 /*
1470 * file_ram_alloc() needs to allocate just like
1471 * phys_mem_alloc, but we haven't bothered to provide
1472 * a hook there.
1473 */
1474 error_setg(errp,
1475 "-mem-path not supported with this accelerator");
1476 return -1;
1477 }
1478
1479 size = TARGET_PAGE_ALIGN(size);
1480 new_block = g_malloc0(sizeof(*new_block));
1481 new_block->mr = mr;
1482 new_block->used_length = size;
1483 new_block->max_length = size;
1484 new_block->flags = share ? RAM_SHARED : 0;
1485 new_block->host = file_ram_alloc(new_block, size,
1486 mem_path, errp);
1487 if (!new_block->host) {
1488 g_free(new_block);
1489 return -1;
1490 }
1491
1492 addr = ram_block_add(new_block, &local_err);
1493 if (local_err) {
1494 g_free(new_block);
1495 error_propagate(errp, local_err);
1496 return -1;
1497 }
1498 return addr;
1499 }
1500 #endif
1501
1502 static
1503 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1504 void (*resized)(const char*,
1505 uint64_t length,
1506 void *host),
1507 void *host, bool resizeable,
1508 MemoryRegion *mr, Error **errp)
1509 {
1510 RAMBlock *new_block;
1511 ram_addr_t addr;
1512 Error *local_err = NULL;
1513
1514 size = TARGET_PAGE_ALIGN(size);
1515 max_size = TARGET_PAGE_ALIGN(max_size);
1516 new_block = g_malloc0(sizeof(*new_block));
1517 new_block->mr = mr;
1518 new_block->resized = resized;
1519 new_block->used_length = size;
1520 new_block->max_length = max_size;
1521 assert(max_size >= size);
1522 new_block->fd = -1;
1523 new_block->host = host;
1524 if (host) {
1525 new_block->flags |= RAM_PREALLOC;
1526 }
1527 if (resizeable) {
1528 new_block->flags |= RAM_RESIZEABLE;
1529 }
1530 addr = ram_block_add(new_block, &local_err);
1531 if (local_err) {
1532 g_free(new_block);
1533 error_propagate(errp, local_err);
1534 return -1;
1535 }
1536 return addr;
1537 }
1538
1539 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1540 MemoryRegion *mr, Error **errp)
1541 {
1542 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1543 }
1544
1545 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1546 {
1547 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1548 }
1549
1550 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1551 void (*resized)(const char*,
1552 uint64_t length,
1553 void *host),
1554 MemoryRegion *mr, Error **errp)
1555 {
1556 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1557 }
1558
1559 void qemu_ram_free_from_ptr(ram_addr_t addr)
1560 {
1561 RAMBlock *block;
1562
1563 qemu_mutex_lock_ramlist();
1564 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1565 if (addr == block->offset) {
1566 QLIST_REMOVE_RCU(block, next);
1567 ram_list.mru_block = NULL;
1568 /* Write list before version */
1569 smp_wmb();
1570 ram_list.version++;
1571 g_free_rcu(block, rcu);
1572 break;
1573 }
1574 }
1575 qemu_mutex_unlock_ramlist();
1576 }
1577
1578 static void reclaim_ramblock(RAMBlock *block)
1579 {
1580 if (block->flags & RAM_PREALLOC) {
1581 ;
1582 } else if (xen_enabled()) {
1583 xen_invalidate_map_cache_entry(block->host);
1584 #ifndef _WIN32
1585 } else if (block->fd >= 0) {
1586 munmap(block->host, block->max_length);
1587 close(block->fd);
1588 #endif
1589 } else {
1590 qemu_anon_ram_free(block->host, block->max_length);
1591 }
1592 g_free(block);
1593 }
1594
1595 void qemu_ram_free(ram_addr_t addr)
1596 {
1597 RAMBlock *block;
1598
1599 qemu_mutex_lock_ramlist();
1600 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1601 if (addr == block->offset) {
1602 QLIST_REMOVE_RCU(block, next);
1603 ram_list.mru_block = NULL;
1604 /* Write list before version */
1605 smp_wmb();
1606 ram_list.version++;
1607 call_rcu(block, reclaim_ramblock, rcu);
1608 break;
1609 }
1610 }
1611 qemu_mutex_unlock_ramlist();
1612 }
1613
1614 #ifndef _WIN32
1615 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1616 {
1617 RAMBlock *block;
1618 ram_addr_t offset;
1619 int flags;
1620 void *area, *vaddr;
1621
1622 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1623 offset = addr - block->offset;
1624 if (offset < block->max_length) {
1625 vaddr = ramblock_ptr(block, offset);
1626 if (block->flags & RAM_PREALLOC) {
1627 ;
1628 } else if (xen_enabled()) {
1629 abort();
1630 } else {
1631 flags = MAP_FIXED;
1632 if (block->fd >= 0) {
1633 flags |= (block->flags & RAM_SHARED ?
1634 MAP_SHARED : MAP_PRIVATE);
1635 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1636 flags, block->fd, offset);
1637 } else {
1638 /*
1639 * Remap needs to match alloc. Accelerators that
1640 * set phys_mem_alloc never remap. If they did,
1641 * we'd need a remap hook here.
1642 */
1643 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1644
1645 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1646 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1647 flags, -1, 0);
1648 }
1649 if (area != vaddr) {
1650 fprintf(stderr, "Could not remap addr: "
1651 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1652 length, addr);
1653 exit(1);
1654 }
1655 memory_try_enable_merging(vaddr, length);
1656 qemu_ram_setup_dump(vaddr, length);
1657 }
1658 }
1659 }
1660 }
1661 #endif /* !_WIN32 */
1662
1663 int qemu_get_ram_fd(ram_addr_t addr)
1664 {
1665 RAMBlock *block;
1666 int fd;
1667
1668 rcu_read_lock();
1669 block = qemu_get_ram_block(addr);
1670 fd = block->fd;
1671 rcu_read_unlock();
1672 return fd;
1673 }
1674
1675 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1676 {
1677 RAMBlock *block;
1678 void *ptr;
1679
1680 rcu_read_lock();
1681 block = qemu_get_ram_block(addr);
1682 ptr = ramblock_ptr(block, 0);
1683 rcu_read_unlock();
1684 return ptr;
1685 }
1686
1687 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1688 * This should not be used for general purpose DMA. Use address_space_map
1689 * or address_space_rw instead. For local memory (e.g. video ram) that the
1690 * device owns, use memory_region_get_ram_ptr.
1691 *
1692 * By the time this function returns, the returned pointer is not protected
1693 * by RCU anymore. If the caller is not within an RCU critical section and
1694 * does not hold the iothread lock, it must have other means of protecting the
1695 * pointer, such as a reference to the region that includes the incoming
1696 * ram_addr_t.
1697 */
1698 void *qemu_get_ram_ptr(ram_addr_t addr)
1699 {
1700 RAMBlock *block;
1701 void *ptr;
1702
1703 rcu_read_lock();
1704 block = qemu_get_ram_block(addr);
1705
1706 if (xen_enabled() && block->host == NULL) {
1707 /* We need to check if the requested address is in the RAM
1708 * because we don't want to map the entire memory in QEMU.
1709 * In that case just map until the end of the page.
1710 */
1711 if (block->offset == 0) {
1712 ptr = xen_map_cache(addr, 0, 0);
1713 goto unlock;
1714 }
1715
1716 block->host = xen_map_cache(block->offset, block->max_length, 1);
1717 }
1718 ptr = ramblock_ptr(block, addr - block->offset);
1719
1720 unlock:
1721 rcu_read_unlock();
1722 return ptr;
1723 }
1724
1725 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1726 * but takes a size argument.
1727 *
1728 * By the time this function returns, the returned pointer is not protected
1729 * by RCU anymore. If the caller is not within an RCU critical section and
1730 * does not hold the iothread lock, it must have other means of protecting the
1731 * pointer, such as a reference to the region that includes the incoming
1732 * ram_addr_t.
1733 */
1734 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1735 {
1736 void *ptr;
1737 if (*size == 0) {
1738 return NULL;
1739 }
1740 if (xen_enabled()) {
1741 return xen_map_cache(addr, *size, 1);
1742 } else {
1743 RAMBlock *block;
1744 rcu_read_lock();
1745 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1746 if (addr - block->offset < block->max_length) {
1747 if (addr - block->offset + *size > block->max_length)
1748 *size = block->max_length - addr + block->offset;
1749 ptr = ramblock_ptr(block, addr - block->offset);
1750 rcu_read_unlock();
1751 return ptr;
1752 }
1753 }
1754
1755 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1756 abort();
1757 }
1758 }
1759
1760 /* Some of the softmmu routines need to translate from a host pointer
1761 * (typically a TLB entry) back to a ram offset.
1762 *
1763 * By the time this function returns, the returned pointer is not protected
1764 * by RCU anymore. If the caller is not within an RCU critical section and
1765 * does not hold the iothread lock, it must have other means of protecting the
1766 * pointer, such as a reference to the region that includes the incoming
1767 * ram_addr_t.
1768 */
1769 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1770 {
1771 RAMBlock *block;
1772 uint8_t *host = ptr;
1773 MemoryRegion *mr;
1774
1775 if (xen_enabled()) {
1776 rcu_read_lock();
1777 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1778 mr = qemu_get_ram_block(*ram_addr)->mr;
1779 rcu_read_unlock();
1780 return mr;
1781 }
1782
1783 rcu_read_lock();
1784 block = atomic_rcu_read(&ram_list.mru_block);
1785 if (block && block->host && host - block->host < block->max_length) {
1786 goto found;
1787 }
1788
1789 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1790 /* This case append when the block is not mapped. */
1791 if (block->host == NULL) {
1792 continue;
1793 }
1794 if (host - block->host < block->max_length) {
1795 goto found;
1796 }
1797 }
1798
1799 rcu_read_unlock();
1800 return NULL;
1801
1802 found:
1803 *ram_addr = block->offset + (host - block->host);
1804 mr = block->mr;
1805 rcu_read_unlock();
1806 return mr;
1807 }
1808
1809 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1810 uint64_t val, unsigned size)
1811 {
1812 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1813 tb_invalidate_phys_page_fast(ram_addr, size);
1814 }
1815 switch (size) {
1816 case 1:
1817 stb_p(qemu_get_ram_ptr(ram_addr), val);
1818 break;
1819 case 2:
1820 stw_p(qemu_get_ram_ptr(ram_addr), val);
1821 break;
1822 case 4:
1823 stl_p(qemu_get_ram_ptr(ram_addr), val);
1824 break;
1825 default:
1826 abort();
1827 }
1828 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1829 /* we remove the notdirty callback only if the code has been
1830 flushed */
1831 if (!cpu_physical_memory_is_clean(ram_addr)) {
1832 CPUArchState *env = current_cpu->env_ptr;
1833 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1834 }
1835 }
1836
1837 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1838 unsigned size, bool is_write)
1839 {
1840 return is_write;
1841 }
1842
1843 static const MemoryRegionOps notdirty_mem_ops = {
1844 .write = notdirty_mem_write,
1845 .valid.accepts = notdirty_mem_accepts,
1846 .endianness = DEVICE_NATIVE_ENDIAN,
1847 };
1848
1849 /* Generate a debug exception if a watchpoint has been hit. */
1850 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1851 {
1852 CPUState *cpu = current_cpu;
1853 CPUArchState *env = cpu->env_ptr;
1854 target_ulong pc, cs_base;
1855 target_ulong vaddr;
1856 CPUWatchpoint *wp;
1857 int cpu_flags;
1858
1859 if (cpu->watchpoint_hit) {
1860 /* We re-entered the check after replacing the TB. Now raise
1861 * the debug interrupt so that is will trigger after the
1862 * current instruction. */
1863 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1864 return;
1865 }
1866 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1867 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1868 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1869 && (wp->flags & flags)) {
1870 if (flags == BP_MEM_READ) {
1871 wp->flags |= BP_WATCHPOINT_HIT_READ;
1872 } else {
1873 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1874 }
1875 wp->hitaddr = vaddr;
1876 wp->hitattrs = attrs;
1877 if (!cpu->watchpoint_hit) {
1878 cpu->watchpoint_hit = wp;
1879 tb_check_watchpoint(cpu);
1880 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1881 cpu->exception_index = EXCP_DEBUG;
1882 cpu_loop_exit(cpu);
1883 } else {
1884 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1885 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1886 cpu_resume_from_signal(cpu, NULL);
1887 }
1888 }
1889 } else {
1890 wp->flags &= ~BP_WATCHPOINT_HIT;
1891 }
1892 }
1893 }
1894
1895 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1896 so these check for a hit then pass through to the normal out-of-line
1897 phys routines. */
1898 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
1899 unsigned size, MemTxAttrs attrs)
1900 {
1901 MemTxResult res;
1902 uint64_t data;
1903
1904 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
1905 switch (size) {
1906 case 1:
1907 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
1908 break;
1909 case 2:
1910 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
1911 break;
1912 case 4:
1913 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
1914 break;
1915 default: abort();
1916 }
1917 *pdata = data;
1918 return res;
1919 }
1920
1921 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
1922 uint64_t val, unsigned size,
1923 MemTxAttrs attrs)
1924 {
1925 MemTxResult res;
1926
1927 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
1928 switch (size) {
1929 case 1:
1930 address_space_stb(&address_space_memory, addr, val, attrs, &res);
1931 break;
1932 case 2:
1933 address_space_stw(&address_space_memory, addr, val, attrs, &res);
1934 break;
1935 case 4:
1936 address_space_stl(&address_space_memory, addr, val, attrs, &res);
1937 break;
1938 default: abort();
1939 }
1940 return res;
1941 }
1942
1943 static const MemoryRegionOps watch_mem_ops = {
1944 .read_with_attrs = watch_mem_read,
1945 .write_with_attrs = watch_mem_write,
1946 .endianness = DEVICE_NATIVE_ENDIAN,
1947 };
1948
1949 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
1950 unsigned len, MemTxAttrs attrs)
1951 {
1952 subpage_t *subpage = opaque;
1953 uint8_t buf[8];
1954 MemTxResult res;
1955
1956 #if defined(DEBUG_SUBPAGE)
1957 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1958 subpage, len, addr);
1959 #endif
1960 res = address_space_read(subpage->as, addr + subpage->base,
1961 attrs, buf, len);
1962 if (res) {
1963 return res;
1964 }
1965 switch (len) {
1966 case 1:
1967 *data = ldub_p(buf);
1968 return MEMTX_OK;
1969 case 2:
1970 *data = lduw_p(buf);
1971 return MEMTX_OK;
1972 case 4:
1973 *data = ldl_p(buf);
1974 return MEMTX_OK;
1975 case 8:
1976 *data = ldq_p(buf);
1977 return MEMTX_OK;
1978 default:
1979 abort();
1980 }
1981 }
1982
1983 static MemTxResult subpage_write(void *opaque, hwaddr addr,
1984 uint64_t value, unsigned len, MemTxAttrs attrs)
1985 {
1986 subpage_t *subpage = opaque;
1987 uint8_t buf[8];
1988
1989 #if defined(DEBUG_SUBPAGE)
1990 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1991 " value %"PRIx64"\n",
1992 __func__, subpage, len, addr, value);
1993 #endif
1994 switch (len) {
1995 case 1:
1996 stb_p(buf, value);
1997 break;
1998 case 2:
1999 stw_p(buf, value);
2000 break;
2001 case 4:
2002 stl_p(buf, value);
2003 break;
2004 case 8:
2005 stq_p(buf, value);
2006 break;
2007 default:
2008 abort();
2009 }
2010 return address_space_write(subpage->as, addr + subpage->base,
2011 attrs, buf, len);
2012 }
2013
2014 static bool subpage_accepts(void *opaque, hwaddr addr,
2015 unsigned len, bool is_write)
2016 {
2017 subpage_t *subpage = opaque;
2018 #if defined(DEBUG_SUBPAGE)
2019 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2020 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2021 #endif
2022
2023 return address_space_access_valid(subpage->as, addr + subpage->base,
2024 len, is_write);
2025 }
2026
2027 static const MemoryRegionOps subpage_ops = {
2028 .read_with_attrs = subpage_read,
2029 .write_with_attrs = subpage_write,
2030 .impl.min_access_size = 1,
2031 .impl.max_access_size = 8,
2032 .valid.min_access_size = 1,
2033 .valid.max_access_size = 8,
2034 .valid.accepts = subpage_accepts,
2035 .endianness = DEVICE_NATIVE_ENDIAN,
2036 };
2037
2038 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2039 uint16_t section)
2040 {
2041 int idx, eidx;
2042
2043 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2044 return -1;
2045 idx = SUBPAGE_IDX(start);
2046 eidx = SUBPAGE_IDX(end);
2047 #if defined(DEBUG_SUBPAGE)
2048 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2049 __func__, mmio, start, end, idx, eidx, section);
2050 #endif
2051 for (; idx <= eidx; idx++) {
2052 mmio->sub_section[idx] = section;
2053 }
2054
2055 return 0;
2056 }
2057
2058 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2059 {
2060 subpage_t *mmio;
2061
2062 mmio = g_malloc0(sizeof(subpage_t));
2063
2064 mmio->as = as;
2065 mmio->base = base;
2066 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2067 NULL, TARGET_PAGE_SIZE);
2068 mmio->iomem.subpage = true;
2069 #if defined(DEBUG_SUBPAGE)
2070 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2071 mmio, base, TARGET_PAGE_SIZE);
2072 #endif
2073 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2074
2075 return mmio;
2076 }
2077
2078 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2079 MemoryRegion *mr)
2080 {
2081 assert(as);
2082 MemoryRegionSection section = {
2083 .address_space = as,
2084 .mr = mr,
2085 .offset_within_address_space = 0,
2086 .offset_within_region = 0,
2087 .size = int128_2_64(),
2088 };
2089
2090 return phys_section_add(map, &section);
2091 }
2092
2093 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2094 {
2095 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2096 MemoryRegionSection *sections = d->map.sections;
2097
2098 return sections[index & ~TARGET_PAGE_MASK].mr;
2099 }
2100
2101 static void io_mem_init(void)
2102 {
2103 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2104 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2105 NULL, UINT64_MAX);
2106 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2107 NULL, UINT64_MAX);
2108 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2109 NULL, UINT64_MAX);
2110 }
2111
2112 static void mem_begin(MemoryListener *listener)
2113 {
2114 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2115 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2116 uint16_t n;
2117
2118 n = dummy_section(&d->map, as, &io_mem_unassigned);
2119 assert(n == PHYS_SECTION_UNASSIGNED);
2120 n = dummy_section(&d->map, as, &io_mem_notdirty);
2121 assert(n == PHYS_SECTION_NOTDIRTY);
2122 n = dummy_section(&d->map, as, &io_mem_rom);
2123 assert(n == PHYS_SECTION_ROM);
2124 n = dummy_section(&d->map, as, &io_mem_watch);
2125 assert(n == PHYS_SECTION_WATCH);
2126
2127 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2128 d->as = as;
2129 as->next_dispatch = d;
2130 }
2131
2132 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2133 {
2134 phys_sections_free(&d->map);
2135 g_free(d);
2136 }
2137
2138 static void mem_commit(MemoryListener *listener)
2139 {
2140 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2141 AddressSpaceDispatch *cur = as->dispatch;
2142 AddressSpaceDispatch *next = as->next_dispatch;
2143
2144 phys_page_compact_all(next, next->map.nodes_nb);
2145
2146 atomic_rcu_set(&as->dispatch, next);
2147 if (cur) {
2148 call_rcu(cur, address_space_dispatch_free, rcu);
2149 }
2150 }
2151
2152 static void tcg_commit(MemoryListener *listener)
2153 {
2154 CPUState *cpu;
2155
2156 /* since each CPU stores ram addresses in its TLB cache, we must
2157 reset the modified entries */
2158 /* XXX: slow ! */
2159 CPU_FOREACH(cpu) {
2160 /* FIXME: Disentangle the cpu.h circular files deps so we can
2161 directly get the right CPU from listener. */
2162 if (cpu->tcg_as_listener != listener) {
2163 continue;
2164 }
2165 cpu_reload_memory_map(cpu);
2166 }
2167 }
2168
2169 static void core_log_global_start(MemoryListener *listener)
2170 {
2171 cpu_physical_memory_set_dirty_tracking(true);
2172 }
2173
2174 static void core_log_global_stop(MemoryListener *listener)
2175 {
2176 cpu_physical_memory_set_dirty_tracking(false);
2177 }
2178
2179 static MemoryListener core_memory_listener = {
2180 .log_global_start = core_log_global_start,
2181 .log_global_stop = core_log_global_stop,
2182 .priority = 1,
2183 };
2184
2185 void address_space_init_dispatch(AddressSpace *as)
2186 {
2187 as->dispatch = NULL;
2188 as->dispatch_listener = (MemoryListener) {
2189 .begin = mem_begin,
2190 .commit = mem_commit,
2191 .region_add = mem_add,
2192 .region_nop = mem_add,
2193 .priority = 0,
2194 };
2195 memory_listener_register(&as->dispatch_listener, as);
2196 }
2197
2198 void address_space_unregister(AddressSpace *as)
2199 {
2200 memory_listener_unregister(&as->dispatch_listener);
2201 }
2202
2203 void address_space_destroy_dispatch(AddressSpace *as)
2204 {
2205 AddressSpaceDispatch *d = as->dispatch;
2206
2207 atomic_rcu_set(&as->dispatch, NULL);
2208 if (d) {
2209 call_rcu(d, address_space_dispatch_free, rcu);
2210 }
2211 }
2212
2213 static void memory_map_init(void)
2214 {
2215 system_memory = g_malloc(sizeof(*system_memory));
2216
2217 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2218 address_space_init(&address_space_memory, system_memory, "memory");
2219
2220 system_io = g_malloc(sizeof(*system_io));
2221 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2222 65536);
2223 address_space_init(&address_space_io, system_io, "I/O");
2224
2225 memory_listener_register(&core_memory_listener, &address_space_memory);
2226 }
2227
2228 MemoryRegion *get_system_memory(void)
2229 {
2230 return system_memory;
2231 }
2232
2233 MemoryRegion *get_system_io(void)
2234 {
2235 return system_io;
2236 }
2237
2238 #endif /* !defined(CONFIG_USER_ONLY) */
2239
2240 /* physical memory access (slow version, mainly for debug) */
2241 #if defined(CONFIG_USER_ONLY)
2242 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2243 uint8_t *buf, int len, int is_write)
2244 {
2245 int l, flags;
2246 target_ulong page;
2247 void * p;
2248
2249 while (len > 0) {
2250 page = addr & TARGET_PAGE_MASK;
2251 l = (page + TARGET_PAGE_SIZE) - addr;
2252 if (l > len)
2253 l = len;
2254 flags = page_get_flags(page);
2255 if (!(flags & PAGE_VALID))
2256 return -1;
2257 if (is_write) {
2258 if (!(flags & PAGE_WRITE))
2259 return -1;
2260 /* XXX: this code should not depend on lock_user */
2261 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2262 return -1;
2263 memcpy(p, buf, l);
2264 unlock_user(p, addr, l);
2265 } else {
2266 if (!(flags & PAGE_READ))
2267 return -1;
2268 /* XXX: this code should not depend on lock_user */
2269 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2270 return -1;
2271 memcpy(buf, p, l);
2272 unlock_user(p, addr, 0);
2273 }
2274 len -= l;
2275 buf += l;
2276 addr += l;
2277 }
2278 return 0;
2279 }
2280
2281 #else
2282
2283 static void invalidate_and_set_dirty(hwaddr addr,
2284 hwaddr length)
2285 {
2286 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2287 tb_invalidate_phys_range(addr, addr + length, 0);
2288 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2289 }
2290 xen_modified_memory(addr, length);
2291 }
2292
2293 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2294 {
2295 unsigned access_size_max = mr->ops->valid.max_access_size;
2296
2297 /* Regions are assumed to support 1-4 byte accesses unless
2298 otherwise specified. */
2299 if (access_size_max == 0) {
2300 access_size_max = 4;
2301 }
2302
2303 /* Bound the maximum access by the alignment of the address. */
2304 if (!mr->ops->impl.unaligned) {
2305 unsigned align_size_max = addr & -addr;
2306 if (align_size_max != 0 && align_size_max < access_size_max) {
2307 access_size_max = align_size_max;
2308 }
2309 }
2310
2311 /* Don't attempt accesses larger than the maximum. */
2312 if (l > access_size_max) {
2313 l = access_size_max;
2314 }
2315 if (l & (l - 1)) {
2316 l = 1 << (qemu_fls(l) - 1);
2317 }
2318
2319 return l;
2320 }
2321
2322 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2323 uint8_t *buf, int len, bool is_write)
2324 {
2325 hwaddr l;
2326 uint8_t *ptr;
2327 uint64_t val;
2328 hwaddr addr1;
2329 MemoryRegion *mr;
2330 MemTxResult result = MEMTX_OK;
2331
2332 while (len > 0) {
2333 l = len;
2334 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2335
2336 if (is_write) {
2337 if (!memory_access_is_direct(mr, is_write)) {
2338 l = memory_access_size(mr, l, addr1);
2339 /* XXX: could force current_cpu to NULL to avoid
2340 potential bugs */
2341 switch (l) {
2342 case 8:
2343 /* 64 bit write access */
2344 val = ldq_p(buf);
2345 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2346 attrs);
2347 break;
2348 case 4:
2349 /* 32 bit write access */
2350 val = ldl_p(buf);
2351 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2352 attrs);
2353 break;
2354 case 2:
2355 /* 16 bit write access */
2356 val = lduw_p(buf);
2357 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2358 attrs);
2359 break;
2360 case 1:
2361 /* 8 bit write access */
2362 val = ldub_p(buf);
2363 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2364 attrs);
2365 break;
2366 default:
2367 abort();
2368 }
2369 } else {
2370 addr1 += memory_region_get_ram_addr(mr);
2371 /* RAM case */
2372 ptr = qemu_get_ram_ptr(addr1);
2373 memcpy(ptr, buf, l);
2374 invalidate_and_set_dirty(addr1, l);
2375 }
2376 } else {
2377 if (!memory_access_is_direct(mr, is_write)) {
2378 /* I/O case */
2379 l = memory_access_size(mr, l, addr1);
2380 switch (l) {
2381 case 8:
2382 /* 64 bit read access */
2383 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2384 attrs);
2385 stq_p(buf, val);
2386 break;
2387 case 4:
2388 /* 32 bit read access */
2389 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2390 attrs);
2391 stl_p(buf, val);
2392 break;
2393 case 2:
2394 /* 16 bit read access */
2395 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2396 attrs);
2397 stw_p(buf, val);
2398 break;
2399 case 1:
2400 /* 8 bit read access */
2401 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2402 attrs);
2403 stb_p(buf, val);
2404 break;
2405 default:
2406 abort();
2407 }
2408 } else {
2409 /* RAM case */
2410 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2411 memcpy(buf, ptr, l);
2412 }
2413 }
2414 len -= l;
2415 buf += l;
2416 addr += l;
2417 }
2418
2419 return result;
2420 }
2421
2422 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2423 const uint8_t *buf, int len)
2424 {
2425 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2426 }
2427
2428 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2429 uint8_t *buf, int len)
2430 {
2431 return address_space_rw(as, addr, attrs, buf, len, false);
2432 }
2433
2434
2435 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2436 int len, int is_write)
2437 {
2438 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2439 buf, len, is_write);
2440 }
2441
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    /* Copy the caller's buffer into the backing memory. */
    WRITE_DATA,
    /* Only flush the host instruction cache over the range. */
    FLUSH_CACHE,
};
2446
2447 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2448 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2449 {
2450 hwaddr l;
2451 uint8_t *ptr;
2452 hwaddr addr1;
2453 MemoryRegion *mr;
2454
2455 while (len > 0) {
2456 l = len;
2457 mr = address_space_translate(as, addr, &addr1, &l, true);
2458
2459 if (!(memory_region_is_ram(mr) ||
2460 memory_region_is_romd(mr))) {
2461 /* do nothing */
2462 } else {
2463 addr1 += memory_region_get_ram_addr(mr);
2464 /* ROM/RAM case */
2465 ptr = qemu_get_ram_ptr(addr1);
2466 switch (type) {
2467 case WRITE_DATA:
2468 memcpy(ptr, buf, l);
2469 invalidate_and_set_dirty(addr1, l);
2470 break;
2471 case FLUSH_CACHE:
2472 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2473 break;
2474 }
2475 }
2476 len -= l;
2477 buf += l;
2478 addr += l;
2479 }
2480 }
2481
2482 /* used for ROM loading : can write in RAM and ROM */
2483 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2484 const uint8_t *buf, int len)
2485 {
2486 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2487 }
2488
2489 void cpu_flush_icache_range(hwaddr start, int len)
2490 {
2491 /*
2492 * This function should do the same thing as an icache flush that was
2493 * triggered from within the guest. For TCG we are always cache coherent,
2494 * so there is no need to flush anything. For KVM / Xen we need to flush
2495 * the host's instruction cache at least.
2496 */
2497 if (tcg_enabled()) {
2498 return;
2499 }
2500
2501 cpu_physical_memory_write_rom_internal(&address_space_memory,
2502 start, NULL, len, FLUSH_CACHE);
2503 }
2504
2505 typedef struct {
2506 MemoryRegion *mr;
2507 void *buffer;
2508 hwaddr addr;
2509 hwaddr len;
2510 bool in_use;
2511 } BounceBuffer;
2512
2513 static BounceBuffer bounce;
2514
2515 typedef struct MapClient {
2516 QEMUBH *bh;
2517 QLIST_ENTRY(MapClient) link;
2518 } MapClient;
2519
2520 QemuMutex map_client_list_lock;
2521 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2522 = QLIST_HEAD_INITIALIZER(map_client_list);
2523
2524 static void cpu_unregister_map_client_do(MapClient *client)
2525 {
2526 QLIST_REMOVE(client, link);
2527 g_free(client);
2528 }
2529
2530 static void cpu_notify_map_clients_locked(void)
2531 {
2532 MapClient *client;
2533
2534 while (!QLIST_EMPTY(&map_client_list)) {
2535 client = QLIST_FIRST(&map_client_list);
2536 qemu_bh_schedule(client->bh);
2537 cpu_unregister_map_client_do(client);
2538 }
2539 }
2540
2541 void cpu_register_map_client(QEMUBH *bh)
2542 {
2543 MapClient *client = g_malloc(sizeof(*client));
2544
2545 qemu_mutex_lock(&map_client_list_lock);
2546 client->bh = bh;
2547 QLIST_INSERT_HEAD(&map_client_list, client, link);
2548 if (!atomic_read(&bounce.in_use)) {
2549 cpu_notify_map_clients_locked();
2550 }
2551 qemu_mutex_unlock(&map_client_list_lock);
2552 }
2553
2554 void cpu_exec_init_all(void)
2555 {
2556 qemu_mutex_init(&ram_list.mutex);
2557 memory_map_init();
2558 io_mem_init();
2559 qemu_mutex_init(&map_client_list_lock);
2560 }
2561
2562 void cpu_unregister_map_client(QEMUBH *bh)
2563 {
2564 MapClient *client;
2565
2566 qemu_mutex_lock(&map_client_list_lock);
2567 QLIST_FOREACH(client, &map_client_list, link) {
2568 if (client->bh == bh) {
2569 cpu_unregister_map_client_do(client);
2570 break;
2571 }
2572 }
2573 qemu_mutex_unlock(&map_client_list_lock);
2574 }
2575
2576 static void cpu_notify_map_clients(void)
2577 {
2578 qemu_mutex_lock(&map_client_list_lock);
2579 cpu_notify_map_clients_locked();
2580 qemu_mutex_unlock(&map_client_list_lock);
2581 }
2582
2583 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2584 {
2585 MemoryRegion *mr;
2586 hwaddr l, xlat;
2587
2588 while (len > 0) {
2589 l = len;
2590 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2591 if (!memory_access_is_direct(mr, is_write)) {
2592 l = memory_access_size(mr, l, addr);
2593 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2594 return false;
2595 }
2596 }
2597
2598 len -= l;
2599 addr += l;
2600 }
2601 return true;
2602 }
2603
2604 /* Map a physical memory region into a host virtual address.
2605 * May map a subset of the requested range, given by and returned in *plen.
2606 * May return NULL if resources needed to perform the mapping are exhausted.
2607 * Use only for reads OR writes - not for read-modify-write operations.
2608 * Use cpu_register_map_client() to know when retrying the map operation is
2609 * likely to succeed.
2610 */
2611 void *address_space_map(AddressSpace *as,
2612 hwaddr addr,
2613 hwaddr *plen,
2614 bool is_write)
2615 {
2616 hwaddr len = *plen;
2617 hwaddr done = 0;
2618 hwaddr l, xlat, base;
2619 MemoryRegion *mr, *this_mr;
2620 ram_addr_t raddr;
2621
2622 if (len == 0) {
2623 return NULL;
2624 }
2625
2626 l = len;
2627 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2628 if (!memory_access_is_direct(mr, is_write)) {
2629 if (atomic_xchg(&bounce.in_use, true)) {
2630 return NULL;
2631 }
2632 /* Avoid unbounded allocations */
2633 l = MIN(l, TARGET_PAGE_SIZE);
2634 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2635 bounce.addr = addr;
2636 bounce.len = l;
2637
2638 memory_region_ref(mr);
2639 bounce.mr = mr;
2640 if (!is_write) {
2641 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2642 bounce.buffer, l);
2643 }
2644
2645 *plen = l;
2646 return bounce.buffer;
2647 }
2648
2649 base = xlat;
2650 raddr = memory_region_get_ram_addr(mr);
2651
2652 for (;;) {
2653 len -= l;
2654 addr += l;
2655 done += l;
2656 if (len == 0) {
2657 break;
2658 }
2659
2660 l = len;
2661 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2662 if (this_mr != mr || xlat != base + done) {
2663 break;
2664 }
2665 }
2666
2667 memory_region_ref(mr);
2668 *plen = done;
2669 return qemu_ram_ptr_length(raddr + base, plen);
2670 }
2671
2672 /* Unmaps a memory region previously mapped by address_space_map().
2673 * Will also mark the memory as dirty if is_write == 1. access_len gives
2674 * the amount of memory that was actually read or written by the caller.
2675 */
2676 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2677 int is_write, hwaddr access_len)
2678 {
2679 if (buffer != bounce.buffer) {
2680 MemoryRegion *mr;
2681 ram_addr_t addr1;
2682
2683 mr = qemu_ram_addr_from_host(buffer, &addr1);
2684 assert(mr != NULL);
2685 if (is_write) {
2686 invalidate_and_set_dirty(addr1, access_len);
2687 }
2688 if (xen_enabled()) {
2689 xen_invalidate_map_cache_entry(buffer);
2690 }
2691 memory_region_unref(mr);
2692 return;
2693 }
2694 if (is_write) {
2695 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2696 bounce.buffer, access_len);
2697 }
2698 qemu_vfree(bounce.buffer);
2699 bounce.buffer = NULL;
2700 memory_region_unref(bounce.mr);
2701 atomic_mb_set(&bounce.in_use, false);
2702 cpu_notify_map_clients();
2703 }
2704
2705 void *cpu_physical_memory_map(hwaddr addr,
2706 hwaddr *plen,
2707 int is_write)
2708 {
2709 return address_space_map(&address_space_memory, addr, plen, is_write);
2710 }
2711
2712 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2713 int is_write, hwaddr access_len)
2714 {
2715 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2716 }
2717
2718 /* warning: addr must be aligned */
2719 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2720 MemTxAttrs attrs,
2721 MemTxResult *result,
2722 enum device_endian endian)
2723 {
2724 uint8_t *ptr;
2725 uint64_t val;
2726 MemoryRegion *mr;
2727 hwaddr l = 4;
2728 hwaddr addr1;
2729 MemTxResult r;
2730
2731 mr = address_space_translate(as, addr, &addr1, &l, false);
2732 if (l < 4 || !memory_access_is_direct(mr, false)) {
2733 /* I/O case */
2734 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2735 #if defined(TARGET_WORDS_BIGENDIAN)
2736 if (endian == DEVICE_LITTLE_ENDIAN) {
2737 val = bswap32(val);
2738 }
2739 #else
2740 if (endian == DEVICE_BIG_ENDIAN) {
2741 val = bswap32(val);
2742 }
2743 #endif
2744 } else {
2745 /* RAM case */
2746 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2747 & TARGET_PAGE_MASK)
2748 + addr1);
2749 switch (endian) {
2750 case DEVICE_LITTLE_ENDIAN:
2751 val = ldl_le_p(ptr);
2752 break;
2753 case DEVICE_BIG_ENDIAN:
2754 val = ldl_be_p(ptr);
2755 break;
2756 default:
2757 val = ldl_p(ptr);
2758 break;
2759 }
2760 r = MEMTX_OK;
2761 }
2762 if (result) {
2763 *result = r;
2764 }
2765 return val;
2766 }
2767
2768 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2769 MemTxAttrs attrs, MemTxResult *result)
2770 {
2771 return address_space_ldl_internal(as, addr, attrs, result,
2772 DEVICE_NATIVE_ENDIAN);
2773 }
2774
2775 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2776 MemTxAttrs attrs, MemTxResult *result)
2777 {
2778 return address_space_ldl_internal(as, addr, attrs, result,
2779 DEVICE_LITTLE_ENDIAN);
2780 }
2781
2782 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2783 MemTxAttrs attrs, MemTxResult *result)
2784 {
2785 return address_space_ldl_internal(as, addr, attrs, result,
2786 DEVICE_BIG_ENDIAN);
2787 }
2788
2789 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2790 {
2791 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2792 }
2793
2794 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2795 {
2796 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2797 }
2798
2799 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2800 {
2801 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2802 }
2803
2804 /* warning: addr must be aligned */
2805 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2806 MemTxAttrs attrs,
2807 MemTxResult *result,
2808 enum device_endian endian)
2809 {
2810 uint8_t *ptr;
2811 uint64_t val;
2812 MemoryRegion *mr;
2813 hwaddr l = 8;
2814 hwaddr addr1;
2815 MemTxResult r;
2816
2817 mr = address_space_translate(as, addr, &addr1, &l,
2818 false);
2819 if (l < 8 || !memory_access_is_direct(mr, false)) {
2820 /* I/O case */
2821 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
2822 #if defined(TARGET_WORDS_BIGENDIAN)
2823 if (endian == DEVICE_LITTLE_ENDIAN) {
2824 val = bswap64(val);
2825 }
2826 #else
2827 if (endian == DEVICE_BIG_ENDIAN) {
2828 val = bswap64(val);
2829 }
2830 #endif
2831 } else {
2832 /* RAM case */
2833 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2834 & TARGET_PAGE_MASK)
2835 + addr1);
2836 switch (endian) {
2837 case DEVICE_LITTLE_ENDIAN:
2838 val = ldq_le_p(ptr);
2839 break;
2840 case DEVICE_BIG_ENDIAN:
2841 val = ldq_be_p(ptr);
2842 break;
2843 default:
2844 val = ldq_p(ptr);
2845 break;
2846 }
2847 r = MEMTX_OK;
2848 }
2849 if (result) {
2850 *result = r;
2851 }
2852 return val;
2853 }
2854
2855 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
2856 MemTxAttrs attrs, MemTxResult *result)
2857 {
2858 return address_space_ldq_internal(as, addr, attrs, result,
2859 DEVICE_NATIVE_ENDIAN);
2860 }
2861
2862 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
2863 MemTxAttrs attrs, MemTxResult *result)
2864 {
2865 return address_space_ldq_internal(as, addr, attrs, result,
2866 DEVICE_LITTLE_ENDIAN);
2867 }
2868
2869 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
2870 MemTxAttrs attrs, MemTxResult *result)
2871 {
2872 return address_space_ldq_internal(as, addr, attrs, result,
2873 DEVICE_BIG_ENDIAN);
2874 }
2875
2876 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2877 {
2878 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2879 }
2880
2881 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2882 {
2883 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2884 }
2885
2886 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2887 {
2888 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2889 }
2890
2891 /* XXX: optimize */
2892 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
2893 MemTxAttrs attrs, MemTxResult *result)
2894 {
2895 uint8_t val;
2896 MemTxResult r;
2897
2898 r = address_space_rw(as, addr, attrs, &val, 1, 0);
2899 if (result) {
2900 *result = r;
2901 }
2902 return val;
2903 }
2904
2905 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2906 {
2907 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2908 }
2909
2910 /* warning: addr must be aligned */
2911 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
2912 hwaddr addr,
2913 MemTxAttrs attrs,
2914 MemTxResult *result,
2915 enum device_endian endian)
2916 {
2917 uint8_t *ptr;
2918 uint64_t val;
2919 MemoryRegion *mr;
2920 hwaddr l = 2;
2921 hwaddr addr1;
2922 MemTxResult r;
2923
2924 mr = address_space_translate(as, addr, &addr1, &l,
2925 false);
2926 if (l < 2 || !memory_access_is_direct(mr, false)) {
2927 /* I/O case */
2928 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
2929 #if defined(TARGET_WORDS_BIGENDIAN)
2930 if (endian == DEVICE_LITTLE_ENDIAN) {
2931 val = bswap16(val);
2932 }
2933 #else
2934 if (endian == DEVICE_BIG_ENDIAN) {
2935 val = bswap16(val);
2936 }
2937 #endif
2938 } else {
2939 /* RAM case */
2940 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2941 & TARGET_PAGE_MASK)
2942 + addr1);
2943 switch (endian) {
2944 case DEVICE_LITTLE_ENDIAN:
2945 val = lduw_le_p(ptr);
2946 break;
2947 case DEVICE_BIG_ENDIAN:
2948 val = lduw_be_p(ptr);
2949 break;
2950 default:
2951 val = lduw_p(ptr);
2952 break;
2953 }
2954 r = MEMTX_OK;
2955 }
2956 if (result) {
2957 *result = r;
2958 }
2959 return val;
2960 }
2961
2962 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
2963 MemTxAttrs attrs, MemTxResult *result)
2964 {
2965 return address_space_lduw_internal(as, addr, attrs, result,
2966 DEVICE_NATIVE_ENDIAN);
2967 }
2968
2969 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
2970 MemTxAttrs attrs, MemTxResult *result)
2971 {
2972 return address_space_lduw_internal(as, addr, attrs, result,
2973 DEVICE_LITTLE_ENDIAN);
2974 }
2975
2976 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
2977 MemTxAttrs attrs, MemTxResult *result)
2978 {
2979 return address_space_lduw_internal(as, addr, attrs, result,
2980 DEVICE_BIG_ENDIAN);
2981 }
2982
2983 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2984 {
2985 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2986 }
2987
2988 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2989 {
2990 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2991 }
2992
2993 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2994 {
2995 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2996 }
2997
2998 /* warning: addr must be aligned. The ram page is not masked as dirty
2999 and the code inside is not invalidated. It is useful if the dirty
3000 bits are used to track modified PTEs */
3001 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3002 MemTxAttrs attrs, MemTxResult *result)
3003 {
3004 uint8_t *ptr;
3005 MemoryRegion *mr;
3006 hwaddr l = 4;
3007 hwaddr addr1;
3008 MemTxResult r;
3009
3010 mr = address_space_translate(as, addr, &addr1, &l,
3011 true);
3012 if (l < 4 || !memory_access_is_direct(mr, true)) {
3013 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3014 } else {
3015 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3016 ptr = qemu_get_ram_ptr(addr1);
3017 stl_p(ptr, val);
3018
3019 if (unlikely(in_migration)) {
3020 if (cpu_physical_memory_is_clean(addr1)) {
3021 /* invalidate code */
3022 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3023 /* set dirty bit */
3024 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
3025 }
3026 }
3027 r = MEMTX_OK;
3028 }
3029 if (result) {
3030 *result = r;
3031 }
3032 }
3033
3034 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3035 {
3036 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3037 }
3038
3039 /* warning: addr must be aligned */
3040 static inline void address_space_stl_internal(AddressSpace *as,
3041 hwaddr addr, uint32_t val,
3042 MemTxAttrs attrs,
3043 MemTxResult *result,
3044 enum device_endian endian)
3045 {
3046 uint8_t *ptr;
3047 MemoryRegion *mr;
3048 hwaddr l = 4;
3049 hwaddr addr1;
3050 MemTxResult r;
3051
3052 mr = address_space_translate(as, addr, &addr1, &l,
3053 true);
3054 if (l < 4 || !memory_access_is_direct(mr, true)) {
3055 #if defined(TARGET_WORDS_BIGENDIAN)
3056 if (endian == DEVICE_LITTLE_ENDIAN) {
3057 val = bswap32(val);
3058 }
3059 #else
3060 if (endian == DEVICE_BIG_ENDIAN) {
3061 val = bswap32(val);
3062 }
3063 #endif
3064 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3065 } else {
3066 /* RAM case */
3067 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3068 ptr = qemu_get_ram_ptr(addr1);
3069 switch (endian) {
3070 case DEVICE_LITTLE_ENDIAN:
3071 stl_le_p(ptr, val);
3072 break;
3073 case DEVICE_BIG_ENDIAN:
3074 stl_be_p(ptr, val);
3075 break;
3076 default:
3077 stl_p(ptr, val);
3078 break;
3079 }
3080 invalidate_and_set_dirty(addr1, 4);
3081 r = MEMTX_OK;
3082 }
3083 if (result) {
3084 *result = r;
3085 }
3086 }
3087
3088 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3089 MemTxAttrs attrs, MemTxResult *result)
3090 {
3091 address_space_stl_internal(as, addr, val, attrs, result,
3092 DEVICE_NATIVE_ENDIAN);
3093 }
3094
3095 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3096 MemTxAttrs attrs, MemTxResult *result)
3097 {
3098 address_space_stl_internal(as, addr, val, attrs, result,
3099 DEVICE_LITTLE_ENDIAN);
3100 }
3101
3102 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3103 MemTxAttrs attrs, MemTxResult *result)
3104 {
3105 address_space_stl_internal(as, addr, val, attrs, result,
3106 DEVICE_BIG_ENDIAN);
3107 }
3108
3109 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3110 {
3111 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3112 }
3113
3114 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3115 {
3116 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3117 }
3118
3119 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3120 {
3121 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3122 }
3123
3124 /* XXX: optimize */
3125 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3126 MemTxAttrs attrs, MemTxResult *result)
3127 {
3128 uint8_t v = val;
3129 MemTxResult r;
3130
3131 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3132 if (result) {
3133 *result = r;
3134 }
3135 }
3136
3137 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3138 {
3139 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3140 }
3141
3142 /* warning: addr must be aligned */
3143 static inline void address_space_stw_internal(AddressSpace *as,
3144 hwaddr addr, uint32_t val,
3145 MemTxAttrs attrs,
3146 MemTxResult *result,
3147 enum device_endian endian)
3148 {
3149 uint8_t *ptr;
3150 MemoryRegion *mr;
3151 hwaddr l = 2;
3152 hwaddr addr1;
3153 MemTxResult r;
3154
3155 mr = address_space_translate(as, addr, &addr1, &l, true);
3156 if (l < 2 || !memory_access_is_direct(mr, true)) {
3157 #if defined(TARGET_WORDS_BIGENDIAN)
3158 if (endian == DEVICE_LITTLE_ENDIAN) {
3159 val = bswap16(val);
3160 }
3161 #else
3162 if (endian == DEVICE_BIG_ENDIAN) {
3163 val = bswap16(val);
3164 }
3165 #endif
3166 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3167 } else {
3168 /* RAM case */
3169 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3170 ptr = qemu_get_ram_ptr(addr1);
3171 switch (endian) {
3172 case DEVICE_LITTLE_ENDIAN:
3173 stw_le_p(ptr, val);
3174 break;
3175 case DEVICE_BIG_ENDIAN:
3176 stw_be_p(ptr, val);
3177 break;
3178 default:
3179 stw_p(ptr, val);
3180 break;
3181 }
3182 invalidate_and_set_dirty(addr1, 2);
3183 r = MEMTX_OK;
3184 }
3185 if (result) {
3186 *result = r;
3187 }
3188 }
3189
3190 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3191 MemTxAttrs attrs, MemTxResult *result)
3192 {
3193 address_space_stw_internal(as, addr, val, attrs, result,
3194 DEVICE_NATIVE_ENDIAN);
3195 }
3196
3197 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3198 MemTxAttrs attrs, MemTxResult *result)
3199 {
3200 address_space_stw_internal(as, addr, val, attrs, result,
3201 DEVICE_LITTLE_ENDIAN);
3202 }
3203
3204 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3205 MemTxAttrs attrs, MemTxResult *result)
3206 {
3207 address_space_stw_internal(as, addr, val, attrs, result,
3208 DEVICE_BIG_ENDIAN);
3209 }
3210
3211 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3212 {
3213 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3214 }
3215
3216 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3217 {
3218 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3219 }
3220
3221 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3222 {
3223 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3224 }
3225
3226 /* XXX: optimize */
3227 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3228 MemTxAttrs attrs, MemTxResult *result)
3229 {
3230 MemTxResult r;
3231 val = tswap64(val);
3232 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3233 if (result) {
3234 *result = r;
3235 }
3236 }
3237
3238 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3239 MemTxAttrs attrs, MemTxResult *result)
3240 {
3241 MemTxResult r;
3242 val = cpu_to_le64(val);
3243 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3244 if (result) {
3245 *result = r;
3246 }
3247 }
3248 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3249 MemTxAttrs attrs, MemTxResult *result)
3250 {
3251 MemTxResult r;
3252 val = cpu_to_be64(val);
3253 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3254 if (result) {
3255 *result = r;
3256 }
3257 }
3258
3259 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3260 {
3261 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3262 }
3263
3264 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3265 {
3266 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3267 }
3268
3269 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3270 {
3271 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3272 }
3273
3274 /* virtual memory access for debug (includes writing to ROM) */
3275 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3276 uint8_t *buf, int len, int is_write)
3277 {
3278 int l;
3279 hwaddr phys_addr;
3280 target_ulong page;
3281
3282 while (len > 0) {
3283 page = addr & TARGET_PAGE_MASK;
3284 phys_addr = cpu_get_phys_page_debug(cpu, page);
3285 /* if no physical page mapped, return an error */
3286 if (phys_addr == -1)
3287 return -1;
3288 l = (page + TARGET_PAGE_SIZE) - addr;
3289 if (l > len)
3290 l = len;
3291 phys_addr += (addr & ~TARGET_PAGE_MASK);
3292 if (is_write) {
3293 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3294 } else {
3295 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3296 buf, l, 0);
3297 }
3298 len -= l;
3299 buf += l;
3300 addr += l;
3301 }
3302 return 0;
3303 }
3304 #endif
3305
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true iff TARGET_WORDS_BIGENDIAN is defined at compile time.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3319
3320 #ifndef CONFIG_USER_ONLY
3321 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3322 {
3323 MemoryRegion*mr;
3324 hwaddr l = 1;
3325
3326 mr = address_space_translate(&address_space_memory,
3327 phys_addr, &phys_addr, &l, false);
3328
3329 return !(memory_region_is_ram(mr) ||
3330 memory_region_is_romd(mr));
3331 }
3332
3333 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3334 {
3335 RAMBlock *block;
3336
3337 rcu_read_lock();
3338 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3339 func(block->host, block->offset, block->used_length, opaque);
3340 }
3341 rcu_read_unlock();
3342 }
3343 #endif