[mirror_qemu.git] / exec.c (blame)
54936004 1/*
5b6dd868 2 * Virtual page mapping
5fafdf24 3 *
54936004 4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
8167ee88 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
54936004 18 */
67b915a5 19#include "config.h"
777872e5 20#ifndef _WIN32
a98d49b1 21#include <sys/types.h>
d5a8f07c 22#include <sys/mman.h>
23#endif
54936004 24
055403b2 25#include "qemu-common.h"
6180a181 26#include "cpu.h"
b67d9a52 27#include "tcg.h"
b3c7724c 28#include "hw/hw.h"
cc9e98cb 29#include "hw/qdev.h"
1de7afc9 30#include "qemu/osdep.h"
9c17d615 31#include "sysemu/kvm.h"
2ff3de68 32#include "sysemu/sysemu.h"
0d09e41a 33#include "hw/xen/xen.h"
1de7afc9 34#include "qemu/timer.h"
35#include "qemu/config-file.h"
75a34036 36#include "qemu/error-report.h"
022c62cb 37#include "exec/memory.h"
9c17d615 38#include "sysemu/dma.h"
022c62cb 39#include "exec/address-spaces.h"
53a5960a 40#if defined(CONFIG_USER_ONLY)
41#include <qemu.h>
432d268c 42#else /* !CONFIG_USER_ONLY */
9c17d615 43#include "sysemu/xen-mapcache.h"
6506e4f9 44#include "trace.h"
53a5960a 45#endif
0d6d3c87 46#include "exec/cpu-all.h"
0dc3f44a 47#include "qemu/rcu_queue.h"
022c62cb 48#include "exec/cputlb.h"
5b6dd868 49#include "translate-all.h"
0cac1b66 50
022c62cb 51#include "exec/memory-internal.h"
220c3ebd 52#include "exec/ram_addr.h"
67d95c15 53
b35ba30f 54#include "qemu/range.h"
55
db7b5426 56//#define DEBUG_SUBPAGE
1196be37 57
e2eef170 58#if !defined(CONFIG_USER_ONLY)
981fdf23 59static bool in_migration;
94a6b54f 60
0dc3f44a 61/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
62 * are protected by the ramlist lock.
63 */
0d53d9fe 64RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
62152b8a 65
66static MemoryRegion *system_memory;
309cb471 67static MemoryRegion *system_io;
62152b8a 68
f6790af6 69AddressSpace address_space_io;
70AddressSpace address_space_memory;
2673a5da 71
0844e007 72MemoryRegion io_mem_rom, io_mem_notdirty;
acc9d80b 73static MemoryRegion io_mem_unassigned;
0e0df1e2 74
7bd4f430 75/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
76#define RAM_PREALLOC (1 << 0)
77
dbcb8981 78/* RAM is mmap-ed with MAP_SHARED */
79#define RAM_SHARED (1 << 1)
80
62be4e3a 81/* Only a portion of RAM (used_length) is actually used, and migrated.
82 * This used_length size can change across reboots.
83 */
84#define RAM_RESIZEABLE (1 << 2)
85
e2eef170 86#endif
9fa3e853 87
bdc44640 88struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
6a00d601 89/* The current CPU in the current thread. It is only valid inside
 90 cpu_exec(). */
4917cf44 91DEFINE_TLS(CPUState *, current_cpu);
2e70f6ef 92/* 0 = Do not count executed instructions.
bf20dc07 93 1 = Precise instruction counting.
2e70f6ef 94 2 = Adaptive rate instruction counting. */
5708fc66 95int use_icount;
6a00d601 96
e2eef170 97#if !defined(CONFIG_USER_ONLY)
4346ae3e 98
1db8abb1 99typedef struct PhysPageEntry PhysPageEntry;
100
101struct PhysPageEntry {
9736e55b 102 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
8b795765 103 uint32_t skip : 6;
9736e55b 104 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
8b795765 105 uint32_t ptr : 26;
1db8abb1 106};
107
8b795765 108#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
109
03f49957 110/* Size of the L2 (and L3, etc) page tables. */
57271d63 111#define ADDR_SPACE_BITS 64
03f49957 112
026736ce 113#define P_L2_BITS 9
03f49957 114#define P_L2_SIZE (1 << P_L2_BITS)
115
116#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
117
118typedef PhysPageEntry Node[P_L2_SIZE];
0475d94f 119
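/* Illustrative sketch (added for explanation, not part of the blamed file):
 * the radix tree built from PhysPageEntry/Node above consumes P_L2_BITS of
 * the page index per level, highest level first.  This helper shows the
 * per-level index computation that phys_page_set_level() and
 * phys_page_find() below both use.
 */
static inline unsigned example_level_index(hwaddr page_index, int level)
{
    /* With P_L2_BITS == 9, each Node has 512 entries. */
    return (page_index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1);
}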
53cb28cb 120typedef struct PhysPageMap {
79e2b9ae 121 struct rcu_head rcu;
122
53cb28cb 123 unsigned sections_nb;
124 unsigned sections_nb_alloc;
125 unsigned nodes_nb;
126 unsigned nodes_nb_alloc;
127 Node *nodes;
128 MemoryRegionSection *sections;
129} PhysPageMap;
130
1db8abb1 131struct AddressSpaceDispatch {
79e2b9ae 132 struct rcu_head rcu;
133
1db8abb1 134 /* This is a multi-level map on the physical address space.
135 * The bottom level has pointers to MemoryRegionSections.
136 */
137 PhysPageEntry phys_map;
53cb28cb 138 PhysPageMap map;
acc9d80b 139 AddressSpace *as;
1db8abb1 140};
141
90260c6c 142#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
143typedef struct subpage_t {
144 MemoryRegion iomem;
acc9d80b 145 AddressSpace *as;
90260c6c 146 hwaddr base;
147 uint16_t sub_section[TARGET_PAGE_SIZE];
148} subpage_t;
149
b41aac4f 150#define PHYS_SECTION_UNASSIGNED 0
151#define PHYS_SECTION_NOTDIRTY 1
152#define PHYS_SECTION_ROM 2
153#define PHYS_SECTION_WATCH 3
5312bd8b 154
e2eef170 155static void io_mem_init(void);
62152b8a 156static void memory_map_init(void);
09daed84 157static void tcg_commit(MemoryListener *listener);
e2eef170 158
1ec9b909 159static MemoryRegion io_mem_watch;
6658ffb8 160#endif
fd6ce8f6 161
6d9a1304 162#if !defined(CONFIG_USER_ONLY)
d6f2ea22 163
53cb28cb 164static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
d6f2ea22 165{
53cb28cb
MA
166 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
167 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
168 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
169 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
d6f2ea22 170 }
f7bf5461
AK
171}
172
53cb28cb 173static uint32_t phys_map_node_alloc(PhysPageMap *map)
f7bf5461
AK
174{
175 unsigned i;
8b795765 176 uint32_t ret;
f7bf5461 177
53cb28cb 178 ret = map->nodes_nb++;
f7bf5461 179 assert(ret != PHYS_MAP_NODE_NIL);
53cb28cb 180 assert(ret != map->nodes_nb_alloc);
03f49957 181 for (i = 0; i < P_L2_SIZE; ++i) {
53cb28cb
MA
182 map->nodes[ret][i].skip = 1;
183 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
d6f2ea22 184 }
f7bf5461 185 return ret;
d6f2ea22
AK
186}
187
53cb28cb
MA
188static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
189 hwaddr *index, hwaddr *nb, uint16_t leaf,
2999097b 190 int level)
f7bf5461
AK
191{
192 PhysPageEntry *p;
193 int i;
03f49957 194 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
108c49b8 195
9736e55b 196 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
53cb28cb
MA
197 lp->ptr = phys_map_node_alloc(map);
198 p = map->nodes[lp->ptr];
f7bf5461 199 if (level == 0) {
03f49957 200 for (i = 0; i < P_L2_SIZE; i++) {
9736e55b 201 p[i].skip = 0;
b41aac4f 202 p[i].ptr = PHYS_SECTION_UNASSIGNED;
4346ae3e 203 }
67c4d23c 204 }
f7bf5461 205 } else {
53cb28cb 206 p = map->nodes[lp->ptr];
92e873b9 207 }
03f49957 208 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
f7bf5461 209
03f49957 210 while (*nb && lp < &p[P_L2_SIZE]) {
07f07b31 211 if ((*index & (step - 1)) == 0 && *nb >= step) {
9736e55b 212 lp->skip = 0;
c19e8800 213 lp->ptr = leaf;
07f07b31
AK
214 *index += step;
215 *nb -= step;
2999097b 216 } else {
53cb28cb 217 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
2999097b
AK
218 }
219 ++lp;
f7bf5461
AK
220 }
221}
222
ac1970fb 223static void phys_page_set(AddressSpaceDispatch *d,
a8170e5e 224 hwaddr index, hwaddr nb,
2999097b 225 uint16_t leaf)
f7bf5461 226{
2999097b 227 /* Wildly overreserve - it doesn't matter much. */
53cb28cb 228 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
5cd2c5b6 229
53cb28cb 230 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
92e873b9 231}
232
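/* Hedged usage sketch (added for explanation, not part of the blamed file):
 * a caller maps a page-aligned run of guest-physical pages to a section
 * index obtained from phys_section_add(); this mirrors what
 * register_multipage() further down does for real MemoryRegionSections.
 */
static void example_map_range(AddressSpaceDispatch *d, hwaddr start_addr,
                              uint64_t num_pages, uint16_t section_index)
{
    /* start_addr is assumed to be TARGET_PAGE_SIZE aligned. */
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}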
b35ba30f 233/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
234 * and update our entry so we can skip it and go directly to the destination.
235 */
236static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
237{
238 unsigned valid_ptr = P_L2_SIZE;
239 int valid = 0;
240 PhysPageEntry *p;
241 int i;
242
243 if (lp->ptr == PHYS_MAP_NODE_NIL) {
244 return;
245 }
246
247 p = nodes[lp->ptr];
248 for (i = 0; i < P_L2_SIZE; i++) {
249 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
250 continue;
251 }
252
253 valid_ptr = i;
254 valid++;
255 if (p[i].skip) {
256 phys_page_compact(&p[i], nodes, compacted);
257 }
258 }
259
260 /* We can only compress if there's only one child. */
261 if (valid != 1) {
262 return;
263 }
264
265 assert(valid_ptr < P_L2_SIZE);
266
267 /* Don't compress if it won't fit in the # of bits we have. */
268 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
269 return;
270 }
271
272 lp->ptr = p[valid_ptr].ptr;
273 if (!p[valid_ptr].skip) {
274 /* If our only child is a leaf, make this a leaf. */
275 /* By design, we should have made this node a leaf to begin with so we
276 * should never reach here.
277 * But since it's so simple to handle this, let's do it just in case we
278 * change this rule.
279 */
280 lp->skip = 0;
281 } else {
282 lp->skip += p[valid_ptr].skip;
283 }
284}
285
286static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
287{
288 DECLARE_BITMAP(compacted, nodes_nb);
289
290 if (d->phys_map.skip) {
53cb28cb 291 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
b35ba30f 292 }
293}
294
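/* Worked example for the compaction above (explanatory note, not part of the
 * blamed file): if a lookup would walk  root --skip=1--> A --skip=1--> B  and
 * B is A's only valid child, phys_page_compact() folds A away so the walk
 * becomes  root --skip=2--> B.  Compaction is refused when the summed skip
 * would reach the (1 << 3) guard, keeping it well inside the 6-bit skip
 * field of PhysPageEntry.
 */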
97115a8d 295static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
9affd6fc 296 Node *nodes, MemoryRegionSection *sections)
92e873b9 297{
31ab2b4a 298 PhysPageEntry *p;
97115a8d 299 hwaddr index = addr >> TARGET_PAGE_BITS;
31ab2b4a 300 int i;
f1f6e3b8 301
9736e55b 302 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
c19e8800 303 if (lp.ptr == PHYS_MAP_NODE_NIL) {
9affd6fc 304 return &sections[PHYS_SECTION_UNASSIGNED];
31ab2b4a 305 }
9affd6fc 306 p = nodes[lp.ptr];
03f49957 307 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
5312bd8b 308 }
b35ba30f
MT
309
310 if (sections[lp.ptr].size.hi ||
311 range_covers_byte(sections[lp.ptr].offset_within_address_space,
312 sections[lp.ptr].size.lo, addr)) {
313 return &sections[lp.ptr];
314 } else {
315 return &sections[PHYS_SECTION_UNASSIGNED];
316 }
f3705d53
AK
317}
318
e5548617
BS
319bool memory_region_is_unassigned(MemoryRegion *mr)
320{
2a8e7499 321 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
5b6dd868 322 && mr != &io_mem_watch;
fd6ce8f6 323}
149f54b5 324
79e2b9ae 325/* Called from RCU critical section */
c7086b4a 326static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
90260c6c
JK
327 hwaddr addr,
328 bool resolve_subpage)
9f029603 329{
90260c6c
JK
330 MemoryRegionSection *section;
331 subpage_t *subpage;
332
53cb28cb 333 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
90260c6c
JK
334 if (resolve_subpage && section->mr->subpage) {
335 subpage = container_of(section->mr, subpage_t, iomem);
53cb28cb 336 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
90260c6c
JK
337 }
338 return section;
9f029603
JK
339}
340
79e2b9ae 341/* Called from RCU critical section */
90260c6c 342static MemoryRegionSection *
c7086b4a 343address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
90260c6c 344 hwaddr *plen, bool resolve_subpage)
149f54b5
PB
345{
346 MemoryRegionSection *section;
a87f3954 347 Int128 diff;
149f54b5 348
c7086b4a 349 section = address_space_lookup_region(d, addr, resolve_subpage);
149f54b5
PB
350 /* Compute offset within MemoryRegionSection */
351 addr -= section->offset_within_address_space;
352
353 /* Compute offset within MemoryRegion */
354 *xlat = addr + section->offset_within_region;
355
356 diff = int128_sub(section->mr->size, int128_make64(addr));
3752a036 357 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
149f54b5
PB
358 return section;
359}
90260c6c 360
a87f3954
PB
361static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
362{
363 if (memory_region_is_ram(mr)) {
364 return !(is_write && mr->readonly);
365 }
366 if (memory_region_is_romd(mr)) {
367 return !is_write;
368 }
369
370 return false;
371}
372
5c8a00ce
PB
373MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
374 hwaddr *xlat, hwaddr *plen,
375 bool is_write)
90260c6c 376{
30951157
AK
377 IOMMUTLBEntry iotlb;
378 MemoryRegionSection *section;
379 MemoryRegion *mr;
380 hwaddr len = *plen;
381
79e2b9ae 382 rcu_read_lock();
30951157 383 for (;;) {
79e2b9ae
PB
384 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
385 section = address_space_translate_internal(d, addr, &addr, plen, true);
30951157
AK
386 mr = section->mr;
387
388 if (!mr->iommu_ops) {
389 break;
390 }
391
8d7b8cb9 392 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
30951157
AK
393 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
394 | (addr & iotlb.addr_mask));
395 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
396 if (!(iotlb.perm & (1 << is_write))) {
397 mr = &io_mem_unassigned;
398 break;
399 }
400
401 as = iotlb.target_as;
402 }
403
fe680d0d 404 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
a87f3954
PB
405 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
406 len = MIN(page, len);
407 }
408
30951157
AK
409 *plen = len;
410 *xlat = addr;
79e2b9ae 411 rcu_read_unlock();
30951157 412 return mr;
90260c6c 413}
414
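/* Hedged usage sketch (added for explanation, not part of the blamed file):
 * resolve a guest physical address before an access.  The returned length
 * may be clamped at a region or page boundary, so real callers loop until
 * the whole request has been consumed.
 */
static bool example_can_write_direct(AddressSpace *as, hwaddr addr, hwaddr len)
{
    hwaddr xlat, plen = len;
    MemoryRegion *mr = address_space_translate(as, addr, &xlat, &plen, true);

    /* Only RAM/ROMD regions can be written directly (see
     * memory_access_is_direct() above), and only for the clamped length.
     */
    return plen == len && memory_access_is_direct(mr, true);
}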
79e2b9ae 415/* Called from RCU critical section */
90260c6c 416MemoryRegionSection *
9d82b5a7
PB
417address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
418 hwaddr *xlat, hwaddr *plen)
90260c6c 419{
30951157 420 MemoryRegionSection *section;
9d82b5a7
PB
421 section = address_space_translate_internal(cpu->memory_dispatch,
422 addr, xlat, plen, false);
30951157
AK
423
424 assert(!section->mr->iommu_ops);
425 return section;
90260c6c 426}
5b6dd868 427#endif
fd6ce8f6 428
5b6dd868 429void cpu_exec_init_all(void)
fdbb84d1 430{
5b6dd868 431#if !defined(CONFIG_USER_ONLY)
b2a8658e 432 qemu_mutex_init(&ram_list.mutex);
5b6dd868
BS
433 memory_map_init();
434 io_mem_init();
fdbb84d1 435#endif
5b6dd868 436}
fdbb84d1 437
b170fce3 438#if !defined(CONFIG_USER_ONLY)
5b6dd868
BS
439
440static int cpu_common_post_load(void *opaque, int version_id)
fd6ce8f6 441{
259186a7 442 CPUState *cpu = opaque;
a513fe19 443
5b6dd868
BS
444 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
445 version_id is increased. */
259186a7 446 cpu->interrupt_request &= ~0x01;
c01a71c1 447 tlb_flush(cpu, 1);
5b6dd868
BS
448
449 return 0;
a513fe19 450}
7501267e 451
6c3bff0e
PD
452static int cpu_common_pre_load(void *opaque)
453{
454 CPUState *cpu = opaque;
455
adee6424 456 cpu->exception_index = -1;
6c3bff0e
PD
457
458 return 0;
459}
460
461static bool cpu_common_exception_index_needed(void *opaque)
462{
463 CPUState *cpu = opaque;
464
adee6424 465 return tcg_enabled() && cpu->exception_index != -1;
6c3bff0e
PD
466}
467
468static const VMStateDescription vmstate_cpu_common_exception_index = {
469 .name = "cpu_common/exception_index",
470 .version_id = 1,
471 .minimum_version_id = 1,
472 .fields = (VMStateField[]) {
473 VMSTATE_INT32(exception_index, CPUState),
474 VMSTATE_END_OF_LIST()
475 }
476};
477
1a1562f5 478const VMStateDescription vmstate_cpu_common = {
5b6dd868
BS
479 .name = "cpu_common",
480 .version_id = 1,
481 .minimum_version_id = 1,
6c3bff0e 482 .pre_load = cpu_common_pre_load,
5b6dd868 483 .post_load = cpu_common_post_load,
35d08458 484 .fields = (VMStateField[]) {
259186a7
AF
485 VMSTATE_UINT32(halted, CPUState),
486 VMSTATE_UINT32(interrupt_request, CPUState),
5b6dd868 487 VMSTATE_END_OF_LIST()
6c3bff0e
PD
488 },
489 .subsections = (VMStateSubsection[]) {
490 {
491 .vmsd = &vmstate_cpu_common_exception_index,
492 .needed = cpu_common_exception_index_needed,
493 } , {
494 /* empty */
495 }
5b6dd868
BS
496 }
497};
1a1562f5 498
5b6dd868 499#endif
ea041c0e 500
38d8f5c8 501CPUState *qemu_get_cpu(int index)
ea041c0e 502{
bdc44640 503 CPUState *cpu;
ea041c0e 504
bdc44640 505 CPU_FOREACH(cpu) {
55e5c285 506 if (cpu->cpu_index == index) {
bdc44640 507 return cpu;
55e5c285 508 }
ea041c0e 509 }
5b6dd868 510
bdc44640 511 return NULL;
ea041c0e
FB
512}
513
09daed84
EI
514#if !defined(CONFIG_USER_ONLY)
515void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
516{
517 /* We only support one address space per cpu at the moment. */
518 assert(cpu->as == as);
519
520 if (cpu->tcg_as_listener) {
521 memory_listener_unregister(cpu->tcg_as_listener);
522 } else {
523 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
524 }
525 cpu->tcg_as_listener->commit = tcg_commit;
526 memory_listener_register(cpu->tcg_as_listener, as);
527}
528#endif
529
5b6dd868 530void cpu_exec_init(CPUArchState *env)
ea041c0e 531{
5b6dd868 532 CPUState *cpu = ENV_GET_CPU(env);
b170fce3 533 CPUClass *cc = CPU_GET_CLASS(cpu);
bdc44640 534 CPUState *some_cpu;
5b6dd868
BS
535 int cpu_index;
536
537#if defined(CONFIG_USER_ONLY)
538 cpu_list_lock();
539#endif
5b6dd868 540 cpu_index = 0;
bdc44640 541 CPU_FOREACH(some_cpu) {
5b6dd868
BS
542 cpu_index++;
543 }
55e5c285 544 cpu->cpu_index = cpu_index;
1b1ed8dc 545 cpu->numa_node = 0;
f0c3c505 546 QTAILQ_INIT(&cpu->breakpoints);
ff4700b0 547 QTAILQ_INIT(&cpu->watchpoints);
5b6dd868 548#ifndef CONFIG_USER_ONLY
09daed84 549 cpu->as = &address_space_memory;
5b6dd868
BS
550 cpu->thread_id = qemu_get_thread_id();
551#endif
bdc44640 552 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
5b6dd868
BS
553#if defined(CONFIG_USER_ONLY)
554 cpu_list_unlock();
555#endif
e0d47944
AF
556 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
557 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
558 }
5b6dd868 559#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
5b6dd868
BS
560 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
561 cpu_save, cpu_load, env);
b170fce3 562 assert(cc->vmsd == NULL);
e0d47944 563 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
5b6dd868 564#endif
b170fce3
AF
565 if (cc->vmsd != NULL) {
566 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
567 }
ea041c0e
FB
568}
569
94df27fd 570#if defined(CONFIG_USER_ONLY)
00b941e5 571static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
94df27fd
PB
572{
573 tb_invalidate_phys_page_range(pc, pc + 1, 0);
574}
575#else
00b941e5 576static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
1e7855a5 577{
e8262a1b
MF
578 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
579 if (phys != -1) {
09daed84 580 tb_invalidate_phys_addr(cpu->as,
29d8ec7b 581 phys | (pc & ~TARGET_PAGE_MASK));
e8262a1b 582 }
1e7855a5 583}
c27004ec 584#endif
d720b93d 585
c527ee8f 586#if defined(CONFIG_USER_ONLY)
75a34036 587void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
c527ee8f
PB
588
589{
590}
591
3ee887e8
PM
592int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
593 int flags)
594{
595 return -ENOSYS;
596}
597
598void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
599{
600}
601
75a34036 602int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
c527ee8f
PB
603 int flags, CPUWatchpoint **watchpoint)
604{
605 return -ENOSYS;
606}
607#else
6658ffb8 608/* Add a watchpoint. */
75a34036 609int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
a1d1bb31 610 int flags, CPUWatchpoint **watchpoint)
6658ffb8 611{
c0ce998e 612 CPUWatchpoint *wp;
6658ffb8 613
05068c0d 614 /* forbid ranges which are empty or run off the end of the address space */
07e2863d 615 if (len == 0 || (addr + len - 1) < addr) {
75a34036
AF
616 error_report("tried to set invalid watchpoint at %"
617 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
b4051334
AL
618 return -EINVAL;
619 }
7267c094 620 wp = g_malloc(sizeof(*wp));
a1d1bb31
AL
621
622 wp->vaddr = addr;
05068c0d 623 wp->len = len;
a1d1bb31
AL
624 wp->flags = flags;
625
2dc9f411 626 /* keep all GDB-injected watchpoints in front */
ff4700b0
AF
627 if (flags & BP_GDB) {
628 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
629 } else {
630 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
631 }
6658ffb8 632
31b030d4 633 tlb_flush_page(cpu, addr);
a1d1bb31
AL
634
635 if (watchpoint)
636 *watchpoint = wp;
637 return 0;
6658ffb8
PB
638}
639
a1d1bb31 640/* Remove a specific watchpoint. */
75a34036 641int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
a1d1bb31 642 int flags)
6658ffb8 643{
a1d1bb31 644 CPUWatchpoint *wp;
6658ffb8 645
ff4700b0 646 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
05068c0d 647 if (addr == wp->vaddr && len == wp->len
6e140f28 648 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
75a34036 649 cpu_watchpoint_remove_by_ref(cpu, wp);
6658ffb8
PB
650 return 0;
651 }
652 }
a1d1bb31 653 return -ENOENT;
6658ffb8
PB
654}
655
a1d1bb31 656/* Remove a specific watchpoint by reference. */
75a34036 657void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
a1d1bb31 658{
ff4700b0 659 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
7d03f82f 660
31b030d4 661 tlb_flush_page(cpu, watchpoint->vaddr);
a1d1bb31 662
7267c094 663 g_free(watchpoint);
a1d1bb31
AL
664}
665
666/* Remove all matching watchpoints. */
75a34036 667void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
a1d1bb31 668{
c0ce998e 669 CPUWatchpoint *wp, *next;
a1d1bb31 670
ff4700b0 671 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
75a34036
AF
672 if (wp->flags & mask) {
673 cpu_watchpoint_remove_by_ref(cpu, wp);
674 }
c0ce998e 675 }
7d03f82f 676}
05068c0d 677
678/* Return true if this watchpoint address matches the specified
679 * access (ie the address range covered by the watchpoint overlaps
680 * partially or completely with the address range covered by the
681 * access).
682 */
683static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
684 vaddr addr,
685 vaddr len)
686{
687 /* We know the lengths are non-zero, but a little caution is
688 * required to avoid errors in the case where the range ends
689 * exactly at the top of the address space and so addr + len
690 * wraps round to zero.
691 */
692 vaddr wpend = wp->vaddr + wp->len - 1;
693 vaddr addrend = addr + len - 1;
694
695 return !(addr > wpend || wp->vaddr > addrend);
696}
697
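/* Worked example for cpu_watchpoint_address_matches() (explanatory note,
 * not part of the blamed file): a watchpoint with vaddr=0x1000 and len=4
 * covers the inclusive range [0x1000, 0x1003].  An access at addr=0x1002,
 * len=2 covers [0x1002, 0x1003]; neither range starts beyond the other's
 * last byte, so it matches.  An access at addr=0x1004 starts past
 * wpend=0x1003 and does not match.  Using inclusive ends (addr + len - 1)
 * is what keeps a range that touches the very top of the address space
 * from wrapping around to zero.
 */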
c527ee8f 698#endif
7d03f82f 699
a1d1bb31 700/* Add a breakpoint. */
b3310ab3 701int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
a1d1bb31 702 CPUBreakpoint **breakpoint)
4c3a88a2 703{
c0ce998e 704 CPUBreakpoint *bp;
3b46e624 705
7267c094 706 bp = g_malloc(sizeof(*bp));
4c3a88a2 707
a1d1bb31
AL
708 bp->pc = pc;
709 bp->flags = flags;
710
2dc9f411 711 /* keep all GDB-injected breakpoints in front */
00b941e5 712 if (flags & BP_GDB) {
f0c3c505 713 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
00b941e5 714 } else {
f0c3c505 715 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
00b941e5 716 }
3b46e624 717
f0c3c505 718 breakpoint_invalidate(cpu, pc);
a1d1bb31 719
00b941e5 720 if (breakpoint) {
a1d1bb31 721 *breakpoint = bp;
00b941e5 722 }
4c3a88a2 723 return 0;
4c3a88a2
FB
724}
725
a1d1bb31 726/* Remove a specific breakpoint. */
b3310ab3 727int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
a1d1bb31 728{
a1d1bb31
AL
729 CPUBreakpoint *bp;
730
f0c3c505 731 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
a1d1bb31 732 if (bp->pc == pc && bp->flags == flags) {
b3310ab3 733 cpu_breakpoint_remove_by_ref(cpu, bp);
a1d1bb31
AL
734 return 0;
735 }
7d03f82f 736 }
a1d1bb31 737 return -ENOENT;
7d03f82f
EI
738}
739
a1d1bb31 740/* Remove a specific breakpoint by reference. */
b3310ab3 741void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
4c3a88a2 742{
f0c3c505
AF
743 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
744
745 breakpoint_invalidate(cpu, breakpoint->pc);
a1d1bb31 746
7267c094 747 g_free(breakpoint);
a1d1bb31
AL
748}
749
750/* Remove all matching breakpoints. */
b3310ab3 751void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
a1d1bb31 752{
c0ce998e 753 CPUBreakpoint *bp, *next;
a1d1bb31 754
f0c3c505 755 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
b3310ab3
AF
756 if (bp->flags & mask) {
757 cpu_breakpoint_remove_by_ref(cpu, bp);
758 }
c0ce998e 759 }
4c3a88a2
FB
760}
761
c33a346e
FB
762/* enable or disable single step mode. EXCP_DEBUG is returned by the
763 CPU loop after each instruction */
3825b28f 764void cpu_single_step(CPUState *cpu, int enabled)
c33a346e 765{
ed2803da
AF
766 if (cpu->singlestep_enabled != enabled) {
767 cpu->singlestep_enabled = enabled;
768 if (kvm_enabled()) {
38e478ec 769 kvm_update_guest_debug(cpu, 0);
ed2803da 770 } else {
ccbb4d44 771 /* must flush all the translated code to avoid inconsistencies */
e22a25c9 772 /* XXX: only flush what is necessary */
38e478ec 773 CPUArchState *env = cpu->env_ptr;
e22a25c9
AL
774 tb_flush(env);
775 }
c33a346e 776 }
c33a346e
FB
777}
778
a47dddd7 779void cpu_abort(CPUState *cpu, const char *fmt, ...)
7501267e
FB
780{
781 va_list ap;
493ae1f0 782 va_list ap2;
7501267e
FB
783
784 va_start(ap, fmt);
493ae1f0 785 va_copy(ap2, ap);
7501267e
FB
786 fprintf(stderr, "qemu: fatal: ");
787 vfprintf(stderr, fmt, ap);
788 fprintf(stderr, "\n");
878096ee 789 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
93fcfe39
AL
790 if (qemu_log_enabled()) {
791 qemu_log("qemu: fatal: ");
792 qemu_log_vprintf(fmt, ap2);
793 qemu_log("\n");
a0762859 794 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
31b1a7b4 795 qemu_log_flush();
93fcfe39 796 qemu_log_close();
924edcae 797 }
493ae1f0 798 va_end(ap2);
f9373291 799 va_end(ap);
fd052bf6
RV
800#if defined(CONFIG_USER_ONLY)
801 {
802 struct sigaction act;
803 sigfillset(&act.sa_mask);
804 act.sa_handler = SIG_DFL;
805 sigaction(SIGABRT, &act, NULL);
806 }
807#endif
7501267e
FB
808 abort();
809}
810
0124311e 811#if !defined(CONFIG_USER_ONLY)
0dc3f44a 812/* Called from RCU critical section */
041603fe
PB
813static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
814{
815 RAMBlock *block;
816
43771539 817 block = atomic_rcu_read(&ram_list.mru_block);
9b8424d5 818 if (block && addr - block->offset < block->max_length) {
041603fe
PB
819 goto found;
820 }
0dc3f44a 821 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
9b8424d5 822 if (addr - block->offset < block->max_length) {
041603fe
PB
823 goto found;
824 }
825 }
826
827 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
828 abort();
829
830found:
43771539
PB
831 /* It is safe to write mru_block outside the iothread lock. This
832 * is what happens:
833 *
834 * mru_block = xxx
835 * rcu_read_unlock()
836 * xxx removed from list
837 * rcu_read_lock()
838 * read mru_block
839 * mru_block = NULL;
840 * call_rcu(reclaim_ramblock, xxx);
841 * rcu_read_unlock()
842 *
843 * atomic_rcu_set is not needed here. The block was already published
844 * when it was placed into the list. Here we're just making an extra
845 * copy of the pointer.
846 */
041603fe 847 ram_list.mru_block = block;
848 return block;
849}
850
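/* Hedged usage sketch (added for explanation, not part of the blamed file):
 * look up the host memory backing a ram_addr_t while inside an RCU critical
 * section, so the RAMBlock cannot be reclaimed while it is examined.
 */
static void *example_ram_host_ptr(ram_addr_t addr)
{
    RAMBlock *block;
    void *host;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    host = ramblock_ptr(block, addr - block->offset);
    rcu_read_unlock();

    /* After rcu_read_unlock() the pointer stays valid only if something
     * else (e.g. a reference on the owning MemoryRegion) keeps the block
     * alive; see the comments on qemu_get_ram_ptr() below.
     */
    return host;
}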
a2f4d5be 851static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
d24981d3 852{
041603fe 853 ram_addr_t start1;
a2f4d5be
JQ
854 RAMBlock *block;
855 ram_addr_t end;
856
857 end = TARGET_PAGE_ALIGN(start + length);
858 start &= TARGET_PAGE_MASK;
d24981d3 859
0dc3f44a 860 rcu_read_lock();
041603fe
PB
861 block = qemu_get_ram_block(start);
862 assert(block == qemu_get_ram_block(end - 1));
1240be24 863 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
041603fe 864 cpu_tlb_reset_dirty_all(start1, length);
0dc3f44a 865 rcu_read_unlock();
d24981d3
JQ
866}
867
5579c7f3 868/* Note: start and end must be within the same ram block. */
a2f4d5be 869void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
52159192 870 unsigned client)
1ccde1cb 871{
1ccde1cb
FB
872 if (length == 0)
873 return;
c8d6f66a 874 cpu_physical_memory_clear_dirty_range_type(start, length, client);
f23db169 875
d24981d3 876 if (tcg_enabled()) {
a2f4d5be 877 tlb_reset_dirty_range_all(start, length);
5579c7f3 878 }
1ccde1cb
FB
879}
880
981fdf23 881static void cpu_physical_memory_set_dirty_tracking(bool enable)
74576198
AL
882{
883 in_migration = enable;
74576198
AL
884}
885
79e2b9ae 886/* Called from RCU critical section */
bb0e627a 887hwaddr memory_region_section_get_iotlb(CPUState *cpu,
149f54b5
PB
888 MemoryRegionSection *section,
889 target_ulong vaddr,
890 hwaddr paddr, hwaddr xlat,
891 int prot,
892 target_ulong *address)
e5548617 893{
a8170e5e 894 hwaddr iotlb;
e5548617
BS
895 CPUWatchpoint *wp;
896
cc5bea60 897 if (memory_region_is_ram(section->mr)) {
e5548617
BS
898 /* Normal RAM. */
899 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
149f54b5 900 + xlat;
e5548617 901 if (!section->readonly) {
b41aac4f 902 iotlb |= PHYS_SECTION_NOTDIRTY;
e5548617 903 } else {
b41aac4f 904 iotlb |= PHYS_SECTION_ROM;
e5548617
BS
905 }
906 } else {
1b3fb98f 907 iotlb = section - section->address_space->dispatch->map.sections;
149f54b5 908 iotlb += xlat;
e5548617
BS
909 }
910
911 /* Make accesses to pages with watchpoints go via the
912 watchpoint trap routines. */
ff4700b0 913 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
05068c0d 914 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
e5548617
BS
915 /* Avoid trapping reads of pages with a write breakpoint. */
916 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
b41aac4f 917 iotlb = PHYS_SECTION_WATCH + paddr;
e5548617
BS
918 *address |= TLB_MMIO;
919 break;
920 }
921 }
922 }
923
924 return iotlb;
925}
9fa3e853
FB
926#endif /* defined(CONFIG_USER_ONLY) */
927
e2eef170 928#if !defined(CONFIG_USER_ONLY)
8da3ff18 929
c227f099 930static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
5312bd8b 931 uint16_t section);
acc9d80b 932static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
54688b1e 933
a2b257d6 934static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
 935 qemu_anon_ram_alloc;
91138037 936
937/*
 938 * Set a custom physical guest memory allocator.
939 * Accelerators with unusual needs may need this. Hopefully, we can
940 * get rid of it eventually.
941 */
a2b257d6 942void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
91138037 943{
944 phys_mem_alloc = alloc;
945}
946
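/* Hedged sketch (added for explanation, not part of the blamed file): an
 * accelerator with unusual allocation needs could install its own allocator
 * before any RAM block is created.  The hook is assumed to follow the same
 * contract as qemu_anon_ram_alloc(): return memory of at least @size bytes
 * and report the chosen alignment through *align.
 */
static void *example_accel_ram_alloc(size_t size, uint64_t *align)
{
    /* This sketch just falls back to the default anonymous allocation. */
    return qemu_anon_ram_alloc(size, align);
}

static void example_accel_init(void)
{
    phys_mem_set_alloc(example_accel_ram_alloc);
}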
53cb28cb
MA
947static uint16_t phys_section_add(PhysPageMap *map,
948 MemoryRegionSection *section)
5312bd8b 949{
68f3f65b
PB
950 /* The physical section number is ORed with a page-aligned
951 * pointer to produce the iotlb entries. Thus it should
952 * never overflow into the page-aligned value.
953 */
53cb28cb 954 assert(map->sections_nb < TARGET_PAGE_SIZE);
68f3f65b 955
53cb28cb
MA
956 if (map->sections_nb == map->sections_nb_alloc) {
957 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
958 map->sections = g_renew(MemoryRegionSection, map->sections,
959 map->sections_nb_alloc);
5312bd8b 960 }
53cb28cb 961 map->sections[map->sections_nb] = *section;
dfde4e6e 962 memory_region_ref(section->mr);
53cb28cb 963 return map->sections_nb++;
5312bd8b
AK
964}
965
058bc4b5
PB
966static void phys_section_destroy(MemoryRegion *mr)
967{
dfde4e6e
PB
968 memory_region_unref(mr);
969
058bc4b5
PB
970 if (mr->subpage) {
971 subpage_t *subpage = container_of(mr, subpage_t, iomem);
b4fefef9 972 object_unref(OBJECT(&subpage->iomem));
058bc4b5
PB
973 g_free(subpage);
974 }
975}
976
6092666e 977static void phys_sections_free(PhysPageMap *map)
5312bd8b 978{
9affd6fc
PB
979 while (map->sections_nb > 0) {
980 MemoryRegionSection *section = &map->sections[--map->sections_nb];
058bc4b5
PB
981 phys_section_destroy(section->mr);
982 }
9affd6fc
PB
983 g_free(map->sections);
984 g_free(map->nodes);
5312bd8b
AK
985}
986
ac1970fb 987static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
0f0cb164
AK
988{
989 subpage_t *subpage;
a8170e5e 990 hwaddr base = section->offset_within_address_space
0f0cb164 991 & TARGET_PAGE_MASK;
97115a8d 992 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
53cb28cb 993 d->map.nodes, d->map.sections);
0f0cb164
AK
994 MemoryRegionSection subsection = {
995 .offset_within_address_space = base,
052e87b0 996 .size = int128_make64(TARGET_PAGE_SIZE),
0f0cb164 997 };
a8170e5e 998 hwaddr start, end;
0f0cb164 999
f3705d53 1000 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
0f0cb164 1001
f3705d53 1002 if (!(existing->mr->subpage)) {
acc9d80b 1003 subpage = subpage_init(d->as, base);
3be91e86 1004 subsection.address_space = d->as;
0f0cb164 1005 subsection.mr = &subpage->iomem;
ac1970fb 1006 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
53cb28cb 1007 phys_section_add(&d->map, &subsection));
0f0cb164 1008 } else {
f3705d53 1009 subpage = container_of(existing->mr, subpage_t, iomem);
0f0cb164
AK
1010 }
1011 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
052e87b0 1012 end = start + int128_get64(section->size) - 1;
53cb28cb
MA
1013 subpage_register(subpage, start, end,
1014 phys_section_add(&d->map, section));
0f0cb164
AK
1015}
1016
1017
052e87b0
PB
1018static void register_multipage(AddressSpaceDispatch *d,
1019 MemoryRegionSection *section)
33417e70 1020{
a8170e5e 1021 hwaddr start_addr = section->offset_within_address_space;
53cb28cb 1022 uint16_t section_index = phys_section_add(&d->map, section);
052e87b0
PB
1023 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1024 TARGET_PAGE_BITS));
dd81124b 1025
733d5ef5
PB
1026 assert(num_pages);
1027 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
33417e70
FB
1028}
1029
ac1970fb 1030static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
0f0cb164 1031{
89ae337a 1032 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
00752703 1033 AddressSpaceDispatch *d = as->next_dispatch;
99b9cc06 1034 MemoryRegionSection now = *section, remain = *section;
052e87b0 1035 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
0f0cb164 1036
733d5ef5
PB
1037 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1038 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1039 - now.offset_within_address_space;
1040
052e87b0 1041 now.size = int128_min(int128_make64(left), now.size);
ac1970fb 1042 register_subpage(d, &now);
733d5ef5 1043 } else {
052e87b0 1044 now.size = int128_zero();
733d5ef5 1045 }
052e87b0
PB
1046 while (int128_ne(remain.size, now.size)) {
1047 remain.size = int128_sub(remain.size, now.size);
1048 remain.offset_within_address_space += int128_get64(now.size);
1049 remain.offset_within_region += int128_get64(now.size);
69b67646 1050 now = remain;
052e87b0 1051 if (int128_lt(remain.size, page_size)) {
733d5ef5 1052 register_subpage(d, &now);
88266249 1053 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
052e87b0 1054 now.size = page_size;
ac1970fb 1055 register_subpage(d, &now);
69b67646 1056 } else {
052e87b0 1057 now.size = int128_and(now.size, int128_neg(page_size));
ac1970fb 1058 register_multipage(d, &now);
69b67646 1059 }
0f0cb164 1060 }
1061}
1062
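/* Worked example for mem_add() above (explanatory note, not part of the
 * blamed file), assuming 4 KiB target pages: a section covering
 * [0x1800, 0x5000) is registered as
 *   - a subpage for the unaligned head [0x1800, 0x2000), and
 *   - whole pages [0x2000, 0x5000) via register_multipage().
 * If the section instead ended at 0x5800, the unaligned tail
 * [0x5000, 0x5800) would also go through register_subpage().
 */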
62a2744c
SY
1063void qemu_flush_coalesced_mmio_buffer(void)
1064{
1065 if (kvm_enabled())
1066 kvm_flush_coalesced_mmio_buffer();
1067}
1068
b2a8658e
UD
1069void qemu_mutex_lock_ramlist(void)
1070{
1071 qemu_mutex_lock(&ram_list.mutex);
1072}
1073
1074void qemu_mutex_unlock_ramlist(void)
1075{
1076 qemu_mutex_unlock(&ram_list.mutex);
1077}
1078
e1e84ba0 1079#ifdef __linux__
c902760f
MT
1080
1081#include <sys/vfs.h>
1082
1083#define HUGETLBFS_MAGIC 0x958458f6
1084
fc7a5800 1085static long gethugepagesize(const char *path, Error **errp)
c902760f
MT
1086{
1087 struct statfs fs;
1088 int ret;
1089
1090 do {
9742bf26 1091 ret = statfs(path, &fs);
c902760f
MT
1092 } while (ret != 0 && errno == EINTR);
1093
1094 if (ret != 0) {
fc7a5800
HT
1095 error_setg_errno(errp, errno, "failed to get page size of file %s",
1096 path);
9742bf26 1097 return 0;
c902760f
MT
1098 }
1099
1100 if (fs.f_type != HUGETLBFS_MAGIC)
9742bf26 1101 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
c902760f
MT
1102
1103 return fs.f_bsize;
1104}
1105
04b16653
AW
1106static void *file_ram_alloc(RAMBlock *block,
1107 ram_addr_t memory,
7f56e740
PB
1108 const char *path,
1109 Error **errp)
c902760f
MT
1110{
1111 char *filename;
8ca761f6
PF
1112 char *sanitized_name;
1113 char *c;
557529dd 1114 void *area = NULL;
c902760f 1115 int fd;
557529dd 1116 uint64_t hpagesize;
fc7a5800 1117 Error *local_err = NULL;
c902760f 1118
fc7a5800
HT
1119 hpagesize = gethugepagesize(path, &local_err);
1120 if (local_err) {
1121 error_propagate(errp, local_err);
f9a49dfa 1122 goto error;
c902760f 1123 }
a2b257d6 1124 block->mr->align = hpagesize;
c902760f
MT
1125
1126 if (memory < hpagesize) {
557529dd
HT
1127 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1128 "or larger than huge page size 0x%" PRIx64,
1129 memory, hpagesize);
1130 goto error;
c902760f
MT
1131 }
1132
1133 if (kvm_enabled() && !kvm_has_sync_mmu()) {
7f56e740
PB
1134 error_setg(errp,
1135 "host lacks kvm mmu notifiers, -mem-path unsupported");
f9a49dfa 1136 goto error;
c902760f
MT
1137 }
1138
8ca761f6 1139 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
83234bf2 1140 sanitized_name = g_strdup(memory_region_name(block->mr));
8ca761f6
PF
1141 for (c = sanitized_name; *c != '\0'; c++) {
1142 if (*c == '/')
1143 *c = '_';
1144 }
1145
1146 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1147 sanitized_name);
1148 g_free(sanitized_name);
c902760f
MT
1149
1150 fd = mkstemp(filename);
1151 if (fd < 0) {
7f56e740
PB
1152 error_setg_errno(errp, errno,
1153 "unable to create backing store for hugepages");
e4ada482 1154 g_free(filename);
f9a49dfa 1155 goto error;
c902760f
MT
1156 }
1157 unlink(filename);
e4ada482 1158 g_free(filename);
c902760f
MT
1159
1160 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1161
1162 /*
1163 * ftruncate is not supported by hugetlbfs in older
1164 * hosts, so don't bother bailing out on errors.
1165 * If anything goes wrong with it under other filesystems,
1166 * mmap will fail.
1167 */
7f56e740 1168 if (ftruncate(fd, memory)) {
9742bf26 1169 perror("ftruncate");
7f56e740 1170 }
c902760f 1171
dbcb8981
PB
1172 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1173 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1174 fd, 0);
c902760f 1175 if (area == MAP_FAILED) {
7f56e740
PB
1176 error_setg_errno(errp, errno,
1177 "unable to map backing store for hugepages");
9742bf26 1178 close(fd);
f9a49dfa 1179 goto error;
c902760f 1180 }
ef36fa14
MT
1181
1182 if (mem_prealloc) {
38183310 1183 os_mem_prealloc(fd, area, memory);
ef36fa14
MT
1184 }
1185
04b16653 1186 block->fd = fd;
c902760f 1187 return area;
f9a49dfa
MT
1188
1189error:
1190 if (mem_prealloc) {
e4d9df4f 1191 error_report("%s\n", error_get_pretty(*errp));
f9a49dfa
MT
1192 exit(1);
1193 }
1194 return NULL;
c902760f
MT
1195}
1196#endif
1197
0dc3f44a 1198/* Called with the ramlist lock held. */
d17b5288 1199static ram_addr_t find_ram_offset(ram_addr_t size)
04b16653
AW
1200{
1201 RAMBlock *block, *next_block;
3e837b2c 1202 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
04b16653 1203
49cd9ac6
SH
1204 assert(size != 0); /* it would hand out same offset multiple times */
1205
0dc3f44a 1206 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
04b16653 1207 return 0;
0d53d9fe 1208 }
04b16653 1209
0dc3f44a 1210 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
f15fbc4b 1211 ram_addr_t end, next = RAM_ADDR_MAX;
04b16653 1212
62be4e3a 1213 end = block->offset + block->max_length;
04b16653 1214
0dc3f44a 1215 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
04b16653
AW
1216 if (next_block->offset >= end) {
1217 next = MIN(next, next_block->offset);
1218 }
1219 }
1220 if (next - end >= size && next - end < mingap) {
3e837b2c 1221 offset = end;
04b16653
AW
1222 mingap = next - end;
1223 }
1224 }
3e837b2c
AW
1225
1226 if (offset == RAM_ADDR_MAX) {
1227 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1228 (uint64_t)size);
1229 abort();
1230 }
1231
04b16653 1232 return offset;
1233}
1234
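/* Worked example for find_ram_offset() above (explanatory note, not part of
 * the blamed file): with existing blocks occupying [0x0, 0x4000000) and
 * [0x8000000, 0xc000000), a request for 0x2000000 bytes sees two candidate
 * gaps; the 64 MB hole starting at 0x4000000 is the smallest one that fits,
 * so the new block gets offset 0x4000000.
 */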
652d7ec2 1235ram_addr_t last_ram_offset(void)
d17b5288
AW
1236{
1237 RAMBlock *block;
1238 ram_addr_t last = 0;
1239
0dc3f44a
MD
1240 rcu_read_lock();
1241 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
62be4e3a 1242 last = MAX(last, block->offset + block->max_length);
0d53d9fe 1243 }
0dc3f44a 1244 rcu_read_unlock();
d17b5288
AW
1245 return last;
1246}
1247
ddb97f1d
JB
1248static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1249{
1250 int ret;
ddb97f1d
JB
1251
1252 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
2ff3de68
MA
1253 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1254 "dump-guest-core", true)) {
ddb97f1d
JB
1255 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1256 if (ret) {
1257 perror("qemu_madvise");
1258 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1259 "but dump_guest_core=off specified\n");
1260 }
1261 }
1262}
1263
0dc3f44a
MD
1264/* Called within an RCU critical section, or while the ramlist lock
1265 * is held.
1266 */
20cfe881 1267static RAMBlock *find_ram_block(ram_addr_t addr)
84b89d78 1268{
20cfe881 1269 RAMBlock *block;
84b89d78 1270
0dc3f44a 1271 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
c5705a77 1272 if (block->offset == addr) {
20cfe881 1273 return block;
c5705a77
AK
1274 }
1275 }
20cfe881
HT
1276
1277 return NULL;
1278}
1279
ae3a7047 1280/* Called with iothread lock held. */
20cfe881
HT
1281void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1282{
ae3a7047 1283 RAMBlock *new_block, *block;
20cfe881 1284
0dc3f44a 1285 rcu_read_lock();
ae3a7047 1286 new_block = find_ram_block(addr);
c5705a77
AK
1287 assert(new_block);
1288 assert(!new_block->idstr[0]);
84b89d78 1289
09e5ab63
AL
1290 if (dev) {
1291 char *id = qdev_get_dev_path(dev);
84b89d78
CM
1292 if (id) {
1293 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
7267c094 1294 g_free(id);
84b89d78
CM
1295 }
1296 }
1297 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1298
0dc3f44a 1299 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
c5705a77 1300 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
84b89d78
CM
1301 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1302 new_block->idstr);
1303 abort();
1304 }
1305 }
0dc3f44a 1306 rcu_read_unlock();
c5705a77
AK
1307}
1308
ae3a7047 1309/* Called with iothread lock held. */
20cfe881
HT
1310void qemu_ram_unset_idstr(ram_addr_t addr)
1311{
ae3a7047 1312 RAMBlock *block;
20cfe881 1313
ae3a7047
MD
1314 /* FIXME: arch_init.c assumes that this is not called throughout
1315 * migration. Ignore the problem since hot-unplug during migration
1316 * does not work anyway.
1317 */
1318
0dc3f44a 1319 rcu_read_lock();
ae3a7047 1320 block = find_ram_block(addr);
20cfe881
HT
1321 if (block) {
1322 memset(block->idstr, 0, sizeof(block->idstr));
1323 }
0dc3f44a 1324 rcu_read_unlock();
20cfe881
HT
1325}
1326
8490fc78
LC
1327static int memory_try_enable_merging(void *addr, size_t len)
1328{
2ff3de68 1329 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
8490fc78
LC
1330 /* disabled by the user */
1331 return 0;
1332 }
1333
1334 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1335}
1336
62be4e3a
MT
1337/* Only legal before guest might have detected the memory size: e.g. on
1338 * incoming migration, or right after reset.
1339 *
1340 * As memory core doesn't know how is memory accessed, it is up to
1341 * resize callback to update device state and/or add assertions to detect
1342 * misuse, if necessary.
1343 */
1344int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1345{
1346 RAMBlock *block = find_ram_block(base);
1347
1348 assert(block);
1349
1350 if (block->used_length == newsize) {
1351 return 0;
1352 }
1353
1354 if (!(block->flags & RAM_RESIZEABLE)) {
1355 error_setg_errno(errp, EINVAL,
1356 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1357 " in != 0x" RAM_ADDR_FMT, block->idstr,
1358 newsize, block->used_length);
1359 return -EINVAL;
1360 }
1361
1362 if (block->max_length < newsize) {
1363 error_setg_errno(errp, EINVAL,
1364 "Length too large: %s: 0x" RAM_ADDR_FMT
1365 " > 0x" RAM_ADDR_FMT, block->idstr,
1366 newsize, block->max_length);
1367 return -EINVAL;
1368 }
1369
1370 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1371 block->used_length = newsize;
1372 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1373 memory_region_set_size(block->mr, newsize);
1374 if (block->resized) {
1375 block->resized(block->idstr, newsize, block->host);
1376 }
1377 return 0;
1378}
1379
ef701d7b 1380static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
c5705a77 1381{
e1c57ab8 1382 RAMBlock *block;
0d53d9fe 1383 RAMBlock *last_block = NULL;
2152f5ca
JQ
1384 ram_addr_t old_ram_size, new_ram_size;
1385
1386 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
c5705a77 1387
b2a8658e 1388 qemu_mutex_lock_ramlist();
9b8424d5 1389 new_block->offset = find_ram_offset(new_block->max_length);
e1c57ab8
PB
1390
1391 if (!new_block->host) {
1392 if (xen_enabled()) {
9b8424d5
MT
1393 xen_ram_alloc(new_block->offset, new_block->max_length,
1394 new_block->mr);
e1c57ab8 1395 } else {
9b8424d5 1396 new_block->host = phys_mem_alloc(new_block->max_length,
a2b257d6 1397 &new_block->mr->align);
39228250 1398 if (!new_block->host) {
ef701d7b
HT
1399 error_setg_errno(errp, errno,
1400 "cannot set up guest memory '%s'",
1401 memory_region_name(new_block->mr));
1402 qemu_mutex_unlock_ramlist();
1403 return -1;
39228250 1404 }
9b8424d5 1405 memory_try_enable_merging(new_block->host, new_block->max_length);
6977dfe6 1406 }
c902760f 1407 }
94a6b54f 1408
0d53d9fe
MD
1409 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1410 * QLIST (which has an RCU-friendly variant) does not have insertion at
1411 * tail, so save the last element in last_block.
1412 */
0dc3f44a 1413 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
0d53d9fe 1414 last_block = block;
9b8424d5 1415 if (block->max_length < new_block->max_length) {
abb26d63
PB
1416 break;
1417 }
1418 }
1419 if (block) {
0dc3f44a 1420 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
0d53d9fe 1421 } else if (last_block) {
0dc3f44a 1422 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
0d53d9fe 1423 } else { /* list is empty */
0dc3f44a 1424 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
abb26d63 1425 }
0d6d3c87 1426 ram_list.mru_block = NULL;
94a6b54f 1427
0dc3f44a
MD
1428 /* Write list before version */
1429 smp_wmb();
f798b07f 1430 ram_list.version++;
b2a8658e 1431 qemu_mutex_unlock_ramlist();
f798b07f 1432
2152f5ca
JQ
1433 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1434
1435 if (new_ram_size > old_ram_size) {
1ab4c8ce 1436 int i;
ae3a7047
MD
1437
1438 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1ab4c8ce
JQ
1439 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1440 ram_list.dirty_memory[i] =
1441 bitmap_zero_extend(ram_list.dirty_memory[i],
1442 old_ram_size, new_ram_size);
1443 }
2152f5ca 1444 }
9b8424d5
MT
1445 cpu_physical_memory_set_dirty_range(new_block->offset,
1446 new_block->used_length);
94a6b54f 1447
a904c911
PB
1448 if (new_block->host) {
1449 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1450 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1451 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1452 if (kvm_enabled()) {
1453 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1454 }
e1c57ab8 1455 }
6f0437e8 1456
94a6b54f
PB
1457 return new_block->offset;
1458}
e9a1ab19 1459
0b183fc8 1460#ifdef __linux__
e1c57ab8 1461ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
dbcb8981 1462 bool share, const char *mem_path,
7f56e740 1463 Error **errp)
e1c57ab8
PB
1464{
1465 RAMBlock *new_block;
ef701d7b
HT
1466 ram_addr_t addr;
1467 Error *local_err = NULL;
e1c57ab8
PB
1468
1469 if (xen_enabled()) {
7f56e740
PB
1470 error_setg(errp, "-mem-path not supported with Xen");
1471 return -1;
e1c57ab8
PB
1472 }
1473
1474 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1475 /*
1476 * file_ram_alloc() needs to allocate just like
1477 * phys_mem_alloc, but we haven't bothered to provide
1478 * a hook there.
1479 */
7f56e740
PB
1480 error_setg(errp,
1481 "-mem-path not supported with this accelerator");
1482 return -1;
e1c57ab8
PB
1483 }
1484
1485 size = TARGET_PAGE_ALIGN(size);
1486 new_block = g_malloc0(sizeof(*new_block));
1487 new_block->mr = mr;
9b8424d5
MT
1488 new_block->used_length = size;
1489 new_block->max_length = size;
dbcb8981 1490 new_block->flags = share ? RAM_SHARED : 0;
7f56e740
PB
1491 new_block->host = file_ram_alloc(new_block, size,
1492 mem_path, errp);
1493 if (!new_block->host) {
1494 g_free(new_block);
1495 return -1;
1496 }
1497
ef701d7b 1498 addr = ram_block_add(new_block, &local_err);
1499 if (local_err) {
1500 g_free(new_block);
1501 error_propagate(errp, local_err);
1502 return -1;
1503 }
1504 return addr;
e1c57ab8 1505}
0b183fc8 1506#endif
e1c57ab8 1507
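/* Hedged usage sketch (added for explanation, not part of the blamed file,
 * Linux-only like the #ifdef block above): back a region's RAM with a
 * hugetlbfs file.  The mount point and the 1 GiB size are made-up values
 * for illustration.
 */
static ram_addr_t example_alloc_hugepage_ram(MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_from_file(1024 * 1024 * 1024, mr,
                                    false /* not RAM_SHARED */,
                                    "/dev/hugepages", errp);
}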
62be4e3a
MT
1508static
1509ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1510 void (*resized)(const char*,
1511 uint64_t length,
1512 void *host),
1513 void *host, bool resizeable,
ef701d7b 1514 MemoryRegion *mr, Error **errp)
e1c57ab8
PB
1515{
1516 RAMBlock *new_block;
ef701d7b
HT
1517 ram_addr_t addr;
1518 Error *local_err = NULL;
e1c57ab8
PB
1519
1520 size = TARGET_PAGE_ALIGN(size);
62be4e3a 1521 max_size = TARGET_PAGE_ALIGN(max_size);
e1c57ab8
PB
1522 new_block = g_malloc0(sizeof(*new_block));
1523 new_block->mr = mr;
62be4e3a 1524 new_block->resized = resized;
9b8424d5
MT
1525 new_block->used_length = size;
1526 new_block->max_length = max_size;
62be4e3a 1527 assert(max_size >= size);
e1c57ab8
PB
1528 new_block->fd = -1;
1529 new_block->host = host;
1530 if (host) {
7bd4f430 1531 new_block->flags |= RAM_PREALLOC;
e1c57ab8 1532 }
62be4e3a
MT
1533 if (resizeable) {
1534 new_block->flags |= RAM_RESIZEABLE;
1535 }
ef701d7b
HT
1536 addr = ram_block_add(new_block, &local_err);
1537 if (local_err) {
1538 g_free(new_block);
1539 error_propagate(errp, local_err);
1540 return -1;
1541 }
1542 return addr;
e1c57ab8
PB
1543}
1544
62be4e3a
MT
1545ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1546 MemoryRegion *mr, Error **errp)
1547{
1548 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1549}
1550
ef701d7b 1551ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
6977dfe6 1552{
62be4e3a
MT
1553 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1554}
1555
1556ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1557 void (*resized)(const char*,
1558 uint64_t length,
1559 void *host),
1560 MemoryRegion *mr, Error **errp)
1561{
1562 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
6977dfe6 1563}
1564
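/* Hedged usage sketch (added for explanation, not part of the blamed file):
 * allocate a resizeable block and later grow its used_length, e.g. while
 * handling incoming migration, before the guest can have observed the size.
 * The callback and the sizes are made up for illustration.
 */
static void example_size_changed(const char *id, uint64_t new_size, void *host)
{
    /* Device-specific reaction to the new used_length would go here. */
}

static ram_addr_t example_alloc_resizeable(MemoryRegion *mr, Error **errp)
{
    /* 16 MiB used now, allowed to grow up to 64 MiB. */
    return qemu_ram_alloc_resizeable(16 * 1024 * 1024, 64 * 1024 * 1024,
                                     example_size_changed, mr, errp);
}

static int example_grow(ram_addr_t base, Error **errp)
{
    return qemu_ram_resize(base, 32 * 1024 * 1024, errp);
}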
1f2e98b6
AW
1565void qemu_ram_free_from_ptr(ram_addr_t addr)
1566{
1567 RAMBlock *block;
1568
b2a8658e 1569 qemu_mutex_lock_ramlist();
0dc3f44a 1570 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1f2e98b6 1571 if (addr == block->offset) {
0dc3f44a 1572 QLIST_REMOVE_RCU(block, next);
0d6d3c87 1573 ram_list.mru_block = NULL;
0dc3f44a
MD
1574 /* Write list before version */
1575 smp_wmb();
f798b07f 1576 ram_list.version++;
43771539 1577 g_free_rcu(block, rcu);
b2a8658e 1578 break;
1f2e98b6
AW
1579 }
1580 }
b2a8658e 1581 qemu_mutex_unlock_ramlist();
1f2e98b6
AW
1582}
1583
43771539
PB
1584static void reclaim_ramblock(RAMBlock *block)
1585{
1586 if (block->flags & RAM_PREALLOC) {
1587 ;
1588 } else if (xen_enabled()) {
1589 xen_invalidate_map_cache_entry(block->host);
1590#ifndef _WIN32
1591 } else if (block->fd >= 0) {
1592 munmap(block->host, block->max_length);
1593 close(block->fd);
1594#endif
1595 } else {
1596 qemu_anon_ram_free(block->host, block->max_length);
1597 }
1598 g_free(block);
1599}
1600
c227f099 1601void qemu_ram_free(ram_addr_t addr)
e9a1ab19 1602{
04b16653
AW
1603 RAMBlock *block;
1604
b2a8658e 1605 qemu_mutex_lock_ramlist();
0dc3f44a 1606 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
04b16653 1607 if (addr == block->offset) {
0dc3f44a 1608 QLIST_REMOVE_RCU(block, next);
0d6d3c87 1609 ram_list.mru_block = NULL;
0dc3f44a
MD
1610 /* Write list before version */
1611 smp_wmb();
f798b07f 1612 ram_list.version++;
43771539 1613 call_rcu(block, reclaim_ramblock, rcu);
b2a8658e 1614 break;
04b16653
AW
1615 }
1616 }
b2a8658e 1617 qemu_mutex_unlock_ramlist();
e9a1ab19
FB
1618}
1619
cd19cfa2
HY
1620#ifndef _WIN32
1621void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1622{
1623 RAMBlock *block;
1624 ram_addr_t offset;
1625 int flags;
1626 void *area, *vaddr;
1627
0dc3f44a 1628 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
cd19cfa2 1629 offset = addr - block->offset;
9b8424d5 1630 if (offset < block->max_length) {
1240be24 1631 vaddr = ramblock_ptr(block, offset);
7bd4f430 1632 if (block->flags & RAM_PREALLOC) {
cd19cfa2 1633 ;
dfeaf2ab
MA
1634 } else if (xen_enabled()) {
1635 abort();
cd19cfa2
HY
1636 } else {
1637 flags = MAP_FIXED;
1638 munmap(vaddr, length);
3435f395 1639 if (block->fd >= 0) {
dbcb8981
PB
1640 flags |= (block->flags & RAM_SHARED ?
1641 MAP_SHARED : MAP_PRIVATE);
3435f395
MA
1642 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1643 flags, block->fd, offset);
cd19cfa2 1644 } else {
2eb9fbaa
MA
1645 /*
1646 * Remap needs to match alloc. Accelerators that
1647 * set phys_mem_alloc never remap. If they did,
1648 * we'd need a remap hook here.
1649 */
1650 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1651
cd19cfa2
HY
1652 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1653 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1654 flags, -1, 0);
cd19cfa2
HY
1655 }
1656 if (area != vaddr) {
f15fbc4b
AP
1657 fprintf(stderr, "Could not remap addr: "
1658 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
cd19cfa2
HY
1659 length, addr);
1660 exit(1);
1661 }
8490fc78 1662 memory_try_enable_merging(vaddr, length);
ddb97f1d 1663 qemu_ram_setup_dump(vaddr, length);
cd19cfa2 1664 }
cd19cfa2
HY
1665 }
1666 }
1667}
1668#endif /* !_WIN32 */
1669
a35ba7be
PB
1670int qemu_get_ram_fd(ram_addr_t addr)
1671{
ae3a7047
MD
1672 RAMBlock *block;
1673 int fd;
a35ba7be 1674
0dc3f44a 1675 rcu_read_lock();
ae3a7047
MD
1676 block = qemu_get_ram_block(addr);
1677 fd = block->fd;
0dc3f44a 1678 rcu_read_unlock();
ae3a7047 1679 return fd;
a35ba7be
PB
1680}
1681
3fd74b84
DM
1682void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1683{
ae3a7047
MD
1684 RAMBlock *block;
1685 void *ptr;
3fd74b84 1686
0dc3f44a 1687 rcu_read_lock();
ae3a7047
MD
1688 block = qemu_get_ram_block(addr);
1689 ptr = ramblock_ptr(block, 0);
0dc3f44a 1690 rcu_read_unlock();
ae3a7047 1691 return ptr;
3fd74b84
DM
1692}
1693
1b5ec234 1694/* Return a host pointer to ram allocated with qemu_ram_alloc.
ae3a7047
MD
1695 * This should not be used for general purpose DMA. Use address_space_map
1696 * or address_space_rw instead. For local memory (e.g. video ram) that the
1697 * device owns, use memory_region_get_ram_ptr.
0dc3f44a
MD
1698 *
1699 * By the time this function returns, the returned pointer is not protected
1700 * by RCU anymore. If the caller is not within an RCU critical section and
1701 * does not hold the iothread lock, it must have other means of protecting the
1702 * pointer, such as a reference to the region that includes the incoming
1703 * ram_addr_t.
1b5ec234
PB
1704 */
1705void *qemu_get_ram_ptr(ram_addr_t addr)
1706{
ae3a7047
MD
1707 RAMBlock *block;
1708 void *ptr;
1b5ec234 1709
0dc3f44a 1710 rcu_read_lock();
ae3a7047
MD
1711 block = qemu_get_ram_block(addr);
1712
1713 if (xen_enabled() && block->host == NULL) {
0d6d3c87
PB
1714 /* We need to check if the requested address is in the RAM
1715 * because we don't want to map the entire memory in QEMU.
1716 * In that case just map until the end of the page.
1717 */
1718 if (block->offset == 0) {
ae3a7047 1719 ptr = xen_map_cache(addr, 0, 0);
0dc3f44a 1720 goto unlock;
0d6d3c87 1721 }
ae3a7047
MD
1722
1723 block->host = xen_map_cache(block->offset, block->max_length, 1);
0d6d3c87 1724 }
ae3a7047
MD
1725 ptr = ramblock_ptr(block, addr - block->offset);
1726
0dc3f44a
MD
1727unlock:
1728 rcu_read_unlock();
ae3a7047 1729 return ptr;
dc828ca1 1730}
1731
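/* Hedged usage sketch (added for explanation, not part of the blamed file):
 * per the comment above, the pointer returned by qemu_get_ram_ptr() is not
 * protected by RCU once the call returns, so a caller outside an RCU
 * critical section should pin the owning region first.  Dirty-memory
 * bookkeeping is omitted here for brevity.
 */
static void example_poke_byte(MemoryRegion *mr, ram_addr_t addr, uint8_t val)
{
    memory_region_ref(mr);               /* keeps the RAM block alive */
    stb_p(qemu_get_ram_ptr(addr), val);  /* addr is a ram_addr_t offset */
    memory_region_unref(mr);
}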
38bee5dc 1732/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
ae3a7047 1733 * but takes a size argument.
0dc3f44a
MD
1734 *
1735 * By the time this function returns, the returned pointer is not protected
1736 * by RCU anymore. If the caller is not within an RCU critical section and
1737 * does not hold the iothread lock, it must have other means of protecting the
1738 * pointer, such as a reference to the region that includes the incoming
1739 * ram_addr_t.
ae3a7047 1740 */
cb85f7ab 1741static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
38bee5dc 1742{
ae3a7047 1743 void *ptr;
8ab934f9
SS
1744 if (*size == 0) {
1745 return NULL;
1746 }
868bb33f 1747 if (xen_enabled()) {
e41d7c69 1748 return xen_map_cache(addr, *size, 1);
868bb33f 1749 } else {
38bee5dc 1750 RAMBlock *block;
0dc3f44a
MD
1751 rcu_read_lock();
1752 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
9b8424d5
MT
1753 if (addr - block->offset < block->max_length) {
1754 if (addr - block->offset + *size > block->max_length)
1755 *size = block->max_length - addr + block->offset;
ae3a7047 1756 ptr = ramblock_ptr(block, addr - block->offset);
0dc3f44a 1757 rcu_read_unlock();
ae3a7047 1758 return ptr;
38bee5dc
SS
1759 }
1760 }
1761
1762 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1763 abort();
38bee5dc
SS
1764 }
1765}
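/*
 * Example (illustrative sketch): qemu_ram_ptr_length() may shrink *size to
 * the end of the containing RAMBlock, so a caller that wants to cover a
 * larger range has to loop.  Hypothetical helper, assuming the whole range
 * is RAM-backed:
 */
static void example_zero_ram_range(ram_addr_t addr, hwaddr len)
{
    while (len > 0) {
        hwaddr sz = len;
        void *p = qemu_ram_ptr_length(addr, &sz);   /* sz is clamped to the block */

        memset(p, 0, sz);
        addr += sz;
        len -= sz;
    }
}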
1766
7443b437 1767/* Some of the softmmu routines need to translate from a host pointer
ae3a7047
MD
1768 * (typically a TLB entry) back to a ram offset.
1769 *
1770 * By the time this function returns, the returned pointer is not protected
1771 * by RCU anymore. If the caller is not within an RCU critical section and
1772 * does not hold the iothread lock, it must have other means of protecting the
1773 * pointer, such as a reference to the region that includes the incoming
1774 * ram_addr_t.
1775 */
1b5ec234 1776MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
5579c7f3 1777{
94a6b54f
PB
1778 RAMBlock *block;
1779 uint8_t *host = ptr;
ae3a7047 1780 MemoryRegion *mr;
94a6b54f 1781
868bb33f 1782 if (xen_enabled()) {
0dc3f44a 1783 rcu_read_lock();
e41d7c69 1784 *ram_addr = xen_ram_addr_from_mapcache(ptr);
ae3a7047 1785 mr = qemu_get_ram_block(*ram_addr)->mr;
0dc3f44a 1786 rcu_read_unlock();
ae3a7047 1787 return mr;
712c2b41
SS
1788 }
1789
0dc3f44a
MD
1790 rcu_read_lock();
1791 block = atomic_rcu_read(&ram_list.mru_block);
9b8424d5 1792 if (block && block->host && host - block->host < block->max_length) {
23887b79
PB
1793 goto found;
1794 }
1795
0dc3f44a 1796 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
432d268c
JN
1797         /* This can happen when the block is not mapped. */
1798 if (block->host == NULL) {
1799 continue;
1800 }
9b8424d5 1801 if (host - block->host < block->max_length) {
23887b79 1802 goto found;
f471a17e 1803 }
94a6b54f 1804 }
432d268c 1805
0dc3f44a 1806 rcu_read_unlock();
1b5ec234 1807 return NULL;
23887b79
PB
1808
1809found:
1810 *ram_addr = block->offset + (host - block->host);
ae3a7047 1811 mr = block->mr;
0dc3f44a 1812 rcu_read_unlock();
ae3a7047 1813 return mr;
e890261f 1814}
f471a17e 1815
a8170e5e 1816static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
0e0df1e2 1817 uint64_t val, unsigned size)
9fa3e853 1818{
52159192 1819 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
0e0df1e2 1820 tb_invalidate_phys_page_fast(ram_addr, size);
3a7d929e 1821 }
0e0df1e2
AK
1822 switch (size) {
1823 case 1:
1824 stb_p(qemu_get_ram_ptr(ram_addr), val);
1825 break;
1826 case 2:
1827 stw_p(qemu_get_ram_ptr(ram_addr), val);
1828 break;
1829 case 4:
1830 stl_p(qemu_get_ram_ptr(ram_addr), val);
1831 break;
1832 default:
1833 abort();
3a7d929e 1834 }
6886867e 1835 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
f23db169
FB
1836 /* we remove the notdirty callback only if the code has been
1837 flushed */
a2cd8c85 1838 if (!cpu_physical_memory_is_clean(ram_addr)) {
4917cf44 1839 CPUArchState *env = current_cpu->env_ptr;
93afeade 1840 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
4917cf44 1841 }
9fa3e853
FB
1842}
1843
b018ddf6
PB
1844static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1845 unsigned size, bool is_write)
1846{
1847 return is_write;
1848}
1849
0e0df1e2 1850static const MemoryRegionOps notdirty_mem_ops = {
0e0df1e2 1851 .write = notdirty_mem_write,
b018ddf6 1852 .valid.accepts = notdirty_mem_accepts,
0e0df1e2 1853 .endianness = DEVICE_NATIVE_ENDIAN,
1ccde1cb
FB
1854};
1855
0f459d16 1856/* Generate a debug exception if a watchpoint has been hit. */
05068c0d 1857static void check_watchpoint(int offset, int len, int flags)
0f459d16 1858{
93afeade
AF
1859 CPUState *cpu = current_cpu;
1860 CPUArchState *env = cpu->env_ptr;
06d55cc1 1861 target_ulong pc, cs_base;
0f459d16 1862 target_ulong vaddr;
a1d1bb31 1863 CPUWatchpoint *wp;
06d55cc1 1864 int cpu_flags;
0f459d16 1865
ff4700b0 1866 if (cpu->watchpoint_hit) {
06d55cc1
AL
1867 /* We re-entered the check after replacing the TB. Now raise
1868          * the debug interrupt so that it will trigger after the
1869 * current instruction. */
93afeade 1870 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
06d55cc1
AL
1871 return;
1872 }
93afeade 1873 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
ff4700b0 1874 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
05068c0d
PM
1875 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1876 && (wp->flags & flags)) {
08225676
PM
1877 if (flags == BP_MEM_READ) {
1878 wp->flags |= BP_WATCHPOINT_HIT_READ;
1879 } else {
1880 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1881 }
1882 wp->hitaddr = vaddr;
ff4700b0
AF
1883 if (!cpu->watchpoint_hit) {
1884 cpu->watchpoint_hit = wp;
239c51a5 1885 tb_check_watchpoint(cpu);
6e140f28 1886 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
27103424 1887 cpu->exception_index = EXCP_DEBUG;
5638d180 1888 cpu_loop_exit(cpu);
6e140f28
AL
1889 } else {
1890 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
648f034c 1891 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
0ea8cb88 1892 cpu_resume_from_signal(cpu, NULL);
6e140f28 1893 }
06d55cc1 1894 }
6e140f28
AL
1895 } else {
1896 wp->flags &= ~BP_WATCHPOINT_HIT;
0f459d16
PB
1897 }
1898 }
1899}
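/*
 * Example (illustrative sketch): check_watchpoint() only fires for
 * watchpoints that were installed on the CPU beforehand.  Assuming the
 * cpu_watchpoint_insert() prototype from the CPU headers used by this file,
 * a 4-byte write watchpoint on a guest virtual address could be set up as:
 */
static int example_set_write_watchpoint(CPUState *cpu, vaddr addr)
{
    CPUWatchpoint *wp;

    /* BP_MEM_WRITE routes stores through the watch_mem_ops region below */
    return cpu_watchpoint_insert(cpu, addr, 4, BP_MEM_WRITE | BP_GDB, &wp);
}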
1900
6658ffb8
PB
1901/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1902 so these check for a hit then pass through to the normal out-of-line
1903 phys routines. */
a8170e5e 1904static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1ec9b909 1905 unsigned size)
6658ffb8 1906{
05068c0d 1907 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1ec9b909 1908 switch (size) {
2c17449b 1909 case 1: return ldub_phys(&address_space_memory, addr);
41701aa4 1910 case 2: return lduw_phys(&address_space_memory, addr);
fdfba1a2 1911 case 4: return ldl_phys(&address_space_memory, addr);
1ec9b909
AK
1912 default: abort();
1913 }
6658ffb8
PB
1914}
1915
a8170e5e 1916static void watch_mem_write(void *opaque, hwaddr addr,
1ec9b909 1917 uint64_t val, unsigned size)
6658ffb8 1918{
05068c0d 1919 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1ec9b909 1920 switch (size) {
67364150 1921 case 1:
db3be60d 1922 stb_phys(&address_space_memory, addr, val);
67364150
MF
1923 break;
1924 case 2:
5ce5944d 1925 stw_phys(&address_space_memory, addr, val);
67364150
MF
1926 break;
1927 case 4:
ab1da857 1928 stl_phys(&address_space_memory, addr, val);
67364150 1929 break;
1ec9b909
AK
1930 default: abort();
1931 }
6658ffb8
PB
1932}
1933
1ec9b909
AK
1934static const MemoryRegionOps watch_mem_ops = {
1935 .read = watch_mem_read,
1936 .write = watch_mem_write,
1937 .endianness = DEVICE_NATIVE_ENDIAN,
6658ffb8 1938};
6658ffb8 1939
a8170e5e 1940static uint64_t subpage_read(void *opaque, hwaddr addr,
70c68e44 1941 unsigned len)
db7b5426 1942{
acc9d80b 1943 subpage_t *subpage = opaque;
ff6cff75 1944 uint8_t buf[8];
791af8c8 1945
db7b5426 1946#if defined(DEBUG_SUBPAGE)
016e9d62 1947 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
acc9d80b 1948 subpage, len, addr);
db7b5426 1949#endif
acc9d80b
JK
1950 address_space_read(subpage->as, addr + subpage->base, buf, len);
1951 switch (len) {
1952 case 1:
1953 return ldub_p(buf);
1954 case 2:
1955 return lduw_p(buf);
1956 case 4:
1957 return ldl_p(buf);
ff6cff75
PB
1958 case 8:
1959 return ldq_p(buf);
acc9d80b
JK
1960 default:
1961 abort();
1962 }
db7b5426
BS
1963}
1964
a8170e5e 1965static void subpage_write(void *opaque, hwaddr addr,
70c68e44 1966 uint64_t value, unsigned len)
db7b5426 1967{
acc9d80b 1968 subpage_t *subpage = opaque;
ff6cff75 1969 uint8_t buf[8];
acc9d80b 1970
db7b5426 1971#if defined(DEBUG_SUBPAGE)
016e9d62 1972 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
acc9d80b
JK
1973 " value %"PRIx64"\n",
1974 __func__, subpage, len, addr, value);
db7b5426 1975#endif
acc9d80b
JK
1976 switch (len) {
1977 case 1:
1978 stb_p(buf, value);
1979 break;
1980 case 2:
1981 stw_p(buf, value);
1982 break;
1983 case 4:
1984 stl_p(buf, value);
1985 break;
ff6cff75
PB
1986 case 8:
1987 stq_p(buf, value);
1988 break;
acc9d80b
JK
1989 default:
1990 abort();
1991 }
1992 address_space_write(subpage->as, addr + subpage->base, buf, len);
db7b5426
BS
1993}
1994
c353e4cc 1995static bool subpage_accepts(void *opaque, hwaddr addr,
016e9d62 1996 unsigned len, bool is_write)
c353e4cc 1997{
acc9d80b 1998 subpage_t *subpage = opaque;
c353e4cc 1999#if defined(DEBUG_SUBPAGE)
016e9d62 2000 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
acc9d80b 2001 __func__, subpage, is_write ? 'w' : 'r', len, addr);
c353e4cc
PB
2002#endif
2003
acc9d80b 2004 return address_space_access_valid(subpage->as, addr + subpage->base,
016e9d62 2005 len, is_write);
c353e4cc
PB
2006}
2007
70c68e44
AK
2008static const MemoryRegionOps subpage_ops = {
2009 .read = subpage_read,
2010 .write = subpage_write,
ff6cff75
PB
2011 .impl.min_access_size = 1,
2012 .impl.max_access_size = 8,
2013 .valid.min_access_size = 1,
2014 .valid.max_access_size = 8,
c353e4cc 2015 .valid.accepts = subpage_accepts,
70c68e44 2016 .endianness = DEVICE_NATIVE_ENDIAN,
db7b5426
BS
2017};
2018
c227f099 2019static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
5312bd8b 2020 uint16_t section)
db7b5426
BS
2021{
2022 int idx, eidx;
2023
2024 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2025 return -1;
2026 idx = SUBPAGE_IDX(start);
2027 eidx = SUBPAGE_IDX(end);
2028#if defined(DEBUG_SUBPAGE)
016e9d62
AK
2029 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2030 __func__, mmio, start, end, idx, eidx, section);
db7b5426 2031#endif
db7b5426 2032 for (; idx <= eidx; idx++) {
5312bd8b 2033 mmio->sub_section[idx] = section;
db7b5426
BS
2034 }
2035
2036 return 0;
2037}
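/*
 * Example (illustrative sketch): subpage_register() takes page-relative byte
 * offsets, inclusive at both ends.  With a hypothetical section index of 5
 * and assuming TARGET_PAGE_SIZE == 4096, mapping the second 1KiB of the page
 * would look roughly like this:
 */
static void example_register_second_kilobyte(subpage_t *mmio)
{
    /* bytes 0x400..0x7ff of the page resolve to sections[5] */
    subpage_register(mmio, 0x400, 0x7ff, 5);
}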
2038
acc9d80b 2039static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
db7b5426 2040{
c227f099 2041 subpage_t *mmio;
db7b5426 2042
7267c094 2043 mmio = g_malloc0(sizeof(subpage_t));
1eec614b 2044
acc9d80b 2045 mmio->as = as;
1eec614b 2046 mmio->base = base;
2c9b15ca 2047 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
b4fefef9 2048 NULL, TARGET_PAGE_SIZE);
b3b00c78 2049 mmio->iomem.subpage = true;
db7b5426 2050#if defined(DEBUG_SUBPAGE)
016e9d62
AK
2051 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2052 mmio, base, TARGET_PAGE_SIZE);
db7b5426 2053#endif
b41aac4f 2054 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
db7b5426
BS
2055
2056 return mmio;
2057}
2058
a656e22f
PC
2059static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2060 MemoryRegion *mr)
5312bd8b 2061{
a656e22f 2062 assert(as);
5312bd8b 2063 MemoryRegionSection section = {
a656e22f 2064 .address_space = as,
5312bd8b
AK
2065 .mr = mr,
2066 .offset_within_address_space = 0,
2067 .offset_within_region = 0,
052e87b0 2068 .size = int128_2_64(),
5312bd8b
AK
2069 };
2070
53cb28cb 2071 return phys_section_add(map, &section);
5312bd8b
AK
2072}
2073
9d82b5a7 2074MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
aa102231 2075{
79e2b9ae
PB
2076 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2077 MemoryRegionSection *sections = d->map.sections;
9d82b5a7
PB
2078
2079 return sections[index & ~TARGET_PAGE_MASK].mr;
aa102231
AK
2080}
2081
e9179ce1
AK
2082static void io_mem_init(void)
2083{
1f6245e5 2084 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2c9b15ca 2085 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1f6245e5 2086 NULL, UINT64_MAX);
2c9b15ca 2087 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1f6245e5 2088 NULL, UINT64_MAX);
2c9b15ca 2089 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1f6245e5 2090 NULL, UINT64_MAX);
e9179ce1
AK
2091}
2092
ac1970fb 2093static void mem_begin(MemoryListener *listener)
00752703
PB
2094{
2095 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
53cb28cb
MA
2096 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2097 uint16_t n;
2098
a656e22f 2099 n = dummy_section(&d->map, as, &io_mem_unassigned);
53cb28cb 2100 assert(n == PHYS_SECTION_UNASSIGNED);
a656e22f 2101 n = dummy_section(&d->map, as, &io_mem_notdirty);
53cb28cb 2102 assert(n == PHYS_SECTION_NOTDIRTY);
a656e22f 2103 n = dummy_section(&d->map, as, &io_mem_rom);
53cb28cb 2104 assert(n == PHYS_SECTION_ROM);
a656e22f 2105 n = dummy_section(&d->map, as, &io_mem_watch);
53cb28cb 2106 assert(n == PHYS_SECTION_WATCH);
00752703 2107
9736e55b 2108 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
00752703
PB
2109 d->as = as;
2110 as->next_dispatch = d;
2111}
2112
79e2b9ae
PB
2113static void address_space_dispatch_free(AddressSpaceDispatch *d)
2114{
2115 phys_sections_free(&d->map);
2116 g_free(d);
2117}
2118
00752703 2119static void mem_commit(MemoryListener *listener)
ac1970fb 2120{
89ae337a 2121 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
0475d94f
PB
2122 AddressSpaceDispatch *cur = as->dispatch;
2123 AddressSpaceDispatch *next = as->next_dispatch;
2124
53cb28cb 2125 phys_page_compact_all(next, next->map.nodes_nb);
b35ba30f 2126
79e2b9ae 2127 atomic_rcu_set(&as->dispatch, next);
53cb28cb 2128 if (cur) {
79e2b9ae 2129 call_rcu(cur, address_space_dispatch_free, rcu);
53cb28cb 2130 }
9affd6fc
PB
2131}
2132
1d71148e 2133static void tcg_commit(MemoryListener *listener)
50c1e149 2134{
182735ef 2135 CPUState *cpu;
117712c3
AK
2136
2137 /* since each CPU stores ram addresses in its TLB cache, we must
2138 reset the modified entries */
2139 /* XXX: slow ! */
bdc44640 2140 CPU_FOREACH(cpu) {
33bde2e1
EI
2141 /* FIXME: Disentangle the cpu.h circular files deps so we can
2142 directly get the right CPU from listener. */
2143 if (cpu->tcg_as_listener != listener) {
2144 continue;
2145 }
76e5c76f 2146 cpu_reload_memory_map(cpu);
117712c3 2147 }
50c1e149
AK
2148}
2149
93632747
AK
2150static void core_log_global_start(MemoryListener *listener)
2151{
981fdf23 2152 cpu_physical_memory_set_dirty_tracking(true);
93632747
AK
2153}
2154
2155static void core_log_global_stop(MemoryListener *listener)
2156{
981fdf23 2157 cpu_physical_memory_set_dirty_tracking(false);
93632747
AK
2158}
2159
93632747 2160static MemoryListener core_memory_listener = {
93632747
AK
2161 .log_global_start = core_log_global_start,
2162 .log_global_stop = core_log_global_stop,
ac1970fb 2163 .priority = 1,
93632747
AK
2164};
2165
ac1970fb
AK
2166void address_space_init_dispatch(AddressSpace *as)
2167{
00752703 2168 as->dispatch = NULL;
89ae337a 2169 as->dispatch_listener = (MemoryListener) {
ac1970fb 2170 .begin = mem_begin,
00752703 2171 .commit = mem_commit,
ac1970fb
AK
2172 .region_add = mem_add,
2173 .region_nop = mem_add,
2174 .priority = 0,
2175 };
89ae337a 2176 memory_listener_register(&as->dispatch_listener, as);
ac1970fb
AK
2177}
2178
6e48e8f9
PB
2179void address_space_unregister(AddressSpace *as)
2180{
2181 memory_listener_unregister(&as->dispatch_listener);
2182}
2183
83f3c251
AK
2184void address_space_destroy_dispatch(AddressSpace *as)
2185{
2186 AddressSpaceDispatch *d = as->dispatch;
2187
79e2b9ae
PB
2188 atomic_rcu_set(&as->dispatch, NULL);
2189 if (d) {
2190 call_rcu(d, address_space_dispatch_free, rcu);
2191 }
83f3c251
AK
2192}
2193
62152b8a
AK
2194static void memory_map_init(void)
2195{
7267c094 2196 system_memory = g_malloc(sizeof(*system_memory));
03f49957 2197
57271d63 2198 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
7dca8043 2199 address_space_init(&address_space_memory, system_memory, "memory");
309cb471 2200
7267c094 2201 system_io = g_malloc(sizeof(*system_io));
3bb28b72
JK
2202 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2203 65536);
7dca8043 2204 address_space_init(&address_space_io, system_io, "I/O");
93632747 2205
f6790af6 2206 memory_listener_register(&core_memory_listener, &address_space_memory);
62152b8a
AK
2207}
2208
2209MemoryRegion *get_system_memory(void)
2210{
2211 return system_memory;
2212}
2213
309cb471
AK
2214MemoryRegion *get_system_io(void)
2215{
2216 return system_io;
2217}
2218
e2eef170
PB
2219#endif /* !defined(CONFIG_USER_ONLY) */
2220
13eb76e0
FB
2221/* physical memory access (slow version, mainly for debug) */
2222#if defined(CONFIG_USER_ONLY)
f17ec444 2223int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
a68fe89c 2224 uint8_t *buf, int len, int is_write)
13eb76e0
FB
2225{
2226 int l, flags;
2227 target_ulong page;
53a5960a 2228 void * p;
13eb76e0
FB
2229
2230 while (len > 0) {
2231 page = addr & TARGET_PAGE_MASK;
2232 l = (page + TARGET_PAGE_SIZE) - addr;
2233 if (l > len)
2234 l = len;
2235 flags = page_get_flags(page);
2236 if (!(flags & PAGE_VALID))
a68fe89c 2237 return -1;
13eb76e0
FB
2238 if (is_write) {
2239 if (!(flags & PAGE_WRITE))
a68fe89c 2240 return -1;
579a97f7 2241 /* XXX: this code should not depend on lock_user */
72fb7daa 2242 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
a68fe89c 2243 return -1;
72fb7daa
AJ
2244 memcpy(p, buf, l);
2245 unlock_user(p, addr, l);
13eb76e0
FB
2246 } else {
2247 if (!(flags & PAGE_READ))
a68fe89c 2248 return -1;
579a97f7 2249 /* XXX: this code should not depend on lock_user */
72fb7daa 2250 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
a68fe89c 2251 return -1;
72fb7daa 2252 memcpy(buf, p, l);
5b257578 2253 unlock_user(p, addr, 0);
13eb76e0
FB
2254 }
2255 len -= l;
2256 buf += l;
2257 addr += l;
2258 }
a68fe89c 2259 return 0;
13eb76e0 2260}
8df1cd07 2261
13eb76e0 2262#else
51d7a9eb 2263
a8170e5e
AK
2264static void invalidate_and_set_dirty(hwaddr addr,
2265 hwaddr length)
51d7a9eb 2266{
f874bf90
PM
2267 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2268 tb_invalidate_phys_range(addr, addr + length, 0);
6886867e 2269 cpu_physical_memory_set_dirty_range_nocode(addr, length);
51d7a9eb 2270 }
e226939d 2271 xen_modified_memory(addr, length);
51d7a9eb
AP
2272}
2273
23326164 2274static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
82f2563f 2275{
e1622f4b 2276 unsigned access_size_max = mr->ops->valid.max_access_size;
23326164
RH
2277
2278 /* Regions are assumed to support 1-4 byte accesses unless
2279 otherwise specified. */
23326164
RH
2280 if (access_size_max == 0) {
2281 access_size_max = 4;
2282 }
2283
2284 /* Bound the maximum access by the alignment of the address. */
2285 if (!mr->ops->impl.unaligned) {
2286 unsigned align_size_max = addr & -addr;
2287 if (align_size_max != 0 && align_size_max < access_size_max) {
2288 access_size_max = align_size_max;
2289 }
82f2563f 2290 }
23326164
RH
2291
2292 /* Don't attempt accesses larger than the maximum. */
2293 if (l > access_size_max) {
2294 l = access_size_max;
82f2563f 2295 }
098178f2
PB
2296 if (l & (l - 1)) {
2297 l = 1 << (qemu_fls(l) - 1);
2298 }
23326164
RH
2299
2300 return l;
82f2563f
PB
2301}
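/*
 * Worked example (illustrative): for a region whose ops declare
 * valid.max_access_size == 4, a 6-byte request at address 0x1002 is first
 * bounded by the address alignment (0x1002 & -0x1002 == 2), then by the
 * maximum access size; 2 is already a power of two, so the first access
 * issued is 2 bytes wide and the remaining bytes are handled by later
 * iterations of the address_space_rw() loop below.
 */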
2302
fd8aaa76 2303bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
ac1970fb 2304 int len, bool is_write)
13eb76e0 2305{
149f54b5 2306 hwaddr l;
13eb76e0 2307 uint8_t *ptr;
791af8c8 2308 uint64_t val;
149f54b5 2309 hwaddr addr1;
5c8a00ce 2310 MemoryRegion *mr;
fd8aaa76 2311 bool error = false;
3b46e624 2312
13eb76e0 2313 while (len > 0) {
149f54b5 2314 l = len;
5c8a00ce 2315 mr = address_space_translate(as, addr, &addr1, &l, is_write);
3b46e624 2316
13eb76e0 2317 if (is_write) {
5c8a00ce
PB
2318 if (!memory_access_is_direct(mr, is_write)) {
2319 l = memory_access_size(mr, l, addr1);
4917cf44 2320 /* XXX: could force current_cpu to NULL to avoid
6a00d601 2321 potential bugs */
23326164
RH
2322 switch (l) {
2323 case 8:
2324 /* 64 bit write access */
2325 val = ldq_p(buf);
2326 error |= io_mem_write(mr, addr1, val, 8);
2327 break;
2328 case 4:
1c213d19 2329 /* 32 bit write access */
c27004ec 2330 val = ldl_p(buf);
5c8a00ce 2331 error |= io_mem_write(mr, addr1, val, 4);
23326164
RH
2332 break;
2333 case 2:
1c213d19 2334 /* 16 bit write access */
c27004ec 2335 val = lduw_p(buf);
5c8a00ce 2336 error |= io_mem_write(mr, addr1, val, 2);
23326164
RH
2337 break;
2338 case 1:
1c213d19 2339 /* 8 bit write access */
c27004ec 2340 val = ldub_p(buf);
5c8a00ce 2341 error |= io_mem_write(mr, addr1, val, 1);
23326164
RH
2342 break;
2343 default:
2344 abort();
13eb76e0 2345 }
2bbfa05d 2346 } else {
5c8a00ce 2347 addr1 += memory_region_get_ram_addr(mr);
13eb76e0 2348 /* RAM case */
5579c7f3 2349 ptr = qemu_get_ram_ptr(addr1);
13eb76e0 2350 memcpy(ptr, buf, l);
51d7a9eb 2351 invalidate_and_set_dirty(addr1, l);
13eb76e0
FB
2352 }
2353 } else {
5c8a00ce 2354 if (!memory_access_is_direct(mr, is_write)) {
13eb76e0 2355 /* I/O case */
5c8a00ce 2356 l = memory_access_size(mr, l, addr1);
23326164
RH
2357 switch (l) {
2358 case 8:
2359 /* 64 bit read access */
2360 error |= io_mem_read(mr, addr1, &val, 8);
2361 stq_p(buf, val);
2362 break;
2363 case 4:
13eb76e0 2364 /* 32 bit read access */
5c8a00ce 2365 error |= io_mem_read(mr, addr1, &val, 4);
c27004ec 2366 stl_p(buf, val);
23326164
RH
2367 break;
2368 case 2:
13eb76e0 2369 /* 16 bit read access */
5c8a00ce 2370 error |= io_mem_read(mr, addr1, &val, 2);
c27004ec 2371 stw_p(buf, val);
23326164
RH
2372 break;
2373 case 1:
1c213d19 2374 /* 8 bit read access */
5c8a00ce 2375 error |= io_mem_read(mr, addr1, &val, 1);
c27004ec 2376 stb_p(buf, val);
23326164
RH
2377 break;
2378 default:
2379 abort();
13eb76e0
FB
2380 }
2381 } else {
2382 /* RAM case */
5c8a00ce 2383 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
f3705d53 2384 memcpy(buf, ptr, l);
13eb76e0
FB
2385 }
2386 }
2387 len -= l;
2388 buf += l;
2389 addr += l;
2390 }
fd8aaa76
PB
2391
2392 return error;
13eb76e0 2393}
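/*
 * Example (illustrative sketch): device models normally go through
 * address_space_read()/address_space_write() rather than calling
 * address_space_rw() directly.  Hypothetical helper that copies a 16-byte
 * descriptor out of guest memory; note that in this version of the API the
 * return value is true on error.
 */
static bool example_read_descriptor(AddressSpace *as, hwaddr desc_addr,
                                    uint8_t *desc /* 16 bytes */)
{
    return address_space_read(as, desc_addr, desc, 16);
}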
8df1cd07 2394
fd8aaa76 2395bool address_space_write(AddressSpace *as, hwaddr addr,
ac1970fb
AK
2396 const uint8_t *buf, int len)
2397{
fd8aaa76 2398 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
ac1970fb
AK
2399}
2400
fd8aaa76 2401bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
ac1970fb 2402{
fd8aaa76 2403 return address_space_rw(as, addr, buf, len, false);
ac1970fb
AK
2404}
2405
2406
a8170e5e 2407void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
ac1970fb
AK
2408 int len, int is_write)
2409{
fd8aaa76 2410 address_space_rw(&address_space_memory, addr, buf, len, is_write);
ac1970fb
AK
2411}
2412
582b55a9
AG
2413enum write_rom_type {
2414 WRITE_DATA,
2415 FLUSH_CACHE,
2416};
2417
2a221651 2418static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
582b55a9 2419 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
d0ecd2aa 2420{
149f54b5 2421 hwaddr l;
d0ecd2aa 2422 uint8_t *ptr;
149f54b5 2423 hwaddr addr1;
5c8a00ce 2424 MemoryRegion *mr;
3b46e624 2425
d0ecd2aa 2426 while (len > 0) {
149f54b5 2427 l = len;
2a221651 2428 mr = address_space_translate(as, addr, &addr1, &l, true);
3b46e624 2429
5c8a00ce
PB
2430 if (!(memory_region_is_ram(mr) ||
2431 memory_region_is_romd(mr))) {
d0ecd2aa
FB
2432 /* do nothing */
2433 } else {
5c8a00ce 2434 addr1 += memory_region_get_ram_addr(mr);
d0ecd2aa 2435 /* ROM/RAM case */
5579c7f3 2436 ptr = qemu_get_ram_ptr(addr1);
582b55a9
AG
2437 switch (type) {
2438 case WRITE_DATA:
2439 memcpy(ptr, buf, l);
2440 invalidate_and_set_dirty(addr1, l);
2441 break;
2442 case FLUSH_CACHE:
2443 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2444 break;
2445 }
d0ecd2aa
FB
2446 }
2447 len -= l;
2448 buf += l;
2449 addr += l;
2450 }
2451}
2452
582b55a9 2453/* used for ROM loading : can write in RAM and ROM */
2a221651 2454void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
582b55a9
AG
2455 const uint8_t *buf, int len)
2456{
2a221651 2457 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
582b55a9
AG
2458}
2459
2460void cpu_flush_icache_range(hwaddr start, int len)
2461{
2462 /*
2463 * This function should do the same thing as an icache flush that was
2464 * triggered from within the guest. For TCG we are always cache coherent,
2465 * so there is no need to flush anything. For KVM / Xen we need to flush
2466 * the host's instruction cache at least.
2467 */
2468 if (tcg_enabled()) {
2469 return;
2470 }
2471
2a221651
EI
2472 cpu_physical_memory_write_rom_internal(&address_space_memory,
2473 start, NULL, len, FLUSH_CACHE);
582b55a9
AG
2474}
2475
6d16c2f8 2476typedef struct {
d3e71559 2477 MemoryRegion *mr;
6d16c2f8 2478 void *buffer;
a8170e5e
AK
2479 hwaddr addr;
2480 hwaddr len;
6d16c2f8
AL
2481} BounceBuffer;
2482
2483static BounceBuffer bounce;
2484
ba223c29
AL
2485typedef struct MapClient {
2486 void *opaque;
2487 void (*callback)(void *opaque);
72cf2d4f 2488 QLIST_ENTRY(MapClient) link;
ba223c29
AL
2489} MapClient;
2490
72cf2d4f
BS
2491static QLIST_HEAD(map_client_list, MapClient) map_client_list
2492 = QLIST_HEAD_INITIALIZER(map_client_list);
ba223c29
AL
2493
2494void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2495{
7267c094 2496 MapClient *client = g_malloc(sizeof(*client));
ba223c29
AL
2497
2498 client->opaque = opaque;
2499 client->callback = callback;
72cf2d4f 2500 QLIST_INSERT_HEAD(&map_client_list, client, link);
ba223c29
AL
2501 return client;
2502}
2503
8b9c99d9 2504static void cpu_unregister_map_client(void *_client)
ba223c29
AL
2505{
2506 MapClient *client = (MapClient *)_client;
2507
72cf2d4f 2508 QLIST_REMOVE(client, link);
7267c094 2509 g_free(client);
ba223c29
AL
2510}
2511
2512static void cpu_notify_map_clients(void)
2513{
2514 MapClient *client;
2515
72cf2d4f
BS
2516 while (!QLIST_EMPTY(&map_client_list)) {
2517 client = QLIST_FIRST(&map_client_list);
ba223c29 2518 client->callback(client->opaque);
34d5e948 2519 cpu_unregister_map_client(client);
ba223c29
AL
2520 }
2521}
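/*
 * Example (illustrative sketch): when address_space_map() fails because the
 * single bounce buffer is busy, a caller can register a map client and retry
 * once cpu_notify_map_clients() runs.  ExampleDevice and its retry flag are
 * hypothetical; a real device would re-run its DMA path from the callback.
 */
typedef struct ExampleDevice {
    bool map_retry_pending;             /* hypothetical device state */
} ExampleDevice;

static void example_map_retry_cb(void *opaque)
{
    ExampleDevice *dev = opaque;

    dev->map_retry_pending = true;      /* re-attempt the mapping later */
}

static void example_schedule_map_retry(ExampleDevice *dev)
{
    cpu_register_map_client(dev, example_map_retry_cb);
}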
2522
51644ab7
PB
2523bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2524{
5c8a00ce 2525 MemoryRegion *mr;
51644ab7
PB
2526 hwaddr l, xlat;
2527
2528 while (len > 0) {
2529 l = len;
5c8a00ce
PB
2530 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2531 if (!memory_access_is_direct(mr, is_write)) {
2532 l = memory_access_size(mr, l, addr);
2533 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
51644ab7
PB
2534 return false;
2535 }
2536 }
2537
2538 len -= l;
2539 addr += l;
2540 }
2541 return true;
2542}
2543
6d16c2f8
AL
2544/* Map a physical memory region into a host virtual address.
2545 * May map a subset of the requested range, given by and returned in *plen.
2546 * May return NULL if resources needed to perform the mapping are exhausted.
2547 * Use only for reads OR writes - not for read-modify-write operations.
ba223c29
AL
2548 * Use cpu_register_map_client() to know when retrying the map operation is
2549 * likely to succeed.
6d16c2f8 2550 */
ac1970fb 2551void *address_space_map(AddressSpace *as,
a8170e5e
AK
2552 hwaddr addr,
2553 hwaddr *plen,
ac1970fb 2554 bool is_write)
6d16c2f8 2555{
a8170e5e 2556 hwaddr len = *plen;
e3127ae0
PB
2557 hwaddr done = 0;
2558 hwaddr l, xlat, base;
2559 MemoryRegion *mr, *this_mr;
2560 ram_addr_t raddr;
6d16c2f8 2561
e3127ae0
PB
2562 if (len == 0) {
2563 return NULL;
2564 }
38bee5dc 2565
e3127ae0
PB
2566 l = len;
2567 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2568 if (!memory_access_is_direct(mr, is_write)) {
2569 if (bounce.buffer) {
2570 return NULL;
6d16c2f8 2571 }
e85d9db5
KW
2572 /* Avoid unbounded allocations */
2573 l = MIN(l, TARGET_PAGE_SIZE);
2574 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
e3127ae0
PB
2575 bounce.addr = addr;
2576 bounce.len = l;
d3e71559
PB
2577
2578 memory_region_ref(mr);
2579 bounce.mr = mr;
e3127ae0
PB
2580 if (!is_write) {
2581 address_space_read(as, addr, bounce.buffer, l);
8ab934f9 2582 }
6d16c2f8 2583
e3127ae0
PB
2584 *plen = l;
2585 return bounce.buffer;
2586 }
2587
2588 base = xlat;
2589 raddr = memory_region_get_ram_addr(mr);
2590
2591 for (;;) {
6d16c2f8
AL
2592 len -= l;
2593 addr += l;
e3127ae0
PB
2594 done += l;
2595 if (len == 0) {
2596 break;
2597 }
2598
2599 l = len;
2600 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2601 if (this_mr != mr || xlat != base + done) {
2602 break;
2603 }
6d16c2f8 2604 }
e3127ae0 2605
d3e71559 2606 memory_region_ref(mr);
e3127ae0
PB
2607 *plen = done;
2608 return qemu_ram_ptr_length(raddr + base, plen);
6d16c2f8
AL
2609}
2610
ac1970fb 2611/* Unmaps a memory region previously mapped by address_space_map().
6d16c2f8
AL
2612 * Will also mark the memory as dirty if is_write == 1. access_len gives
2613 * the amount of memory that was actually read or written by the caller.
2614 */
a8170e5e
AK
2615void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2616 int is_write, hwaddr access_len)
6d16c2f8
AL
2617{
2618 if (buffer != bounce.buffer) {
d3e71559
PB
2619 MemoryRegion *mr;
2620 ram_addr_t addr1;
2621
2622 mr = qemu_ram_addr_from_host(buffer, &addr1);
2623 assert(mr != NULL);
6d16c2f8 2624 if (is_write) {
6886867e 2625 invalidate_and_set_dirty(addr1, access_len);
6d16c2f8 2626 }
868bb33f 2627 if (xen_enabled()) {
e41d7c69 2628 xen_invalidate_map_cache_entry(buffer);
050a0ddf 2629 }
d3e71559 2630 memory_region_unref(mr);
6d16c2f8
AL
2631 return;
2632 }
2633 if (is_write) {
ac1970fb 2634 address_space_write(as, bounce.addr, bounce.buffer, access_len);
6d16c2f8 2635 }
f8a83245 2636 qemu_vfree(bounce.buffer);
6d16c2f8 2637 bounce.buffer = NULL;
d3e71559 2638 memory_region_unref(bounce.mr);
ba223c29 2639 cpu_notify_map_clients();
6d16c2f8 2640}
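/*
 * Example (illustrative sketch): the usual pairing of address_space_map()
 * and address_space_unmap().  The mapped length can come back shorter than
 * requested, so the caller must honour *plen and report how much it actually
 * touched when unmapping.  Hypothetical fill of a guest buffer:
 */
static void example_fill_guest_buffer(AddressSpace *as, hwaddr addr,
                                      hwaddr len, uint8_t pattern)
{
    hwaddr plen = len;
    void *p = address_space_map(as, addr, &plen, true /* is_write */);

    if (!p) {
        return;                         /* resources exhausted; retry via map clients */
    }
    memset(p, pattern, plen);           /* only plen bytes are mapped */
    address_space_unmap(as, p, plen, true /* is_write */, plen);
}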
d0ecd2aa 2641
a8170e5e
AK
2642void *cpu_physical_memory_map(hwaddr addr,
2643 hwaddr *plen,
ac1970fb
AK
2644 int is_write)
2645{
2646 return address_space_map(&address_space_memory, addr, plen, is_write);
2647}
2648
a8170e5e
AK
2649void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2650 int is_write, hwaddr access_len)
ac1970fb
AK
2651{
2652 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2653}
2654
8df1cd07 2655/* warning: addr must be aligned */
fdfba1a2 2656static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
1e78bcc1 2657 enum device_endian endian)
8df1cd07 2658{
8df1cd07 2659 uint8_t *ptr;
791af8c8 2660 uint64_t val;
5c8a00ce 2661 MemoryRegion *mr;
149f54b5
PB
2662 hwaddr l = 4;
2663 hwaddr addr1;
8df1cd07 2664
fdfba1a2 2665 mr = address_space_translate(as, addr, &addr1, &l, false);
5c8a00ce 2666 if (l < 4 || !memory_access_is_direct(mr, false)) {
8df1cd07 2667 /* I/O case */
5c8a00ce 2668 io_mem_read(mr, addr1, &val, 4);
1e78bcc1
AG
2669#if defined(TARGET_WORDS_BIGENDIAN)
2670 if (endian == DEVICE_LITTLE_ENDIAN) {
2671 val = bswap32(val);
2672 }
2673#else
2674 if (endian == DEVICE_BIG_ENDIAN) {
2675 val = bswap32(val);
2676 }
2677#endif
8df1cd07
FB
2678 } else {
2679 /* RAM case */
5c8a00ce 2680 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2681 & TARGET_PAGE_MASK)
149f54b5 2682 + addr1);
1e78bcc1
AG
2683 switch (endian) {
2684 case DEVICE_LITTLE_ENDIAN:
2685 val = ldl_le_p(ptr);
2686 break;
2687 case DEVICE_BIG_ENDIAN:
2688 val = ldl_be_p(ptr);
2689 break;
2690 default:
2691 val = ldl_p(ptr);
2692 break;
2693 }
8df1cd07
FB
2694 }
2695 return val;
2696}
2697
fdfba1a2 2698uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2699{
fdfba1a2 2700 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2701}
2702
fdfba1a2 2703uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2704{
fdfba1a2 2705 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2706}
2707
fdfba1a2 2708uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2709{
fdfba1a2 2710 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2711}
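/*
 * Example (illustrative sketch): a device model reading a 32-bit
 * little-endian field from a guest-physical descriptor uses the
 * explicit-endian accessor, so the result is host-order regardless of
 * TARGET_WORDS_BIGENDIAN.  The descriptor layout here is hypothetical.
 */
static uint32_t example_read_le32_field(hwaddr desc_pa, hwaddr field_offset)
{
    return ldl_le_phys(&address_space_memory, desc_pa + field_offset);
}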
2712
84b7b8e7 2713/* warning: addr must be aligned */
2c17449b 2714static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
1e78bcc1 2715 enum device_endian endian)
84b7b8e7 2716{
84b7b8e7
FB
2717 uint8_t *ptr;
2718 uint64_t val;
5c8a00ce 2719 MemoryRegion *mr;
149f54b5
PB
2720 hwaddr l = 8;
2721 hwaddr addr1;
84b7b8e7 2722
2c17449b 2723 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2724 false);
2725 if (l < 8 || !memory_access_is_direct(mr, false)) {
84b7b8e7 2726 /* I/O case */
5c8a00ce 2727 io_mem_read(mr, addr1, &val, 8);
968a5627
PB
2728#if defined(TARGET_WORDS_BIGENDIAN)
2729 if (endian == DEVICE_LITTLE_ENDIAN) {
2730 val = bswap64(val);
2731 }
2732#else
2733 if (endian == DEVICE_BIG_ENDIAN) {
2734 val = bswap64(val);
2735 }
84b7b8e7
FB
2736#endif
2737 } else {
2738 /* RAM case */
5c8a00ce 2739 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2740 & TARGET_PAGE_MASK)
149f54b5 2741 + addr1);
1e78bcc1
AG
2742 switch (endian) {
2743 case DEVICE_LITTLE_ENDIAN:
2744 val = ldq_le_p(ptr);
2745 break;
2746 case DEVICE_BIG_ENDIAN:
2747 val = ldq_be_p(ptr);
2748 break;
2749 default:
2750 val = ldq_p(ptr);
2751 break;
2752 }
84b7b8e7
FB
2753 }
2754 return val;
2755}
2756
2c17449b 2757uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2758{
2c17449b 2759 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2760}
2761
2c17449b 2762uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2763{
2c17449b 2764 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2765}
2766
2c17449b 2767uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2768{
2c17449b 2769 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2770}
2771
aab33094 2772/* XXX: optimize */
2c17449b 2773uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
aab33094
FB
2774{
2775 uint8_t val;
2c17449b 2776 address_space_rw(as, addr, &val, 1, 0);
aab33094
FB
2777 return val;
2778}
2779
733f0b02 2780/* warning: addr must be aligned */
41701aa4 2781static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
1e78bcc1 2782 enum device_endian endian)
aab33094 2783{
733f0b02
MT
2784 uint8_t *ptr;
2785 uint64_t val;
5c8a00ce 2786 MemoryRegion *mr;
149f54b5
PB
2787 hwaddr l = 2;
2788 hwaddr addr1;
733f0b02 2789
41701aa4 2790 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2791 false);
2792 if (l < 2 || !memory_access_is_direct(mr, false)) {
733f0b02 2793 /* I/O case */
5c8a00ce 2794 io_mem_read(mr, addr1, &val, 2);
1e78bcc1
AG
2795#if defined(TARGET_WORDS_BIGENDIAN)
2796 if (endian == DEVICE_LITTLE_ENDIAN) {
2797 val = bswap16(val);
2798 }
2799#else
2800 if (endian == DEVICE_BIG_ENDIAN) {
2801 val = bswap16(val);
2802 }
2803#endif
733f0b02
MT
2804 } else {
2805 /* RAM case */
5c8a00ce 2806 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2807 & TARGET_PAGE_MASK)
149f54b5 2808 + addr1);
1e78bcc1
AG
2809 switch (endian) {
2810 case DEVICE_LITTLE_ENDIAN:
2811 val = lduw_le_p(ptr);
2812 break;
2813 case DEVICE_BIG_ENDIAN:
2814 val = lduw_be_p(ptr);
2815 break;
2816 default:
2817 val = lduw_p(ptr);
2818 break;
2819 }
733f0b02
MT
2820 }
2821 return val;
aab33094
FB
2822}
2823
41701aa4 2824uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2825{
41701aa4 2826 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2827}
2828
41701aa4 2829uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2830{
41701aa4 2831 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2832}
2833
41701aa4 2834uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2835{
41701aa4 2836 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2837}
2838
8df1cd07
FB
2839/* warning: addr must be aligned. The ram page is not masked as dirty
2840 and the code inside is not invalidated. It is useful if the dirty
2841 bits are used to track modified PTEs */
2198a121 2842void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
8df1cd07 2843{
8df1cd07 2844 uint8_t *ptr;
5c8a00ce 2845 MemoryRegion *mr;
149f54b5
PB
2846 hwaddr l = 4;
2847 hwaddr addr1;
8df1cd07 2848
2198a121 2849 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2850 true);
2851 if (l < 4 || !memory_access_is_direct(mr, true)) {
2852 io_mem_write(mr, addr1, val, 4);
8df1cd07 2853 } else {
5c8a00ce 2854 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
5579c7f3 2855 ptr = qemu_get_ram_ptr(addr1);
8df1cd07 2856 stl_p(ptr, val);
74576198
AL
2857
2858 if (unlikely(in_migration)) {
a2cd8c85 2859 if (cpu_physical_memory_is_clean(addr1)) {
74576198
AL
2860 /* invalidate code */
2861 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2862 /* set dirty bit */
6886867e 2863 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
74576198
AL
2864 }
2865 }
8df1cd07
FB
2866 }
2867}
2868
2869/* warning: addr must be aligned */
ab1da857
EI
2870static inline void stl_phys_internal(AddressSpace *as,
2871 hwaddr addr, uint32_t val,
1e78bcc1 2872 enum device_endian endian)
8df1cd07 2873{
8df1cd07 2874 uint8_t *ptr;
5c8a00ce 2875 MemoryRegion *mr;
149f54b5
PB
2876 hwaddr l = 4;
2877 hwaddr addr1;
8df1cd07 2878
ab1da857 2879 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2880 true);
2881 if (l < 4 || !memory_access_is_direct(mr, true)) {
1e78bcc1
AG
2882#if defined(TARGET_WORDS_BIGENDIAN)
2883 if (endian == DEVICE_LITTLE_ENDIAN) {
2884 val = bswap32(val);
2885 }
2886#else
2887 if (endian == DEVICE_BIG_ENDIAN) {
2888 val = bswap32(val);
2889 }
2890#endif
5c8a00ce 2891 io_mem_write(mr, addr1, val, 4);
8df1cd07 2892 } else {
8df1cd07 2893 /* RAM case */
5c8a00ce 2894 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
5579c7f3 2895 ptr = qemu_get_ram_ptr(addr1);
1e78bcc1
AG
2896 switch (endian) {
2897 case DEVICE_LITTLE_ENDIAN:
2898 stl_le_p(ptr, val);
2899 break;
2900 case DEVICE_BIG_ENDIAN:
2901 stl_be_p(ptr, val);
2902 break;
2903 default:
2904 stl_p(ptr, val);
2905 break;
2906 }
51d7a9eb 2907 invalidate_and_set_dirty(addr1, 4);
8df1cd07
FB
2908 }
2909}
2910
ab1da857 2911void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2912{
ab1da857 2913 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2914}
2915
ab1da857 2916void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2917{
ab1da857 2918 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2919}
2920
ab1da857 2921void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2922{
ab1da857 2923 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2924}
2925
aab33094 2926/* XXX: optimize */
db3be60d 2927void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
aab33094
FB
2928{
2929 uint8_t v = val;
db3be60d 2930 address_space_rw(as, addr, &v, 1, 1);
aab33094
FB
2931}
2932
733f0b02 2933/* warning: addr must be aligned */
5ce5944d
EI
2934static inline void stw_phys_internal(AddressSpace *as,
2935 hwaddr addr, uint32_t val,
1e78bcc1 2936 enum device_endian endian)
aab33094 2937{
733f0b02 2938 uint8_t *ptr;
5c8a00ce 2939 MemoryRegion *mr;
149f54b5
PB
2940 hwaddr l = 2;
2941 hwaddr addr1;
733f0b02 2942
5ce5944d 2943 mr = address_space_translate(as, addr, &addr1, &l, true);
5c8a00ce 2944 if (l < 2 || !memory_access_is_direct(mr, true)) {
1e78bcc1
AG
2945#if defined(TARGET_WORDS_BIGENDIAN)
2946 if (endian == DEVICE_LITTLE_ENDIAN) {
2947 val = bswap16(val);
2948 }
2949#else
2950 if (endian == DEVICE_BIG_ENDIAN) {
2951 val = bswap16(val);
2952 }
2953#endif
5c8a00ce 2954 io_mem_write(mr, addr1, val, 2);
733f0b02 2955 } else {
733f0b02 2956 /* RAM case */
5c8a00ce 2957 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
733f0b02 2958 ptr = qemu_get_ram_ptr(addr1);
1e78bcc1
AG
2959 switch (endian) {
2960 case DEVICE_LITTLE_ENDIAN:
2961 stw_le_p(ptr, val);
2962 break;
2963 case DEVICE_BIG_ENDIAN:
2964 stw_be_p(ptr, val);
2965 break;
2966 default:
2967 stw_p(ptr, val);
2968 break;
2969 }
51d7a9eb 2970 invalidate_and_set_dirty(addr1, 2);
733f0b02 2971 }
aab33094
FB
2972}
2973
5ce5944d 2974void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2975{
5ce5944d 2976 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2977}
2978
5ce5944d 2979void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2980{
5ce5944d 2981 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2982}
2983
5ce5944d 2984void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2985{
5ce5944d 2986 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2987}
2988
aab33094 2989/* XXX: optimize */
f606604f 2990void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
aab33094
FB
2991{
2992 val = tswap64(val);
f606604f 2993 address_space_rw(as, addr, (void *) &val, 8, 1);
aab33094
FB
2994}
2995
f606604f 2996void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
1e78bcc1
AG
2997{
2998 val = cpu_to_le64(val);
f606604f 2999 address_space_rw(as, addr, (void *) &val, 8, 1);
1e78bcc1
AG
3000}
3001
f606604f 3002void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
1e78bcc1
AG
3003{
3004 val = cpu_to_be64(val);
f606604f 3005 address_space_rw(as, addr, (void *) &val, 8, 1);
1e78bcc1
AG
3006}
3007
5e2972fd 3008/* virtual memory access for debug (includes writing to ROM) */
f17ec444 3009int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
b448f2f3 3010 uint8_t *buf, int len, int is_write)
13eb76e0
FB
3011{
3012 int l;
a8170e5e 3013 hwaddr phys_addr;
9b3c35e0 3014 target_ulong page;
13eb76e0
FB
3015
3016 while (len > 0) {
3017 page = addr & TARGET_PAGE_MASK;
f17ec444 3018 phys_addr = cpu_get_phys_page_debug(cpu, page);
13eb76e0
FB
3019 /* if no physical page mapped, return an error */
3020 if (phys_addr == -1)
3021 return -1;
3022 l = (page + TARGET_PAGE_SIZE) - addr;
3023 if (l > len)
3024 l = len;
5e2972fd 3025 phys_addr += (addr & ~TARGET_PAGE_MASK);
2e38847b
EI
3026 if (is_write) {
3027 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3028 } else {
3029 address_space_rw(cpu->as, phys_addr, buf, l, 0);
3030 }
13eb76e0
FB
3031 len -= l;
3032 buf += l;
3033 addr += l;
3034 }
3035 return 0;
3036}
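/*
 * Example (illustrative sketch): a gdbstub-style caller reads guest virtual
 * memory through the debug path above; writes go through the ROM-capable
 * path so breakpoint bytes can be planted even in ROM-backed pages.
 * Hypothetical 4-byte peek:
 */
static bool example_debug_peek_u32(CPUState *cpu, target_ulong addr,
                                   uint32_t *out)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(cpu, addr, buf, sizeof(buf), 0) < 0) {
        return false;                   /* no physical page mapped */
    }
    *out = ldl_p(buf);                  /* target-endian load from the byte buffer */
    return true;
}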
a68fe89c 3037#endif
13eb76e0 3038
8e4a424b
BS
3039/*
3040 * A helper function for the _utterly broken_ virtio device model to find out if
3041 * it's running on a big endian machine. Don't do this at home kids!
3042 */
98ed8ecf
GK
3043bool target_words_bigendian(void);
3044bool target_words_bigendian(void)
8e4a424b
BS
3045{
3046#if defined(TARGET_WORDS_BIGENDIAN)
3047 return true;
3048#else
3049 return false;
3050#endif
3051}
3052
76f35538 3053#ifndef CONFIG_USER_ONLY
a8170e5e 3054bool cpu_physical_memory_is_io(hwaddr phys_addr)
76f35538 3055{
5c8a00ce 3056 MemoryRegion*mr;
149f54b5 3057 hwaddr l = 1;
76f35538 3058
5c8a00ce
PB
3059 mr = address_space_translate(&address_space_memory,
3060 phys_addr, &phys_addr, &l, false);
76f35538 3061
5c8a00ce
PB
3062 return !(memory_region_is_ram(mr) ||
3063 memory_region_is_romd(mr));
76f35538 3064}
bd2fa51f
MH
3065
3066void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3067{
3068 RAMBlock *block;
3069
0dc3f44a
MD
3070 rcu_read_lock();
3071 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
9b8424d5 3072 func(block->host, block->offset, block->used_length, opaque);
bd2fa51f 3073 }
0dc3f44a 3074 rcu_read_unlock();
bd2fa51f 3075}
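/*
 * Example (illustrative sketch): the iterator above passes each block's host
 * pointer, ram_addr_t offset and used length to the callback, as the call
 * site shows.  A hypothetical accounting callback:
 */
static void example_count_ram_cb(void *host_addr, ram_addr_t offset,
                                 ram_addr_t length, void *opaque)
{
    uint64_t *total = opaque;

    *total += length;                   /* host_addr and offset unused here */
}

static uint64_t example_total_ram_bytes(void)
{
    uint64_t total = 0;

    qemu_ram_foreach_block(example_count_ram_cb, &total);
    return total;
}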
ec3f8c99 3076#endif